Skip to content

Commit 9c45f6f

Browse files
committed
BUG: pivot should raise exception if there are duplicate entries,
address GH #147
1 parent 877e596 commit 9c45f6f

File tree

5 files changed

+55
-24
lines changed

5 files changed

+55
-24
lines changed

RELEASE.rst

+29 -15
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,28 @@
1-
========================
2-
pandas 0.4 Release Notes
3-
========================
1+
=============
2+
Release Notes
3+
=============
44

5-
What is it
5+
This is the list of changes to pandas between each release. For full details,
6+
see the commit logs at http://github.com/wesm/pandas
7+
8+
9+
pandas 0.4.1
10+
============
11+
12+
**Release date:** Not yet released
13+
14+
This is a bug fix release
15+
16+
**Bug fixes**
17+
18+
-
19+
20+
pandas 0.4
621
==========
722

23+
What is it
24+
----------
25+
826
**pandas** is a library of powerful labeled-axis data structures, statistical
927
tools, and general code for working with relational data sets, including time
1028
series and cross-sectional data. It was designed with the practical needs of
@@ -13,14 +31,14 @@ particularly well suited for, among other things, financial data analysis
1331
applications.
1432

1533
Where to get it
16-
===============
34+
---------------
1735

1836
Source code: http://github.com/wesm/pandas
1937
Binary installers on PyPI: http://pypi.python.org/pypi/pandas
2038
Documentation: http://pandas.sourceforge.net
2139

2240
Release notes
23-
=============
41+
-------------
2442

2543
**Release date:** 9/12/2011
2644

@@ -279,12 +297,8 @@ Thanks
279297
- Skipper Seabold
280298
- Chris Jordan-Squire
281299

282-
========================
283-
pandas 0.3 Release Notes
284-
========================
285-
286-
Release Notes
287-
=============
300+
pandas 0.3
301+
==========
288302

289303
This major release of pandas represents approximately 1 year of continuous
290304
development work and brings with it many new features, bug fixes, speed
@@ -293,22 +307,22 @@ change from the 0.2 release has been the completion of a rigorous unit test
293307
suite covering all of the core functionality.
294308

295309
What is it
296-
==========
310+
----------
297311

298312
**pandas** is a library of labeled data structures, statistical models, and
299313
general code for working with time series and cross-sectional data. It was
300314
designed with the practical needs of statistical modeling and large,
301315
inhomogeneous data sets in mind.
302316

303317
Where to get it
304-
===============
318+
---------------
305319

306320
Source code: http://github.com/wesm/pandas
307321
Binary installers on PyPI: http://pypi.python.org/pypi/pandas
308322
Documentation: http://pandas.sourceforge.net
309323

310324
Release notes
311-
=============
325+
-------------
312326

313327
**Release date:** February 20, 2011
314328

pandas/core/reshape.py

+10 -5
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,10 @@
1111
from pandas.core.common import notnull
1212
from pandas.core.index import MultiIndex
1313

14+
class ReshapeError(Exception):
15+
pass
16+
17+
1418
class _Unstacker(object):
1519
"""
1620
Helper class to unstack data / pivot with multi-level index
@@ -88,7 +92,8 @@ def _make_selectors(self):
8892

8993
# make the mask
9094
group_index = self.sorted_labels[0]
91-
prev_stride = np.prod([len(x) for x in new_levels[1:]])
95+
prev_stride = np.prod([len(x) for x in new_levels[1:]],
96+
dtype=int)
9297

9398
for lev, lab in zip(new_levels[1:], self.sorted_labels[1:-1]):
9499
group_index = group_index * prev_stride + lab
@@ -106,6 +111,10 @@ def _make_selectors(self):
106111
unique_groups = np.arange(self.full_shape[0])[group_mask]
107112
compressor = group_index.searchsorted(unique_groups)
108113

114+
if mask.sum() < len(self.index):
115+
raise ReshapeError('Index contains duplicate entries, '
116+
'cannot reshape')
117+
109118
self.group_mask = group_mask
110119
self.group_index = group_index
111120
self.mask = mask
@@ -192,10 +201,6 @@ def pivot(self, index=None, columns=None, values=None):
192201
index_vals = self[index]
193202
column_vals = self[columns]
194203
mindex = MultiIndex.from_arrays([index_vals, column_vals])
195-
try:
196-
mindex._verify_integrity()
197-
except Exception:
198-
raise Exception('duplicate index/column pairs!')
199204

200205
if values is None:
201206
items = self.columns - [index, columns]

pandas/tests/test_frame.py

+11
Original file line number | Diff line number | Diff line change
@@ -1820,6 +1820,17 @@ def test_pivot(self):
18201820
df = DataFrame.from_records(lp.toRecords())
18211821
assert_frame_equal(df.pivot('major', 'minor'), lp.unstack())
18221822

1823+
def test_pivot_duplicates(self):
1824+
data = DataFrame({'a' : ['bar', 'bar', 'foo', 'foo', 'foo'],
1825+
'b' : ['one', 'two', 'one', 'one', 'two'],
1826+
'c' : [1., 2., 3., 3., 4.]})
1827+
# expected = DataFrame([[1., 2.], [3., 4.]], index=['bar', 'foo'],
1828+
# columns=['one', 'two'])
1829+
# result = data.pivot('a', 'b', 'c')
1830+
# assert_frame_equal(result, expected)
1831+
1832+
self.assertRaises(Exception, data.pivot, 'a', 'b', 'c')
1833+
18231834
def test_reindex(self):
18241835
newFrame = self.frame.reindex(self.ts1.index)
18251836

pandas/tests/test_panel.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -1120,9 +1120,10 @@ def test_pivot(self):
11201120
a, b, c = (np.array([1, 2, 3, 4, 4]),
11211121
np.array(['a', 'a', 'a', 'a', 'a']),
11221122
np.array([1., 2., 3., 4., 5.]))
1123-
df = pivot(a, b, c)
1124-
expected = _slow_pivot(a, b, c)
1125-
assert_frame_equal(df, expected)
1123+
self.assertRaises(Exception, pivot, a, b, c)
1124+
# df = pivot(a, b, c)
1125+
# expected = _slow_pivot(a, b, c)
1126+
# assert_frame_equal(df, expected)
11261127

11271128
# corner case, empty
11281129
df = pivot(np.array([]), np.array([]), np.array([]))

pandas/version.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
version = '0.4.0'
1+
version = '0.4.1.dev-877e596'

0 commit comments

Comments
 (0)