Skip to content

Commit 684e9dd

Browse files
committed
BUG: fix indexing issue with duplicate dates, close #1821
1 parent c8ac563 commit 684e9dd

File tree

3 files changed

+42
-0
lines changed

3 files changed

+42
-0
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ pandas 0.9.0
195195
- Fix DataFrame.apply with axis=1 on a non-unique index (#1878)
196196
- Proper handling of Index subclasses in pandas.unique (#1759)
197197
- Set index names in DataFrame.from_records (#1744)
198+
- Fix time series indexing error with duplicates, under and over hash table
199+
size cutoff (#1821)
198200

199201
pandas 0.8.1
200202
============

pandas/src/engines.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,7 @@ cdef class DatetimeEngine(Int64Engine):
416416

417417
if self.over_size_threshold and self.is_monotonic:
418418
if not self.is_unique:
419+
val = _to_i8(val)
419420
return self._get_loc_duplicates(val)
420421
values = self._get_index_values()
421422
conv = _to_i8(val)

pandas/tseries/tests/test_timeseries.py

+39
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,45 @@ def test_groupby_average_dup_values(self):
100100
expected = self.dups.groupby(self.dups.index).mean()
101101
assert_series_equal(result, expected)
102102

103+
def test_indexing_over_size_cutoff(self):
    import datetime
    # Regression test for #1821: label lookups on a datetime index that
    # contains duplicates and is longer than lib._SIZE_CUTOFF.

    saved_cutoff = lib._SIZE_CUTOFF
    try:
        # Lower the cutoff so the index built below (4 * n entries) is
        # larger than it.
        lib._SIZE_CUTOFF = 1000

        # Build a long, irregularly spaced list of datetimes: each pass
        # adds stamps at +0s, +1s, +1.5s and +2.5s, then advances by 3s.
        one_sec = datetime.timedelta(seconds=1)
        half_sec = one_sec / 2
        current = datetime.datetime(2011, 12, 5, 20, 30)
        n = 1100
        stamps = []
        for _ in range(n):
            stamps.extend([current,
                           current + one_sec,
                           current + one_sec + half_sec,
                           current + one_sec + one_sec + half_sec])
            current += 3 * one_sec

        # Introduce duplicates: at up to 20 randomly drawn positions, copy
        # the entry over its successor. Done one at a time, in draw order,
        # because adjacent positions affect each other.
        dup_positions = np.random.randint(0, len(stamps) - 1, 20)
        for dup in dup_positions:
            stamps[dup + 1] = stamps[dup]

        frame = DataFrame(np.random.randn(len(stamps), 4),
                          index=stamps,
                          columns=list('ABCD'))

        key = frame.index[n * 3]
        self.assert_(key in frame.index)

        # it works! (scalar lookup must not raise)
        frame.ix[key]
        self.assert_(len(frame.ix[[key]]) > 0)
    finally:
        # Always restore the module-level cutoff for the other tests.
        lib._SIZE_CUTOFF = saved_cutoff
103142

104143
def assert_range_equal(left, right):
105144
assert(left.equals(right))

0 commit comments

Comments
 (0)