Skip to content

BUG: PeriodIndex.order doesn't preserve freq #10305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -612,5 +612,8 @@ Bug Fixes
- Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`)
- Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`)


- Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`)
- Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead of the original dtype (:issue:`9431`)
- Bug in ``DatetimeIndex.take`` and ``TimedeltaIndex.take`` where ``IndexError`` may not be raised for an invalid index (:issue:`10295`)
- Bug in ``PeriodIndex.order`` where ``freq`` was reset (:issue:`10295`)
4 changes: 2 additions & 2 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2486,7 +2486,7 @@ def get_slice_bound(self, label, side, kind):
if is_bool_dtype(slc):
slc = lib.maybe_booleans_to_slice(slc.view('u1'))
else:
slc = lib.maybe_indices_to_slice(slc.astype('i8'))
slc = lib.maybe_indices_to_slice(slc.astype('i8'), len(self))
if isinstance(slc, np.ndarray):
raise KeyError(
"Cannot get %s slice bound for non-unique label:"
Expand Down Expand Up @@ -5108,7 +5108,7 @@ def _maybe_to_slice(loc):
if not isinstance(loc, np.ndarray) or loc.dtype != 'int64':
return loc

loc = lib.maybe_indices_to_slice(loc)
loc = lib.maybe_indices_to_slice(loc, len(self))
if isinstance(loc, slice):
return loc

Expand Down
37 changes: 31 additions & 6 deletions pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -633,17 +633,42 @@ def convert_timestamps(ndarray values):

return out

def maybe_indices_to_slice(ndarray[int64_t] indices):

def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len):
cdef:
Py_ssize_t i, n = len(indices)
int k, vstart, vlast, v

if n == 0:
return slice(0, 0)

if not n or indices[0] < 0:
vstart = indices[0]
if vstart < 0 or max_len <= vstart:
return indices

for i in range(1, n):
if indices[i] - indices[i - 1] != 1:
return indices
return slice(indices[0], indices[n - 1] + 1)
if n == 1:
return slice(vstart, vstart + 1)

vlast = indices[n - 1]
if vlast < 0 or max_len <= vlast:
return indices

k = indices[1] - indices[0]
if k == 0:
return indices
else:
for i in range(2, n):
v = indices[i]
if v - indices[i - 1] != k:
return indices

if k > 0:
return slice(vstart, vlast + 1, k)
else:
if vlast == 0:
return slice(vstart, None, k)
else:
return slice(vstart, vlast - 1, k)


def maybe_booleans_to_slice(ndarray[uint8_t] mask):
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2266,6 +2266,16 @@ def test_get_loc_na(self):

idx = Float64Index([np.nan, 1, np.nan])
self.assertEqual(idx.get_loc(1), 1)

# representable by slice [0:3:2]
# self.assertRaises(KeyError, idx.slice_locs, np.nan)
sliced = idx.slice_locs(np.nan)
self.assertTrue(isinstance(sliced, tuple))
self.assertEqual(sliced, (0, 3))

# not representable by slice
idx = Float64Index([np.nan, 1, np.nan, np.nan])
self.assertEqual(idx.get_loc(1), 1)
self.assertRaises(KeyError, idx.slice_locs, np.nan)

def test_contains_nans(self):
Expand Down
237 changes: 183 additions & 54 deletions pandas/tests/test_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np

import pandas as pd
from pandas.lib import isscalar, item_from_zerodim, max_len_string_array
import pandas.lib as lib
import pandas.util.testing as tm
from pandas.compat import u, PY2

Expand All @@ -14,19 +14,19 @@ class TestMisc(tm.TestCase):
def test_max_len_string_array(self):

arr = a = np.array(['foo', 'b', np.nan], dtype='object')
self.assertTrue(max_len_string_array(arr), 3)
self.assertTrue(lib.max_len_string_array(arr), 3)

# unicode
arr = a.astype('U').astype(object)
self.assertTrue(max_len_string_array(arr), 3)
self.assertTrue(lib.max_len_string_array(arr), 3)

# bytes for python3
arr = a.astype('S').astype(object)
self.assertTrue(max_len_string_array(arr), 3)
self.assertTrue(lib.max_len_string_array(arr), 3)

# raises
tm.assertRaises(TypeError,
lambda: max_len_string_array(arr.astype('U')))
lambda: lib.max_len_string_array(arr.astype('U')))

def test_infer_dtype_bytes(self):
compare = 'string' if PY2 else 'bytes'
Expand All @@ -39,68 +39,197 @@ def test_infer_dtype_bytes(self):
arr = arr.astype(object)
self.assertEqual(pd.lib.infer_dtype(arr), compare)


class TestIsscalar(tm.TestCase):
def test_maybe_indices_to_slice_left_edge(self):
target = np.arange(100)

# slice
indices = np.array([], dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

for end in [1, 2, 5, 20, 99]:
for step in [1, 2, 4]:
indices = np.arange(0, end, step, dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

# reverse
indices = indices[::-1]
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

# not slice
for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]:
indices = np.array(case, dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertFalse(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(maybe_slice, indices)
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

def test_maybe_indices_to_slice_right_edge(self):
target = np.arange(100)

# slice
for start in [0, 2, 5, 20, 97, 98]:
for step in [1, 2, 4]:
indices = np.arange(start, 99, step, dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

# reverse
indices = indices[::-1]
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

# not slice
indices = np.array([97, 98, 99, 100], dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertFalse(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(maybe_slice, indices)
with self.assertRaises(IndexError):
target[indices]
with self.assertRaises(IndexError):
target[maybe_slice]

indices = np.array([100, 99, 98, 97], dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertFalse(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(maybe_slice, indices)
with self.assertRaises(IndexError):
target[indices]
with self.assertRaises(IndexError):
target[maybe_slice]

for case in [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]:
indices = np.array(case, dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertFalse(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(maybe_slice, indices)
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

def test_maybe_indices_to_slice_both_edges(self):
target = np.arange(10)

# slice
for step in [1, 2, 4, 5, 8, 9]:
indices = np.arange(0, 9, step, dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

# reverse
indices = indices[::-1]
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

# not slice
for case in [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]:
indices = np.array(case, dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertFalse(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(maybe_slice, indices)
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

def test_maybe_indices_to_slice_middle(self):
target = np.arange(100)

# slice
for start, end in [(2, 10), (5, 25), (65, 97)]:
for step in [1, 2, 4, 20]:
indices = np.arange(start, end, step, dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

# reverse
indices = indices[::-1]
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertTrue(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(target[indices], target[maybe_slice])

# not slice
for case in [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]:
indices = np.array(case, dtype=np.int64)
maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
self.assertFalse(isinstance(maybe_slice, slice))
self.assert_numpy_array_equal(maybe_slice, indices)
self.assert_numpy_array_equal(target[indices], target[maybe_slice])


class Testisscalar(tm.TestCase):

def test_isscalar_builtin_scalars(self):
self.assertTrue(isscalar(None))
self.assertTrue(isscalar(True))
self.assertTrue(isscalar(False))
self.assertTrue(isscalar(0.))
self.assertTrue(isscalar(np.nan))
self.assertTrue(isscalar('foobar'))
self.assertTrue(isscalar(b'foobar'))
self.assertTrue(isscalar(u('efoobar')))
self.assertTrue(isscalar(datetime(2014, 1, 1)))
self.assertTrue(isscalar(date(2014, 1, 1)))
self.assertTrue(isscalar(time(12, 0)))
self.assertTrue(isscalar(timedelta(hours=1)))
self.assertTrue(isscalar(pd.NaT))
self.assertTrue(lib.isscalar(None))
self.assertTrue(lib.isscalar(True))
self.assertTrue(lib.isscalar(False))
self.assertTrue(lib.isscalar(0.))
self.assertTrue(lib.isscalar(np.nan))
self.assertTrue(lib.isscalar('foobar'))
self.assertTrue(lib.isscalar(b'foobar'))
self.assertTrue(lib.isscalar(u('efoobar')))
self.assertTrue(lib.isscalar(datetime(2014, 1, 1)))
self.assertTrue(lib.isscalar(date(2014, 1, 1)))
self.assertTrue(lib.isscalar(time(12, 0)))
self.assertTrue(lib.isscalar(timedelta(hours=1)))
self.assertTrue(lib.isscalar(pd.NaT))

def test_isscalar_builtin_nonscalars(self):
self.assertFalse(isscalar({}))
self.assertFalse(isscalar([]))
self.assertFalse(isscalar([1]))
self.assertFalse(isscalar(()))
self.assertFalse(isscalar((1,)))
self.assertFalse(isscalar(slice(None)))
self.assertFalse(isscalar(Ellipsis))
self.assertFalse(lib.isscalar({}))
self.assertFalse(lib.isscalar([]))
self.assertFalse(lib.isscalar([1]))
self.assertFalse(lib.isscalar(()))
self.assertFalse(lib.isscalar((1,)))
self.assertFalse(lib.isscalar(slice(None)))
self.assertFalse(lib.isscalar(Ellipsis))

def test_isscalar_numpy_array_scalars(self):
self.assertTrue(isscalar(np.int64(1)))
self.assertTrue(isscalar(np.float64(1.)))
self.assertTrue(isscalar(np.int32(1)))
self.assertTrue(isscalar(np.object_('foobar')))
self.assertTrue(isscalar(np.str_('foobar')))
self.assertTrue(isscalar(np.unicode_(u('foobar'))))
self.assertTrue(isscalar(np.bytes_(b'foobar')))
self.assertTrue(isscalar(np.datetime64('2014-01-01')))
self.assertTrue(isscalar(np.timedelta64(1, 'h')))
self.assertTrue(lib.isscalar(np.int64(1)))
self.assertTrue(lib.isscalar(np.float64(1.)))
self.assertTrue(lib.isscalar(np.int32(1)))
self.assertTrue(lib.isscalar(np.object_('foobar')))
self.assertTrue(lib.isscalar(np.str_('foobar')))
self.assertTrue(lib.isscalar(np.unicode_(u('foobar'))))
self.assertTrue(lib.isscalar(np.bytes_(b'foobar')))
self.assertTrue(lib.isscalar(np.datetime64('2014-01-01')))
self.assertTrue(lib.isscalar(np.timedelta64(1, 'h')))

def test_isscalar_numpy_zerodim_arrays(self):
for zerodim in [np.array(1),
np.array('foobar'),
np.array(np.datetime64('2014-01-01')),
np.array(np.timedelta64(1, 'h'))]:
self.assertFalse(isscalar(zerodim))
self.assertTrue(isscalar(item_from_zerodim(zerodim)))
self.assertFalse(lib.isscalar(zerodim))
self.assertTrue(lib.isscalar(lib.item_from_zerodim(zerodim)))

def test_isscalar_numpy_arrays(self):
self.assertFalse(isscalar(np.array([])))
self.assertFalse(isscalar(np.array([[]])))
self.assertFalse(isscalar(np.matrix('1; 2')))
self.assertFalse(lib.isscalar(np.array([])))
self.assertFalse(lib.isscalar(np.array([[]])))
self.assertFalse(lib.isscalar(np.matrix('1; 2')))

def test_isscalar_pandas_scalars(self):
self.assertTrue(isscalar(pd.Timestamp('2014-01-01')))
self.assertTrue(isscalar(pd.Timedelta(hours=1)))
self.assertTrue(isscalar(pd.Period('2014-01-01')))

def test_isscalar_pandas_containers(self):
self.assertFalse(isscalar(pd.Series()))
self.assertFalse(isscalar(pd.Series([1])))
self.assertFalse(isscalar(pd.DataFrame()))
self.assertFalse(isscalar(pd.DataFrame([[1]])))
self.assertFalse(isscalar(pd.Panel()))
self.assertFalse(isscalar(pd.Panel([[[1]]])))
self.assertFalse(isscalar(pd.Index([])))
self.assertFalse(isscalar(pd.Index([1])))
self.assertTrue(lib.isscalar(pd.Timestamp('2014-01-01')))
self.assertTrue(lib.isscalar(pd.Timedelta(hours=1)))
self.assertTrue(lib.isscalar(pd.Period('2014-01-01')))

def test_lisscalar_pandas_containers(self):
self.assertFalse(lib.isscalar(pd.Series()))
self.assertFalse(lib.isscalar(pd.Series([1])))
self.assertFalse(lib.isscalar(pd.DataFrame()))
self.assertFalse(lib.isscalar(pd.DataFrame([[1]])))
self.assertFalse(lib.isscalar(pd.Panel()))
self.assertFalse(lib.isscalar(pd.Panel([[[1]]])))
self.assertFalse(lib.isscalar(pd.Index([])))
self.assertFalse(lib.isscalar(pd.Index([1])))


if __name__ == '__main__':
import nose

nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
Loading