From ea51437852c0eb85c203d602099ca3171055ca82 Mon Sep 17 00:00:00 2001 From: "Ilya V. Schurov" Date: Fri, 9 Dec 2016 09:04:47 +0300 Subject: [PATCH 01/10] Made this code clearer. --- pandas/tseries/index.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 0824072cc383f..76cc747341421 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1293,12 +1293,11 @@ def _parsed_string_to_bounds(self, reso, parsed): def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): is_monotonic = self.is_monotonic - if ((reso in ['day', 'hour', 'minute'] and - not (self._resolution < Resolution.get_reso(reso) or - not is_monotonic)) or - (reso == 'second' and - not (self._resolution <= Resolution.RESO_SEC or - not is_monotonic))): + if (is_monotonic + and ((reso in ['day', 'hour', 'minute'] + and self._resolution >= Resolution.get_reso(reso)) + or (reso == 'second' + and self._resolution > Resolution.RESO_SEC))): # These resolution/monotonicity validations came from GH3931, # GH3452 and GH2369. raise KeyError From cc86bddf47f928ca7a8931a243061cb582d5952d Mon Sep 17 00:00:00 2001 From: "Ilya V. Schurov" Date: Sun, 11 Dec 2016 10:11:21 +0300 Subject: [PATCH 02/10] Fix inconsistency in Partial String Index with 'second' resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See #14826. Now the following logic applies: - If timestamp resolution is strictly less precise than index resolution, the timestamp is treated as a slice, as it can (in theory) correspond to more than one element in the index. For `Series`, `[]` should return `Series`, for `DataFrame` — `DataFrame`. - If timestamp resolution is equal to index resolution, then timestamp is considered as an attempt to get a kind of "exact match". For `Series`, `[]` should return scalar, for `DataFrame` — try to find a column with this key (if any), and most probably raise `KeyError`. 
- If timestamp resolution is strictly more precise than index resolution and does not resolve to an exact match, `KeyError` has to be raised in both cases. The test suite is updated as well. --- pandas/tseries/index.py | 8 +- .../tests/test_partial_string_indexes.py | 168 ++++++++++++++++++ pandas/tseries/tests/test_timeseries.py | 6 +- 3 files changed, 175 insertions(+), 7 deletions(-) create mode 100644 pandas/tseries/tests/test_partial_string_indexes.py diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 76cc747341421..5e76a73c9dd77 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1294,12 +1294,12 @@ def _parsed_string_to_bounds(self, reso, parsed): def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): is_monotonic = self.is_monotonic if (is_monotonic - and ((reso in ['day', 'hour', 'minute'] - and self._resolution >= Resolution.get_reso(reso)) - or (reso == 'second' - and self._resolution > Resolution.RESO_SEC))): + and reso in ['day', 'hour', 'minute', 'second'] + and self._resolution >= Resolution.get_reso(reso)): # These resolution/monotonicity validations came from GH3931, # GH3452 and GH2369. 
+ + # See also GH14826 raise KeyError if reso == 'microsecond': diff --git a/pandas/tseries/tests/test_partial_string_indexes.py b/pandas/tseries/tests/test_partial_string_indexes.py new file mode 100644 index 0000000000000..55e64750e9978 --- /dev/null +++ b/pandas/tseries/tests/test_partial_string_indexes.py @@ -0,0 +1,168 @@ +import nose +import numpy as np + +import pandas.util.testing as tm +from pandas import ( + Index, Series, DataFrame, isnull, date_range, Timestamp, Period, + DatetimeIndex, Int64Index, to_datetime, bdate_range, Float64Index, + NaT, timedelta_range, Timedelta, _np_version_under1p8, concat) + +from pandas.util.testing import ( + assert_frame_equal, assert_series_equal, assert_almost_equal, + _skip_if_has_locale, slow) + +class TestTimeSeriesPartialSlices(tm.TestCase): + _multiprocess_can_split_ = True + def assert_exact(self, df, ts, value): + element = df['a'][ts] + + # Series should return scalar + self.assertIsInstance(element, np.int64) + self.assertEqual(element, value) + + # Frame should raise (exact match) + self.assertRaises(KeyError, df.__getitem__, ts) + + #TODO: test falling to column selection + + def assert_slice(self, df, ts, slice): + # Series should return slice + expected = df['a'][slice] + assert_series_equal(df['a'][ts], expected) + + # Frame should return slice as well + expected = df[slice] + assert_frame_equal(df[ts], expected) + + def assert_key_error(self, df, ts): + self.assertRaises(KeyError, df['a'].__getitem__, ts) + self.assertRaises(KeyError, df.__getitem__, ts) + + def test_partial_slices_day(self): + df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31', + '2012-01-01', + '2012-01-02']), + dtype=np.int64) + + self.assertEqual(df.index.resolution, 'day') + + # Timestamp with resolution 'day' + self.assert_exact(df, '2011-12-31', 1) + self.assert_exact(df, '2012-01-01', 2) + self.assert_exact(df, '2012-01-02', 3) + + # Timestamp with resolution less precise than 'day' + for ts in ['2011', '2011-12']: + 
self.assert_slice(df, ts, slice(None, 1)) + + # The same as previous but several elements in the slice + for ts in ['2012', '2012-01']: + self.assert_slice(df, ts, slice(1, None)) + + # Timestamp with resolution more precise than 'day' + # Compatible with existing key + for ts in ['2012-01-01 00', '2012-01-01 00:00', + '2012-01-01 00:00:00']: + self.assert_exact(df, ts, 2) + + # Timestamp with resolution more precise than 'day' + # Not compatible with existing key + for ts in ['2012-01-01 01', '2012-01-01 00:01', + '2012-01-01 00:00:01']: + self.assert_key_error(df, ts) + + + def test_partial_slice_hour(self): + df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23', + '2012-01-01 00', + '2012-01-01 01']), + dtype=np.int64) + + self.assertEqual(df.index.resolution, 'hour') + + # Timestamp with resolution 'hour' + self.assert_exact(df, '2011-12-31 23', 1) + self.assert_exact(df, '2012-01-01 00', 2) + self.assert_exact(df, '2012-01-01 01', 3) + + # Timestamp with resolution less precise than 'hour' + for ts in ['2011', '2011-12', '2011-12-31']: + self.assert_slice(df, ts, slice(None, 1)) + + # The same as previous but several elements in the slice + for ts in ['2012', '2012-01', '2012-01-01']: + self.assert_slice(df, ts, slice(1, None)) + + # Timestamp with resolution more precise than 'hour' + # Compatible with existing key + for ts in ['2012-01-01 00:00', + '2012-01-01 00:00:00']: + self.assert_exact(df, ts, 2) + + # Timestamp with resolution more precise than 'hour' + # Not compatible with existing key + for ts in ['2012-01-01 00:01', + '2012-01-01 00:00:01']: + self.assert_key_error(df, ts) + + def test_partial_slice_minute(self): + df = DataFrame({'a': [1, 2, 3]}, + DatetimeIndex(['2011-12-31 23:59', + '2012-01-01 00:00', + '2012-01-01 00:01']), + dtype=np.int64) + + self.assertEqual(df.index.resolution, 'minute') + + # Timestamp with resolution 'minute' + self.assert_exact(df, '2011-12-31 23:59', 1) + self.assert_exact(df, '2012-01-01 00:00', 2) + 
self.assert_exact(df, '2012-01-01 00:01', 3) + + # Timestamp with resolution less precise than 'minute' + for ts in ['2011', '2011-12', '2011-12-31', + '2011-12-31 23']: + self.assert_slice(df, ts, slice(None, 1)) + + # The same as previous but several elements in the slice + for ts in ['2012', '2012-01', '2012-01-01', + '2012-01-01 00']: + self.assert_slice(df, ts, slice(1, None)) + + # Timestamp with resolution more precise than 'minute' + # Compatible with existing key + for ts in ['2012-01-01 00:00:00']: + self.assert_exact(df, ts, 2) + + # Timestamp with resolution more precise than 'minute' + # Not compatible with existing key + for ts in ['2012-01-01 00:00:01']: + self.assert_key_error(df, ts) + + def test_partial_slice_second(self): + # See GH14826 + df = DataFrame({'a': [1, 2, 3]}, + DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01']), + dtype=np.int64) + + self.assertEqual(df.index.resolution, 'second') + + # Timestamp with resolution 'second' + self.assert_exact(df, '2011-12-31 23:59:59', 1) + self.assert_exact(df, '2012-01-01 00:00:00', 2) + self.assert_exact(df, '2012-01-01 00:00:01', 3) + + # Timestamp with resolution less precise than 'second' + for ts in ['2011', '2011-12', '2011-12-31', + '2011-12-31 23', '2011-12-31 23:59']: + self.assert_slice(df, ts, slice(None, 1)) + + # The same as previous but several elements in the slice + for ts in ['2012', '2012-01', '2012-01-01', + '2012-01-01 00', '2012-01-01 00:00']: + self.assert_slice(df, ts, slice(1, None)) + + # Not possible to create a string that represents timestamp + # that is more exact then 'second' \ No newline at end of file diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 67b203d011d1a..b445e9084845c 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -266,18 +266,18 @@ def test_indexing(self): expected = ts['2013'] assert_series_equal(expected, ts) - # 
GH 3925, indexing with a seconds resolution string / datetime object + # GH14826, indexing with a seconds resolution string / datetime object df = DataFrame(randn(5, 5), columns=['open', 'high', 'low', 'close', 'volume'], index=date_range('2012-01-02 18:01:00', periods=5, tz='US/Central', freq='s')) expected = df.loc[[df.index[2]]] - result = df['2012-01-02 18:01:02'] - assert_frame_equal(result, expected) # this is a single date, so will raise + self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) self.assertRaises(KeyError, df.__getitem__, df.index[2], ) + def test_recreate_from_data(self): freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', 'C'] From b30039d1266e84caca91586afb030c1440addebc Mon Sep 17 00:00:00 2001 From: "Ilya V. Schurov" Date: Sun, 11 Dec 2016 10:23:20 +0300 Subject: [PATCH 03/10] Make flake8 happy. --- .../tests/test_partial_string_indexes.py | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/pandas/tseries/tests/test_partial_string_indexes.py b/pandas/tseries/tests/test_partial_string_indexes.py index 55e64750e9978..a5600e99b4028 100644 --- a/pandas/tseries/tests/test_partial_string_indexes.py +++ b/pandas/tseries/tests/test_partial_string_indexes.py @@ -1,18 +1,14 @@ -import nose import numpy as np import pandas.util.testing as tm -from pandas import ( - Index, Series, DataFrame, isnull, date_range, Timestamp, Period, - DatetimeIndex, Int64Index, to_datetime, bdate_range, Float64Index, - NaT, timedelta_range, Timedelta, _np_version_under1p8, concat) +from pandas import DataFrame, DatetimeIndex + +from pandas.util.testing import assert_frame_equal, assert_series_equal -from pandas.util.testing import ( - assert_frame_equal, assert_series_equal, assert_almost_equal, - _skip_if_has_locale, slow) class TestTimeSeriesPartialSlices(tm.TestCase): _multiprocess_can_split_ = True + def assert_exact(self, df, ts, value): element = df['a'][ts] @@ -23,15 +19,15 @@ def 
assert_exact(self, df, ts, value): # Frame should raise (exact match) self.assertRaises(KeyError, df.__getitem__, ts) - #TODO: test falling to column selection + # TODO: test falling to column selection - def assert_slice(self, df, ts, slice): + def assert_slice(self, df, ts, the_slice): # Series should return slice - expected = df['a'][slice] + expected = df['a'][the_slice] assert_series_equal(df['a'][ts], expected) # Frame should return slice as well - expected = df[slice] + expected = df[the_slice] assert_frame_equal(df[ts], expected) def assert_key_error(self, df, ts): @@ -71,7 +67,6 @@ def test_partial_slices_day(self): '2012-01-01 00:00:01']: self.assert_key_error(df, ts) - def test_partial_slice_hour(self): df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23', '2012-01-01 00', @@ -165,4 +160,4 @@ def test_partial_slice_second(self): self.assert_slice(df, ts, slice(1, None)) # Not possible to create a string that represents timestamp - # that is more exact then 'second' \ No newline at end of file + # that is more exact then 'second' From 9b5511722182c2d2eec9ad497e53b1f14f4a52d3 Mon Sep 17 00:00:00 2001 From: "Ilya V. 
Schurov" Date: Sun, 18 Dec 2016 18:39:00 +0300 Subject: [PATCH 04/10] Addressing code review - new tests moved to test_timeseries.py and refactored to avoid creating non-standard helper functions --- pandas/tseries/index.py | 5 +- .../tests/test_partial_string_indexes.py | 163 ------------- pandas/tseries/tests/test_timeseries.py | 224 +++++++++++++++++- 3 files changed, 225 insertions(+), 167 deletions(-) delete mode 100644 pandas/tseries/tests/test_partial_string_indexes.py diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 5e76a73c9dd77..efad232d64867 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1293,9 +1293,8 @@ def _parsed_string_to_bounds(self, reso, parsed): def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): is_monotonic = self.is_monotonic - if (is_monotonic - and reso in ['day', 'hour', 'minute', 'second'] - and self._resolution >= Resolution.get_reso(reso)): + if (is_monotonic and reso in ['day', 'hour', 'minute', 'second'] and + self._resolution >= Resolution.get_reso(reso)): # These resolution/monotonicity validations came from GH3931, # GH3452 and GH2369. 
diff --git a/pandas/tseries/tests/test_partial_string_indexes.py b/pandas/tseries/tests/test_partial_string_indexes.py deleted file mode 100644 index a5600e99b4028..0000000000000 --- a/pandas/tseries/tests/test_partial_string_indexes.py +++ /dev/null @@ -1,163 +0,0 @@ -import numpy as np - -import pandas.util.testing as tm -from pandas import DataFrame, DatetimeIndex - -from pandas.util.testing import assert_frame_equal, assert_series_equal - - -class TestTimeSeriesPartialSlices(tm.TestCase): - _multiprocess_can_split_ = True - - def assert_exact(self, df, ts, value): - element = df['a'][ts] - - # Series should return scalar - self.assertIsInstance(element, np.int64) - self.assertEqual(element, value) - - # Frame should raise (exact match) - self.assertRaises(KeyError, df.__getitem__, ts) - - # TODO: test falling to column selection - - def assert_slice(self, df, ts, the_slice): - # Series should return slice - expected = df['a'][the_slice] - assert_series_equal(df['a'][ts], expected) - - # Frame should return slice as well - expected = df[the_slice] - assert_frame_equal(df[ts], expected) - - def assert_key_error(self, df, ts): - self.assertRaises(KeyError, df['a'].__getitem__, ts) - self.assertRaises(KeyError, df.__getitem__, ts) - - def test_partial_slices_day(self): - df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31', - '2012-01-01', - '2012-01-02']), - dtype=np.int64) - - self.assertEqual(df.index.resolution, 'day') - - # Timestamp with resolution 'day' - self.assert_exact(df, '2011-12-31', 1) - self.assert_exact(df, '2012-01-01', 2) - self.assert_exact(df, '2012-01-02', 3) - - # Timestamp with resolution less precise than 'day' - for ts in ['2011', '2011-12']: - self.assert_slice(df, ts, slice(None, 1)) - - # The same as previous but several elements in the slice - for ts in ['2012', '2012-01']: - self.assert_slice(df, ts, slice(1, None)) - - # Timestamp with resolution more precise than 'day' - # Compatible with existing key - for ts in 
['2012-01-01 00', '2012-01-01 00:00', - '2012-01-01 00:00:00']: - self.assert_exact(df, ts, 2) - - # Timestamp with resolution more precise than 'day' - # Not compatible with existing key - for ts in ['2012-01-01 01', '2012-01-01 00:01', - '2012-01-01 00:00:01']: - self.assert_key_error(df, ts) - - def test_partial_slice_hour(self): - df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23', - '2012-01-01 00', - '2012-01-01 01']), - dtype=np.int64) - - self.assertEqual(df.index.resolution, 'hour') - - # Timestamp with resolution 'hour' - self.assert_exact(df, '2011-12-31 23', 1) - self.assert_exact(df, '2012-01-01 00', 2) - self.assert_exact(df, '2012-01-01 01', 3) - - # Timestamp with resolution less precise than 'hour' - for ts in ['2011', '2011-12', '2011-12-31']: - self.assert_slice(df, ts, slice(None, 1)) - - # The same as previous but several elements in the slice - for ts in ['2012', '2012-01', '2012-01-01']: - self.assert_slice(df, ts, slice(1, None)) - - # Timestamp with resolution more precise than 'hour' - # Compatible with existing key - for ts in ['2012-01-01 00:00', - '2012-01-01 00:00:00']: - self.assert_exact(df, ts, 2) - - # Timestamp with resolution more precise than 'hour' - # Not compatible with existing key - for ts in ['2012-01-01 00:01', - '2012-01-01 00:00:01']: - self.assert_key_error(df, ts) - - def test_partial_slice_minute(self): - df = DataFrame({'a': [1, 2, 3]}, - DatetimeIndex(['2011-12-31 23:59', - '2012-01-01 00:00', - '2012-01-01 00:01']), - dtype=np.int64) - - self.assertEqual(df.index.resolution, 'minute') - - # Timestamp with resolution 'minute' - self.assert_exact(df, '2011-12-31 23:59', 1) - self.assert_exact(df, '2012-01-01 00:00', 2) - self.assert_exact(df, '2012-01-01 00:01', 3) - - # Timestamp with resolution less precise than 'minute' - for ts in ['2011', '2011-12', '2011-12-31', - '2011-12-31 23']: - self.assert_slice(df, ts, slice(None, 1)) - - # The same as previous but several elements in the slice - for ts in 
['2012', '2012-01', '2012-01-01', - '2012-01-01 00']: - self.assert_slice(df, ts, slice(1, None)) - - # Timestamp with resolution more precise than 'minute' - # Compatible with existing key - for ts in ['2012-01-01 00:00:00']: - self.assert_exact(df, ts, 2) - - # Timestamp with resolution more precise than 'minute' - # Not compatible with existing key - for ts in ['2012-01-01 00:00:01']: - self.assert_key_error(df, ts) - - def test_partial_slice_second(self): - # See GH14826 - df = DataFrame({'a': [1, 2, 3]}, - DatetimeIndex(['2011-12-31 23:59:59', - '2012-01-01 00:00:00', - '2012-01-01 00:00:01']), - dtype=np.int64) - - self.assertEqual(df.index.resolution, 'second') - - # Timestamp with resolution 'second' - self.assert_exact(df, '2011-12-31 23:59:59', 1) - self.assert_exact(df, '2012-01-01 00:00:00', 2) - self.assert_exact(df, '2012-01-01 00:00:01', 3) - - # Timestamp with resolution less precise than 'second' - for ts in ['2011', '2011-12', '2011-12-31', - '2011-12-31 23', '2011-12-31 23:59']: - self.assert_slice(df, ts, slice(None, 1)) - - # The same as previous but several elements in the slice - for ts in ['2012', '2012-01', '2012-01-01', - '2012-01-01 00', '2012-01-01 00:00']: - self.assert_slice(df, ts, slice(1, None)) - - # Not possible to create a string that represents timestamp - # that is more exact then 'second' diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index b445e9084845c..2066790a9ae77 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -277,7 +277,6 @@ def test_indexing(self): self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', ) self.assertRaises(KeyError, df.__getitem__, df.index[2], ) - def test_recreate_from_data(self): freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', 'C'] @@ -4897,6 +4896,64 @@ def test_partial_slice_daily(self): self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') + # GH14856 + # 
DatetimeIndex without explicit freq + df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31', + '2012-01-01', + '2012-01-02']), + dtype=np.int64) + + self.assertEqual(df.index.resolution, 'day') + + # Timestamp with resolution 'day' + # Should be exact match for series and raise KeyError for Frame + for ts, expected in (('2011-12-31', 1), ('2012-01-01', 2), + ('2012-01-02', 3)): + result = df['a'][ts] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts) + + # Timestamp with resolution less precise than 'day' + for ts in ('2011', '2011-12'): + # Series should return slice + result = df['a'][ts] + expected = df['a'][:1] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts] + expected = df[:1] + assert_frame_equal(result, expected) + + # The same as previous but several elements in the slice + for ts in ('2012', '2012-01'): + # Series should return slice + result = df['a'][ts] + expected = df['a'][1:] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts] + expected = df[1:] + assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than 'day' + # Compatible with existing key + for ts in ('2012-01-01 00', '2012-01-01 00:00', + '2012-01-01 00:00:00'): + result = df['a'][ts] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, 2) + self.assertRaises(KeyError, df.__getitem__, ts) + + # Timestamp with resolution more precise than 'day' + # Not compatible with existing key + for ts in ('2012-01-01 01', '2012-01-01 00:01', + '2012-01-01 00:00:01'): + self.assertRaises(KeyError, df['a'].__getitem__, ts) + self.assertRaises(KeyError, df.__getitem__, ts) + def test_partial_slice_hourly(self): rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), periods=500) @@ -4911,6 +4968,63 @@ def test_partial_slice_hourly(self): self.assertEqual(s['2005-1-1 20:00'], 
s.ix[0]) self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') + # GH14856 + # DatetimeIndex without explicit freq + df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23', + '2012-01-01 00', + '2012-01-01 01']), + dtype=np.int64) + + self.assertEqual(df.index.resolution, 'hour') + + # Timestamp with resolution 'hour' + # Should be exact match for series and raise KeyError for Frame + for ts, expected in (('2011-12-31 23', 1), + ('2012-01-01 00', 2), + ('2012-01-01 01', 3)): + result = df['a'][ts] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts) + + # Timestamp with resolution less precise than 'hour' + for ts in ('2011', '2011-12', '2011-12-31'): + # Series should return slice + result = df['a'][ts] + expected = df['a'][:1] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts] + expected = df[:1] + assert_frame_equal(result, expected) + + # The same as previous but several elements in the slice + for ts in ('2012', '2012-01', '2012-01-01'): + # Series should return slice + result = df['a'][ts] + expected = df['a'][1:] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts] + expected = df[1:] + assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than 'hour' + # Compatible with existing key + for ts in ('2012-01-01 00:00', '2012-01-01 00:00:00'): + result = df['a'][ts] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, 2) + self.assertRaises(KeyError, df.__getitem__, ts) + + # Timestamp with resolution more precise than 'day' + # Not compatible with existing key + for ts in ('2012-01-01 00:01', '2012-01-01 00:00:01'): + self.assertRaises(KeyError, df['a'].__getitem__, ts) + self.assertRaises(KeyError, df.__getitem__, ts) + def test_partial_slice_minutely(self): rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), 
periods=500) @@ -4925,6 +5039,64 @@ def test_partial_slice_minutely(self): self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.ix[0]) self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') + # GH14856 + # DatetimeIndex without explicit freq + df = DataFrame({'a': [1, 2, 3]}, + DatetimeIndex(['2011-12-31 23:59', + '2012-01-01 00:00', + '2012-01-01 00:01']), + dtype=np.int64) + + self.assertEqual(df.index.resolution, 'minute') + + # Timestamp with resolution 'minute' + # Should be exact match for series and raise KeyError for Frame + for ts, expected in (('2011-12-31 23:59', 1), + ('2012-01-01 00:00', 2), + ('2012-01-01 00:01', 3)): + result = df['a'][ts] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts) + + # Timestamp with resolution less precise than 'minute' + for ts in ('2011', '2011-12', '2011-12-31', '2011-12-31 23'): + # Series should return slice + result = df['a'][ts] + expected = df['a'][:1] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts] + expected = df[:1] + assert_frame_equal(result, expected) + + # The same as previous but several elements in the slice + for ts in ('2012', '2012-01', '2012-01-01', '2012-01-01 00'): + # Series should return slice + result = df['a'][ts] + expected = df['a'][1:] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts] + expected = df[1:] + assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than 'minute' + # Compatible with existing key + ts = '2012-01-01 00:00:00' + result = df['a'][ts] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, 2) + self.assertRaises(KeyError, df.__getitem__, ts) + + # Timestamp with resolution more precise than 'day' + # Not compatible with existing key + ts = '2012-01-01 00:00:01' + self.assertRaises(KeyError, df['a'].__getitem__, ts) + self.assertRaises(KeyError, 
df.__getitem__, ts) + def test_partial_slice_second_precision(self): rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990), @@ -4941,6 +5113,56 @@ def test_partial_slice_second_precision(self): self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', lambda: s['2005-1-1 00:00:00']) + # GH14856 + # DatetimeIndex without explicit freq + # Without microseconds + df = DataFrame({'a': [1, 2, 3]}, + DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01']), + dtype=np.int64) + + self.assertEqual(df.index.resolution, 'second') + + # Timestamp with resolution 'second' + # Should be exact match for series and raise KeyError for Frame + for ts, expected in (('2011-12-31 23:59:59', 1), + ('2012-01-01 00:00:00', 2), + ('2012-01-01 00:00:01', 3)): + result = df['a'][ts] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts) + + # Timestamp with resolution less precise than 'minute' + for ts in ('2011', '2011-12', '2011-12-31', '2011-12-31 23', + '2011-12-31 23:59'): + # Series should return slice + result = df['a'][ts] + expected = df['a'][:1] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts] + expected = df[:1] + assert_frame_equal(result, expected) + + # The same as previous but several elements in the slice + for ts in ('2012', '2012-01', '2012-01-01', '2012-01-01 00', + '2012-01-01 00:00'): + # Series should return slice + result = df['a'][ts] + expected = df['a'][1:] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts] + expected = df[1:] + assert_frame_equal(result, expected) + + # Not possible to create a string that represents timestamp + # that is more exact then 'second' + def test_partial_slicing_with_multiindex(self): # GH 4758 From c901588b7dce80402b1cf152876ef29d3d447b97 Mon Sep 17 00:00:00 2001 From: "Ilya V. 
Schurov" Date: Sun, 18 Dec 2016 20:20:13 +0300 Subject: [PATCH 05/10] Addressing code review: testing different combinations with the loop instead of copy-pasting of the code --- pandas/tseries/tests/test_timeseries.py | 275 +++++------------------- 1 file changed, 54 insertions(+), 221 deletions(-) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 2066790a9ae77..877bc36f956e5 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -4896,64 +4896,6 @@ def test_partial_slice_daily(self): self.assertRaises(Exception, s.__getitem__, '2004-12-31 00') - # GH14856 - # DatetimeIndex without explicit freq - df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31', - '2012-01-01', - '2012-01-02']), - dtype=np.int64) - - self.assertEqual(df.index.resolution, 'day') - - # Timestamp with resolution 'day' - # Should be exact match for series and raise KeyError for Frame - for ts, expected in (('2011-12-31', 1), ('2012-01-01', 2), - ('2012-01-02', 3)): - result = df['a'][ts] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts) - - # Timestamp with resolution less precise than 'day' - for ts in ('2011', '2011-12'): - # Series should return slice - result = df['a'][ts] - expected = df['a'][:1] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts] - expected = df[:1] - assert_frame_equal(result, expected) - - # The same as previous but several elements in the slice - for ts in ('2012', '2012-01'): - # Series should return slice - result = df['a'][ts] - expected = df['a'][1:] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts] - expected = df[1:] - assert_frame_equal(result, expected) - - # Timestamp with resolution more precise than 'day' - # Compatible with existing key - for ts in ('2012-01-01 00', '2012-01-01 00:00', 
- '2012-01-01 00:00:00'): - result = df['a'][ts] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts) - - # Timestamp with resolution more precise than 'day' - # Not compatible with existing key - for ts in ('2012-01-01 01', '2012-01-01 00:01', - '2012-01-01 00:00:01'): - self.assertRaises(KeyError, df['a'].__getitem__, ts) - self.assertRaises(KeyError, df.__getitem__, ts) - def test_partial_slice_hourly(self): rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0), periods=500) @@ -4968,63 +4910,6 @@ def test_partial_slice_hourly(self): self.assertEqual(s['2005-1-1 20:00'], s.ix[0]) self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15') - # GH14856 - # DatetimeIndex without explicit freq - df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23', - '2012-01-01 00', - '2012-01-01 01']), - dtype=np.int64) - - self.assertEqual(df.index.resolution, 'hour') - - # Timestamp with resolution 'hour' - # Should be exact match for series and raise KeyError for Frame - for ts, expected in (('2011-12-31 23', 1), - ('2012-01-01 00', 2), - ('2012-01-01 01', 3)): - result = df['a'][ts] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts) - - # Timestamp with resolution less precise than 'hour' - for ts in ('2011', '2011-12', '2011-12-31'): - # Series should return slice - result = df['a'][ts] - expected = df['a'][:1] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts] - expected = df[:1] - assert_frame_equal(result, expected) - - # The same as previous but several elements in the slice - for ts in ('2012', '2012-01', '2012-01-01'): - # Series should return slice - result = df['a'][ts] - expected = df['a'][1:] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts] - expected = df[1:] - assert_frame_equal(result, expected) - 
- # Timestamp with resolution more precise than 'hour' - # Compatible with existing key - for ts in ('2012-01-01 00:00', '2012-01-01 00:00:00'): - result = df['a'][ts] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts) - - # Timestamp with resolution more precise than 'day' - # Not compatible with existing key - for ts in ('2012-01-01 00:01', '2012-01-01 00:00:01'): - self.assertRaises(KeyError, df['a'].__getitem__, ts) - self.assertRaises(KeyError, df.__getitem__, ts) - def test_partial_slice_minutely(self): rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0), periods=500) @@ -5039,64 +4924,6 @@ def test_partial_slice_minutely(self): self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.ix[0]) self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') - # GH14856 - # DatetimeIndex without explicit freq - df = DataFrame({'a': [1, 2, 3]}, - DatetimeIndex(['2011-12-31 23:59', - '2012-01-01 00:00', - '2012-01-01 00:01']), - dtype=np.int64) - - self.assertEqual(df.index.resolution, 'minute') - - # Timestamp with resolution 'minute' - # Should be exact match for series and raise KeyError for Frame - for ts, expected in (('2011-12-31 23:59', 1), - ('2012-01-01 00:00', 2), - ('2012-01-01 00:01', 3)): - result = df['a'][ts] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts) - - # Timestamp with resolution less precise than 'minute' - for ts in ('2011', '2011-12', '2011-12-31', '2011-12-31 23'): - # Series should return slice - result = df['a'][ts] - expected = df['a'][:1] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts] - expected = df[:1] - assert_frame_equal(result, expected) - - # The same as previous but several elements in the slice - for ts in ('2012', '2012-01', '2012-01-01', '2012-01-01 00'): - # Series should return slice - result = df['a'][ts] - 
expected = df['a'][1:] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts] - expected = df[1:] - assert_frame_equal(result, expected) - - # Timestamp with resolution more precise than 'minute' - # Compatible with existing key - ts = '2012-01-01 00:00:00' - result = df['a'][ts] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, 2) - self.assertRaises(KeyError, df.__getitem__, ts) - - # Timestamp with resolution more precise than 'day' - # Not compatible with existing key - ts = '2012-01-01 00:00:01' - self.assertRaises(KeyError, df['a'].__getitem__, ts) - self.assertRaises(KeyError, df.__getitem__, ts) - def test_partial_slice_second_precision(self): rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990), @@ -5113,55 +4940,61 @@ def test_partial_slice_second_precision(self): self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00', lambda: s['2005-1-1 00:00:00']) + def test_partial_slicing_dataframe(self): # GH14856 - # DatetimeIndex without explicit freq - # Without microseconds - df = DataFrame({'a': [1, 2, 3]}, - DatetimeIndex(['2011-12-31 23:59:59', - '2012-01-01 00:00:00', - '2012-01-01 00:00:01']), - dtype=np.int64) - - self.assertEqual(df.index.resolution, 'second') - - # Timestamp with resolution 'second' - # Should be exact match for series and raise KeyError for Frame - for ts, expected in (('2011-12-31 23:59:59', 1), - ('2012-01-01 00:00:00', 2), - ('2012-01-01 00:00:01', 3)): - result = df['a'][ts] - self.assertIsInstance(result, np.int64) - self.assertEqual(result, expected) - self.assertRaises(KeyError, df.__getitem__, ts) - - # Timestamp with resolution less precise than 'minute' - for ts in ('2011', '2011-12', '2011-12-31', '2011-12-31 23', - '2011-12-31 23:59'): - # Series should return slice - result = df['a'][ts] - expected = df['a'][:1] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts] - expected = df[:1] - 
assert_frame_equal(result, expected) - - # The same as previous but several elements in the slice - for ts in ('2012', '2012-01', '2012-01-01', '2012-01-01 00', - '2012-01-01 00:00'): - # Series should return slice - result = df['a'][ts] - expected = df['a'][1:] - assert_series_equal(result, expected) - - # Frame should return slice as well - result = df[ts] - expected = df[1:] - assert_frame_equal(result, expected) - - # Not possible to create a string that represents timestamp - # that is more exact then 'second' + # Test various combinations of string slicing + formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', + '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] + resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] + for rnum, resolution in enumerate(resolutions[2:], 2): + unit = Timedelta(1, resolution[0]) + middate = datetime(2012, 1, 1, 0, 0, 0) + index = DatetimeIndex([middate - unit, + middate, middate + unit]) + values = [1, 2, 3] + df = DataFrame({'a': values}, index, dtype=np.int64) + self.assertEqual(df.index.resolution, resolution) + + # Timestamp with the same resolution as index + # Should be exact match for series and raise KeyError for Frame + for timestamp, expected in zip(index, values): + ts_string = timestamp.strftime(formats[rnum]) + # make ts_string as precise as index + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, expected) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Timestamp with resolution less precise than index + for fmt in formats[:rnum]: + for element, theslice in [[0, slice(None, 1)], + [1, slice(1, None)]]: + ts_string = index[element].strftime(fmt) + # Series should return slice + result = df['a'][ts_string] + expected = df['a'][theslice] + assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts_string] + expected = df[theslice] + assert_frame_equal(result, expected) + + # Timestamp with resolution more precise than 
index + # Compatible with existing key + for fmt in formats[rnum + 1:]: + ts_string = index[1].strftime(fmt) + result = df['a'][ts_string] + self.assertIsInstance(result, np.int64) + self.assertEqual(result, 2) + self.assertRaises(KeyError, df.__getitem__, ts_string) + + # Not compatible with existing key + for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: + ts = index[1] + Timedelta(1, res[0]) + ts_string = ts.strftime(fmt) + self.assertRaises(KeyError, df['a'].__getitem__, ts_string) + self.assertRaises(KeyError, df.__getitem__, ts_string) def test_partial_slicing_with_multiindex(self): From 67e6bab3a58c825fab2d976bbf8c9a64b49268a8 Mon Sep 17 00:00:00 2001 From: "Ilya V. Schurov" Date: Mon, 19 Dec 2016 00:50:04 +0300 Subject: [PATCH 06/10] Addressing code review: more comments added --- pandas/tseries/tests/test_timeseries.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 877bc36f956e5..54d7892f933b7 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -4942,12 +4942,18 @@ def test_partial_slice_second_precision(self): def test_partial_slicing_dataframe(self): # GH14856 - # Test various combinations of string slicing + # Test various combinations of string slicing resolution vs. 
+ # index resolution + # - If string resolution is less precise than index resolution, + # string is considered a slice + # - If string resolution is equal to or more precise than index + # resolution, string is considered an exact match formats = ['%Y', '%Y-%m', '%Y-%m-%d', '%Y-%m-%d %H', '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S'] resolutions = ['year', 'month', 'day', 'hour', 'minute', 'second'] for rnum, resolution in enumerate(resolutions[2:], 2): - unit = Timedelta(1, resolution[0]) + # we check only 'day', 'hour', 'minute' and 'second' + unit = Timedelta("1 " + resolution) middate = datetime(2012, 1, 1, 0, 0, 0) index = DatetimeIndex([middate - unit, middate, middate + unit]) @@ -4956,7 +4962,8 @@ def test_partial_slicing_dataframe(self): self.assertEqual(df.index.resolution, resolution) # Timestamp with the same resolution as index - # Should be exact match for series and raise KeyError for Frame + # Should be exact match for Series (return scalar) + # and raise KeyError for Frame for timestamp, expected in zip(index, values): ts_string = timestamp.strftime(formats[rnum]) # make ts_string as precise as index @@ -4970,6 +4977,7 @@ def test_partial_slicing_dataframe(self): for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]: ts_string = index[element].strftime(fmt) + # Series should return slice result = df['a'][ts_string] expected = df['a'][theslice] @@ -4982,6 +4990,8 @@ def test_partial_slicing_dataframe(self): # Timestamp with resolution more precise than index # Compatible with existing key + # Should return scalar for Series + # and raise KeyError for Frame for fmt in formats[rnum + 1:]: ts_string = index[1].strftime(fmt) result = df['a'][ts_string] @@ -4990,8 +5000,9 @@ def test_partial_slicing_dataframe(self): self.assertRaises(KeyError, df.__getitem__, ts_string) # Not compatible with existing key + # Should raise KeyError for fmt, res in list(zip(formats, resolutions))[rnum + 1:]: - ts = index[1] + Timedelta(1, res[0]) + ts = index[1] + 
Timedelta("1 " + res) ts_string = ts.strftime(fmt) self.assertRaises(KeyError, df['a'].__getitem__, ts_string) self.assertRaises(KeyError, df.__getitem__, ts_string) From e17d210dbab0797c18a57f63c6027506cbda363f Mon Sep 17 00:00:00 2001 From: "Ilya V. Schurov" Date: Mon, 19 Dec 2016 03:20:03 +0300 Subject: [PATCH 07/10] - Whatsnew section added - Documentation section added --- doc/source/timeseries.rst | 55 +++++++++++++++++++++++---------- doc/source/whatsnew/v0.20.0.txt | 32 +++++++++++++++++++ 2 files changed, 71 insertions(+), 16 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 854de443ac5ee..5ec1ec28a9701 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -457,22 +457,6 @@ We are stopping on the included end-point as it is part of the index dft['2013-1-15':'2013-1-15 12:30:00'] -.. warning:: - - The following selection will raise a ``KeyError``; otherwise this selection methodology - would be inconsistent with other selection methods in pandas (as this is not a *slice*, nor does it - resolve to one) - - .. code-block:: python - - dft['2013-1-15 12:30:00'] - - To select a single row, use ``.loc`` - - .. ipython:: python - - dft.loc['2013-1-15 12:30:00'] - .. versionadded:: 0.18.0 DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiIndex``. For example: @@ -491,6 +475,45 @@ DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiInd dft2 = dft2.swaplevel(0, 1).sort_index() dft2.loc[idx[:, '2013-01-05'], :] +String Indexing: slice vs. exact match +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The same string used as an indexing parameter can be treated either as slice or as exact match depending on the resolution of an index. If the string is less precise than index, it will be treated as a slice, otherwise as an exact match. + +.. 
ipython:: python + series_minute = pd.Series([1, 2, 3], DatetimeIndex(['2011-12-31 23:59:00', + '2012-01-01 00:00:00', + '2012-01-01 00:01:00'])) + series_minute.index.resolution + series_minute['2011-12-31 23'] # returns Series + series_minute['2012-12-31 23:59'] # returns scalar + + series_second = pd.Series([1, 2, 3], DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01'])) + series_second.index.resolution + series_second['2012-12-31 23:59'] # now it returns scalar + +It also works for ``DataFrame``: + +.. ipython:: python + dft_minute = pd.DataFrame(series_minute) + dft_minute['2011-12-31 23'] + +.. warning:: + + If a string used in ``DataFrame``'s ``[]`` indexing is treated as an exact match the selection will be column-wise and not row-wise. This is consistent with :ref:`Indexing Basics `. For example, the following code will raise ``KeyError`` as there is no column with index ``'2012-12-31 23:59'``: + + .. code-block:: python + + df_minute['2012-12-31 23:59'] + + To select a single row, use ``.loc`` + + .. ipython:: python + + df_minute.loc['2012-12-31 23:59'] + Datetime Indexing ~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0bfd755aae40c..d05b34eb5dfeb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -70,6 +70,38 @@ Backwards incompatible API changes Other API Changes ^^^^^^^^^^^^^^^^^ +- :ref:`DatetimeIndex Partial String Indexing ` now works as exact match provided that string resolution coincides with index resolution (:issue:`14826`). + + .. ipython:: python + + df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01'])) + Previous Behavior: + + .. 
code-block:: ipython + + In [4]: df['2011-12-31 23:59:59'] + Out[4]: + a + 2011-12-31 23:59:59 1 + + In [5]: df['a']['2011-12-31 23:59:59'] + Out[5]: + 2011-12-31 23:59:59 1 + Name: a, dtype: int64 + + + New Behavior: + + .. code-block:: ipython + + In [4]: df['2011-12-31 23:59:59'] + KeyError: '2011-12-31 23:59:59' + + In [5]: df['a']['2011-12-31 23:59:59'] + Out[5]: 1 + .. _whatsnew_0200.deprecations: Deprecations From 40eddc3d7aaaa0dfcb3cd2ab707db887e5ed56e7 Mon Sep 17 00:00:00 2001 From: "Ilya V. Schurov" Date: Mon, 19 Dec 2016 03:51:10 +0300 Subject: [PATCH 08/10] - Documentation fixes --- doc/source/timeseries.rst | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 5ec1ec28a9701..4d12a5568a978 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -475,44 +475,50 @@ DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiInd dft2 = dft2.swaplevel(0, 1).sort_index() dft2.loc[idx[:, '2013-01-05'], :] -String Indexing: slice vs. exact match -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Slice vs. exact match +^^^^^^^^^^^^^^^^^^^^^ -The same string used as an indexing parameter can be treated either as slice or as exact match depending on the resolution of an index. If the string is less precise than index, it will be treated as a slice, otherwise as an exact match. +The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of an index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. .. 
ipython:: python - series_minute = pd.Series([1, 2, 3], DatetimeIndex(['2011-12-31 23:59:00', - '2012-01-01 00:00:00', - '2012-01-01 00:01:00'])) + + series_minute = pd.Series([1, 2, 3], + pd.DatetimeIndex(['2011-12-31 23:59:00', + '2012-01-01 00:00:00', + '2012-01-01 00:01:00'])) series_minute.index.resolution series_minute['2011-12-31 23'] # returns Series - series_minute['2012-12-31 23:59'] # returns scalar + series_minute['2011-12-31 23:59'] # returns scalar - series_second = pd.Series([1, 2, 3], DatetimeIndex(['2011-12-31 23:59:59', - '2012-01-01 00:00:00', - '2012-01-01 00:00:01'])) + series_second = pd.Series([1, 2, 3], + pd.DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01'])) series_second.index.resolution - series_second['2012-12-31 23:59'] # now it returns scalar + series_second['2011-12-31 23:59'] # now it returns Series It also works for ``DataFrame``: .. ipython:: python - dft_minute = pd.DataFrame(series_minute) + + dft_minute = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, + index=series_minute.index) dft_minute['2011-12-31 23'] .. warning:: - If a string used in ``DataFrame``'s ``[]`` indexing is treated as an exact match the selection will be column-wise and not row-wise. This is consistent with :ref:`Indexing Basics `. For example, the following code will raise ``KeyError`` as there is no column with index ``'2012-12-31 23:59'``: + If string used in ``DataFrame``'s ``[]`` indexing is treated as an exact match the selection will be column-wise and not row-wise. This is consistent with :ref:`Indexing Basics `. For example, the following code will raise ``KeyError`` as there is no column with index ``'2012-12-31 23:59'``: .. code-block:: python - df_minute['2012-12-31 23:59'] + dft_minute['2011-12-31 23:59'] + # KeyError: '2011-12-31 23:59' To select a single row, use ``.loc`` .. 
ipython:: python - df_minute.loc['2012-12-31 23:59'] + dft_minute.loc['2011-12-31 23:59'] Datetime Indexing ~~~~~~~~~~~~~~~~~ From d215905212c5caa364fd26a71be7ac6845cdaf47 Mon Sep 17 00:00:00 2001 From: "Ilya V. Schurov" Date: Tue, 20 Dec 2016 00:26:55 +0300 Subject: [PATCH 09/10] - Addressing code review: documentation clarification. --- doc/source/timeseries.rst | 52 ++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 6d4b178f9fc1b..6d56a28170645 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -475,29 +475,48 @@ DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiInd dft2 = dft2.swaplevel(0, 1).sort_index() dft2.loc[idx[:, '2013-01-05'], :] +.. _timeseries.slice_vs_exact_match: + Slice vs. exact match ^^^^^^^^^^^^^^^^^^^^^ The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of an index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. +For example, let us consider a ``Series`` object whose index has minute resolution. + .. ipython:: python series_minute = pd.Series([1, 2, 3], pd.DatetimeIndex(['2011-12-31 23:59:00', '2012-01-01 00:00:00', - '2012-01-01 00:01:00'])) + '2012-01-01 00:02:00'])) series_minute.index.resolution - series_minute['2011-12-31 23'] # returns Series - series_minute['2011-12-31 23:59'] # returns scalar + +A timestamp string less accurate than minute gives a ``Series`` object. + +.. ipython:: python + + series_minute['2011-12-31 23'] + +A timestamp string with minute resolution (or more accurate) gives a scalar instead, i.e. it is not cast to a slice. + +.. ipython:: python + + series_minute['2011-12-31 23:59'] + series_minute['2011-12-31 23:59:00'] + +If the index resolution is second, the minute-accurate timestamp gives a ``Series``. + +.. 
ipython:: python series_second = pd.Series([1, 2, 3], pd.DatetimeIndex(['2011-12-31 23:59:59', '2012-01-01 00:00:00', '2012-01-01 00:00:01'])) series_second.index.resolution - series_second['2011-12-31 23:59'] # now it returns Series + series_second['2011-12-31 23:59'] -It also works for ``DataFrame``: +If the timestamp string is treated as a slice, it can be used to index ``DataFrame`` with ``[]`` as well. .. ipython:: python @@ -505,25 +524,30 @@ It also works for ``DataFrame``: index=series_minute.index) dft_minute['2011-12-31 23'] -.. warning:: +However, if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics <indexing.basics>`. For example, ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2011-12-31 23:59'`` has the same resolution as the index and there is no column with such a name. - If string used in ``DataFrame``'s ``[]`` indexing is treated as an exact match the selection will be column-wise and not row-wise. This is consistent with :ref:`Indexing Basics <indexing.basics>`. For example, the following code will raise ``KeyError`` as there is no column with index ``'2012-12-31 23:59'``: +To select a single row, use ``.loc``. - .. code-block:: python +.. ipython:: python - dft_minute['2011-12-31 23:59'] - # KeyError: '2011-12-31 23:59' + dft_minute.loc['2011-12-31 23:59'] - To select a single row, use ``.loc`` +Note also that ``DatetimeIndex`` resolution cannot be less precise than day. - .. ipython:: python + +.. ipython:: python + + series_monthly = pd.Series([1, 2, 3], + pd.DatetimeIndex(['2011-12', + '2012-01', + '2012-02'])) + series_monthly.index.resolution + series_monthly['2011-12'] # returns Series - dft_minute.loc['2011-12-31 23:59'] Datetime Indexing ~~~~~~~~~~~~~~~~~ -Indexing a ``DateTimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the frequency of the index. 
In contrast, indexing with datetime objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. +As discussed in the previous section, indexing a ``DatetimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with datetime objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. These ``datetime`` objects are specific ``hours, minutes,`` and ``seconds`` even though they were not explicitly specified (they are ``0``). From 0814e5b9587819c51b3c6009bd7e84c6b6d156c1 Mon Sep 17 00:00:00 2001 From: "Ilya V. Schurov" Date: Tue, 20 Dec 2016 00:56:50 +0300 Subject: [PATCH 10/10] - Addressing code review: added reference to new docs section in whatsnew. --- doc/source/whatsnew/v0.20.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2c355b5c917ec..54b0c316c3f5f 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -193,7 +193,7 @@ Map on Index types now return other Index types Other API Changes ^^^^^^^^^^^^^^^^^ -- :ref:`DatetimeIndex Partial String Indexing <timeseries.partialindexing>` now works as exact match provided that string resolution coincides with index resolution (:issue:`14826`). +- :ref:`DatetimeIndex Partial String Indexing <timeseries.partialindexing>` now works as exact match provided that string resolution coincides with index resolution, including the case when both are seconds (:issue:`14826`). See :ref:`Slice vs. Exact Match <timeseries.slice_vs_exact_match>` for details. .. ipython:: python