ERR: between_time now checks for argument types #11832

Status: Closed (1 commit)
13 changes: 13 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
@@ -266,6 +266,19 @@ other anchored offsets like ``MonthBegin`` and ``YearBegin``.
Other API Changes
^^^^^^^^^^^^^^^^^

- ``DataFrame.between_time`` and ``Series.between_time`` now only parse a fixed set of time strings. Parsing
of date strings is no longer supported and raises a ``ValueError``. (:issue:`11818`)

.. code-block:: python

In [3]: s = pd.Series(range(10), pd.date_range('2015-01-01', freq='H', periods=10))

In [4]: s.between_time("7:00am", "9:00am")
Out[4]:
2015-01-01 07:00:00 7
2015-01-01 08:00:00 8
2015-01-01 09:00:00 9
Freq: H, dtype: int64
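
A date string (rather than a time-of-day string) is now rejected outright. For illustration only (not part of this commit; the exact error text may differ):

.. code-block:: python

In [5]: s.between_time("2015-01-01 7:00am", "9:00am")
ValueError: ...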



34 changes: 15 additions & 19 deletions pandas/tseries/index.py
@@ -21,7 +21,7 @@
Resolution)
from pandas.tseries.base import DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
from pandas.tseries.tools import parse_time_string, normalize_date
from pandas.tseries.tools import parse_time_string, normalize_date, to_time
from pandas.tseries.timedeltas import to_timedelta
from pandas.util.decorators import cache_readonly, deprecate_kwarg
import pandas.core.common as com
@@ -109,12 +109,12 @@ def _ensure_datetime64(other):
return other
raise TypeError('%s type object %s' % (type(other), str(other)))


_midnight = time(0, 0)


def _new_DatetimeIndex(cls, d):
""" This is called upon unpickling, rather than the default which doesn't have arguments
and breaks __new__ """
""" This is called upon unpickling, rather than the default which doesn't
have arguments and breaks __new__ """

# data are already in UTC
# so need to localize
@@ -1755,12 +1755,18 @@ def indexer_at_time(self, time, asof=False):
def indexer_between_time(self, start_time, end_time, include_start=True,
include_end=True):
"""
Select values between particular times of day (e.g., 9:00-9:30AM)
Select values between particular times of day (e.g., 9:00-9:30AM).
Return values of the index between two times. If start_time or
end_time are strings then tseries.tools.to_time is used to convert to
a time object.
Parameters
----------
start_time : datetime.time or string
end_time : datetime.time or string
start_time, end_time : datetime.time, str
datetime.time or string in appropriate format ("%H:%M", "%H%M",
"%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
"%I%M%S%p")
include_start : boolean, default True
include_end : boolean, default True
tz : string or pytz.timezone or dateutil.tz.tzfile, default None
@@ -1769,18 +1775,8 @@ def indexer_between_time(self, start_time, end_time, include_start=True,
-------
values_between_time : TimeSeries
"""
from dateutil.parser import parse

if isinstance(start_time, compat.string_types):
start_time = parse(start_time).time()

if isinstance(end_time, compat.string_types):
end_time = parse(end_time).time()

if start_time.tzinfo or end_time.tzinfo:
raise NotImplementedError("argument 'time' with timezone info is "
"not supported")

start_time = to_time(start_time)
end_time = to_time(end_time)
time_micros = self._get_time_micros()
start_micros = _time_to_micros(start_time)
end_micros = _time_to_micros(end_time)
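A usage sketch (illustrative, not part of the diff) of what the hunk above implements: string arguments are converted with ``to_time`` and must match one of the fixed formats listed in the docstring, while date strings and datetime objects now raise ``ValueError``.

import pandas as pd

idx = pd.date_range('2000-01-01', periods=48, freq='H')

# time-of-day strings in any supported format are converted to datetime.time
locs = idx.indexer_between_time('9:00am', '11:00', include_end=False)
print(idx[locs])

# a date string no longer parses and is rejected
try:
    idx.indexer_between_time('2000-01-01 09:00', '11:00')
except ValueError as exc:
    print('rejected:', exc)
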
92 changes: 64 additions & 28 deletions pandas/tseries/tests/test_timeseries.py
@@ -5,29 +5,27 @@
import operator
import warnings
import nose

import numpy as np
randn = np.random.randn

import pandas.tseries.frequencies as frequencies
import pandas.lib as lib
import pandas.tslib as tslib
import pandas.index as _index
import pandas as pd
from pandas import (Index, Series, DataFrame,
isnull, date_range, Timestamp, Period, DatetimeIndex,
Int64Index, to_datetime, bdate_range, Float64Index,
TimedeltaIndex, NaT, timedelta_range, Timedelta)
NaT, timedelta_range, Timedelta)

import pandas.core.datetools as datetools
import pandas.tseries.offsets as offsets
import pandas.tseries.tools as tools
import pandas.tseries.frequencies as frequencies
import pandas as pd

from pandas.util.testing import assert_series_equal, assert_almost_equal
import pandas.util.testing as tm

from pandas.tslib import NaT, iNaT
import pandas.lib as lib
import pandas.tslib as tslib
from pandas.util.testing import assert_series_equal, assert_almost_equal,\
_skip_if_has_locale
import pandas.util.testing as tm

import pandas.index as _index
from pandas.tslib import iNaT

from pandas.compat import range, long, StringIO, lrange, lmap, zip, product
from numpy.random import rand
@@ -40,12 +38,7 @@

from numpy.testing.decorators import slow


def _skip_if_has_locale():
import locale
lang, _ = locale.getlocale()
if lang is not None:
raise nose.SkipTest("Specific locale is set {0}".format(lang))
randn = np.random.randn


class TestTimeSeriesDuplicates(tm.TestCase):
@@ -93,7 +86,8 @@ def test_index_unique(self):
self.assertEqual(idx.nunique(), 20)
self.assertEqual(idx.nunique(dropna=False), 21)

arr = [ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT]
arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for
t in range(20) ] + [NaT]
idx = DatetimeIndex(arr * 3)
self.assertTrue(idx.unique().equals(DatetimeIndex(arr)))
self.assertEqual(idx.nunique(), 20)
@@ -258,23 +252,29 @@ def test_indexing(self):
assert_series_equal(expected, result)

# GH3546 (not including times on the last day)
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', freq='H')
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
freq='H')
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013-05']
assert_series_equal(expected, ts)

idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', freq='S')
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
freq='S')
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013-05']
assert_series_equal(expected,ts)

idx = [ Timestamp('2013-05-31 00:00'), Timestamp(datetime(2013,5,31,23,59,59,999999))]
ts = Series(lrange(len(idx)), index=idx)
idx = [Timestamp('2013-05-31 00:00'),
Timestamp(datetime(2013,5,31,23,59,59,999999))]
ts = Series(lrange(len(idx)), index=idx)
expected = ts['2013']
assert_series_equal(expected,ts)

# GH 3925, indexing with a seconds resolution string / datetime object
df = DataFrame(randn(5,5),columns=['open','high','low','close','volume'],index=date_range('2012-01-02 18:01:00',periods=5,tz='US/Central',freq='s'))
df = DataFrame(randn(5,5),
columns=['open', 'high', 'low', 'close', 'volume'],
index=date_range('2012-01-02 18:01:00',
periods=5, tz='US/Central', freq='s'))
expected = df.loc[[df.index[2]]]
result = df['2012-01-02 18:01:02']
assert_frame_equal(result,expected)
Expand All @@ -283,14 +283,16 @@ def test_indexing(self):
self.assertRaises(KeyError, df.__getitem__, df.index[2],)

def test_recreate_from_data(self):
freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', 'C']
freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T',
'S', 'L', 'U', 'H', 'N', 'C']

for f in freqs:
org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1)
idx = DatetimeIndex(org, freq=f)
self.assertTrue(idx.equals(org))

org = DatetimeIndex(start='2001/02/01 09:00', freq=f, tz='US/Pacific', periods=1)
org = DatetimeIndex(start='2001/02/01 09:00', freq=f,
tz='US/Pacific', periods=1)
idx = DatetimeIndex(org, freq=f, tz='US/Pacific')
self.assertTrue(idx.equals(org))

@@ -459,7 +461,8 @@ def _check_rng(rng):
self.assertEqual(x.tzinfo, stamp.tzinfo)

rng = date_range('20090415', '20090519')
rng_eastern = date_range('20090415', '20090519', tz=pytz.timezone('US/Eastern'))
rng_eastern = date_range('20090415', '20090519',
tz=pytz.timezone('US/Eastern'))
rng_utc = date_range('20090415', '20090519', tz=pytz.utc)

_check_rng(rng)
Expand All @@ -479,7 +482,8 @@ def _check_rng(rng):
self.assertEqual(x.tzinfo, stamp.tzinfo)

rng = date_range('20090415', '20090519')
rng_eastern = date_range('20090415', '20090519', tz='dateutil/US/Eastern')
rng_eastern = date_range('20090415', '20090519',
tz='dateutil/US/Eastern')
rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc())

_check_rng(rng)
@@ -1524,6 +1528,38 @@ def test_between_time_frame(self):
else:
self.assertTrue((t < etime) or (t >= stime))

def test_between_time_types(self):
# GH11818
rng = date_range('1/1/2000', '1/5/2000', freq='5min')
self.assertRaises(ValueError, rng.indexer_between_time,
datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

frame = DataFrame({'A': 0}, index=rng)
self.assertRaises(ValueError, frame.between_time,
datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

series = Series(0, index=rng)
self.assertRaises(ValueError, series.between_time,
datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

def test_between_time_formats(self):
# GH11818
_skip_if_has_locale()

rng = date_range('1/1/2000', '1/5/2000', freq='5min')
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)

strings = [("2:00", "2:30"), ("0200", "0230"),
("2:00am", "2:30am"), ("0200am", "0230am"),
("2:00:00", "2:30:00"), ("020000", "023000"),
("2:00:00am", "2:30:00am"), ("020000am", "023000am")]
expected_length = 28

for time_string in strings:
self.assertEqual(len(ts.between_time(*time_string)),
expected_length,
"%s - %s" % time_string)

def test_dti_constructor_preserve_dti_freq(self):
rng = date_range('1/1/2000', '1/2/2000', freq='5min')

38 changes: 36 additions & 2 deletions pandas/tseries/tests/test_tslib.py
@@ -15,8 +15,7 @@
import pandas.tseries.offsets as offsets
import pandas.util.testing as tm
import pandas.compat as compat
from pandas.util.testing import assert_series_equal
import pandas.compat as compat
from pandas.util.testing import assert_series_equal, _skip_if_has_locale


class TestTimestamp(tm.TestCase):
@@ -617,6 +616,41 @@ def test_parsers_timestring(self):
self.assertEqual(result4, exp_now)
self.assertEqual(result5, exp_now)

def test_parsers_time(self):
# GH11818
_skip_if_has_locale()
strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500",
"2:15:00pm", "021500pm", datetime.time(14, 15)]
expected = datetime.time(14, 15)

for time_string in strings:
self.assertEqual(tools.to_time(time_string), expected)

new_string = "14.15"
self.assertRaises(ValueError, tools.to_time, new_string)
self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected)
tools.add_time_format("%H.%M")
self.assertEqual(tools.to_time(new_string), expected)

arg = ["14:15", "20:20"]
expected_arr = [datetime.time(14, 15), datetime.time(20, 20)]
self.assertEqual(tools.to_time(arg), expected_arr)
self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr)
self.assertEqual(tools.to_time(arg, infer_time_format=True),
expected_arr)
self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"),
[None, None])
self.assert_numpy_array_equal(tools.to_time(arg, format="%I:%M%p",
errors="ignore"),
np.array(arg))
self.assertRaises(ValueError, lambda: tools.to_time(arg,
format="%I:%M%p",
errors="raise"))
self.assert_series_equal(tools.to_time(Series(arg, name="test")),
Series(expected_arr, name="test"))
self.assert_numpy_array_equal(tools.to_time(np.array(arg)),
np.array(expected_arr))

def test_parsers_monthfreq(self):
cases = {'201101': datetime.datetime(2011, 1, 1, 0, 0),
'200005': datetime.datetime(2000, 5, 1, 0, 0)}
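The tests above exercise ``tools.to_time`` and ``tools.add_time_format``, but the ``pandas/tseries/tools.py`` hunk itself is not shown in this excerpt. Below is a minimal standalone sketch of the conversion behaviour those tests imply (the fixed format list, the errors='raise'/'coerce'/'ignore' modes, and list handling), written as a hypothetical helper for illustration; the actual implementation in the PR may differ.

from datetime import datetime, time

# the fixed formats listed in the indexer_between_time docstring above
_TIME_FORMATS = ["%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
                 "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p"]


def to_time_sketch(arg, format=None, errors="raise"):
    """Convert a time string (or list of strings) to datetime.time.

    Simplified illustration: errors='raise' raises ValueError on input that
    matches no format, 'coerce' returns None, 'ignore' returns the input.
    """
    def _convert(value):
        if isinstance(value, time):
            return value
        formats = [format] if format is not None else _TIME_FORMATS
        for fmt in formats:
            try:
                return datetime.strptime(value, fmt).time()
            except ValueError:
                pass
        if errors == "raise":
            raise ValueError("Cannot convert %r to a time" % value)
        if errors == "coerce":
            return None
        return value  # errors == 'ignore'

    if isinstance(arg, (list, tuple)):
        return [_convert(v) for v in arg]
    return _convert(arg)


print(to_time_sketch("2:15pm"))                    # 14:15:00
print(to_time_sketch(["14:15", "20:20"]))          # [datetime.time(14, 15), datetime.time(20, 20)]
print(to_time_sketch("14.15", format="%H.%M"))     # 14:15:00
print(to_time_sketch("badtime", errors="coerce"))  # None

The test above also implies that ``add_time_format`` simply registers an extra format for ``to_time`` to try, which is why "14.15" parses without an explicit format once "%H.%M" has been added.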