Skip to content

Commit eadc308

Browse files
committed
ERR/ENH: between_time checks argument types and new to_time function
1 parent a1e7d53 commit eadc308

File tree

6 files changed

+296
-78
lines changed

6 files changed

+296
-78
lines changed

doc/source/whatsnew/v0.18.0.txt

+13
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,19 @@ other anchored offsets like ``MonthBegin`` and ``YearBegin``.
266266
Other API Changes
267267
^^^^^^^^^^^^^^^^^
268268

269+
- ``DataFrame.between_time`` and ``Series.between_time`` now only parse a fixed set of time strings. Parsing
270+
of date strings is no longer supported and raises a ``ValueError``. (:issue:`11818`)
271+
272+
.. code-block:: python
273+
274+
In [3]: s = pd.Series(range(10), pd.date_range('2015-01-01', freq='H', periods=10))
275+
276+
In [4]: s.between_time("7:00am", "9:00am")
277+
Out[4]:
278+
2015-01-01 07:00:00 7
279+
2015-01-01 08:00:00 8
280+
2015-01-01 09:00:00 9
281+
Freq: H, dtype: int64
269282

270283

271284

pandas/tseries/index.py

+15-19
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
Resolution)
2222
from pandas.tseries.base import DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin
2323
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
24-
from pandas.tseries.tools import parse_time_string, normalize_date
24+
from pandas.tseries.tools import parse_time_string, normalize_date, to_time
2525
from pandas.tseries.timedeltas import to_timedelta
2626
from pandas.util.decorators import cache_readonly, deprecate_kwarg
2727
import pandas.core.common as com
@@ -109,12 +109,12 @@ def _ensure_datetime64(other):
109109
return other
110110
raise TypeError('%s type object %s' % (type(other), str(other)))
111111

112-
113112
_midnight = time(0, 0)
114113

114+
115115
def _new_DatetimeIndex(cls, d):
116-
""" This is called upon unpickling, rather than the default which doesn't have arguments
117-
and breaks __new__ """
116+
""" This is called upon unpickling, rather than the default which doesn't
117+
have arguments and breaks __new__ """
118118

119119
# data are already in UTC
120120
# so need to localize
@@ -1755,12 +1755,18 @@ def indexer_at_time(self, time, asof=False):
17551755
def indexer_between_time(self, start_time, end_time, include_start=True,
17561756
include_end=True):
17571757
"""
1758-
Select values between particular times of day (e.g., 9:00-9:30AM)
1758+
Select values between particular times of day (e.g., 9:00-9:30AM).
1759+
1760+
Return values of the index between two times. If start_time or
1761+
end_time are strings then tseries.tools.to_time is used to convert to
1762+
a time object.
17591763
17601764
Parameters
17611765
----------
1762-
start_time : datetime.time or string
1763-
end_time : datetime.time or string
1766+
start_time, end_time : datetime.time, str
1767+
datetime.time or string in appropriate format ("%H:%M", "%H%M",
1768+
"%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
1769+
"%I%M%S%p")
17641770
include_start : boolean, default True
17651771
include_end : boolean, default True
17661772
tz : string or pytz.timezone or dateutil.tz.tzfile, default None
@@ -1769,18 +1775,8 @@ def indexer_between_time(self, start_time, end_time, include_start=True,
17691775
-------
17701776
values_between_time : TimeSeries
17711777
"""
1772-
from dateutil.parser import parse
1773-
1774-
if isinstance(start_time, compat.string_types):
1775-
start_time = parse(start_time).time()
1776-
1777-
if isinstance(end_time, compat.string_types):
1778-
end_time = parse(end_time).time()
1779-
1780-
if start_time.tzinfo or end_time.tzinfo:
1781-
raise NotImplementedError("argument 'time' with timezone info is "
1782-
"not supported")
1783-
1778+
start_time = to_time(start_time)
1779+
end_time = to_time(end_time)
17841780
time_micros = self._get_time_micros()
17851781
start_micros = _time_to_micros(start_time)
17861782
end_micros = _time_to_micros(end_time)

pandas/tseries/tests/test_timeseries.py

+64-28
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,27 @@
55
import operator
66
import warnings
77
import nose
8-
98
import numpy as np
10-
randn = np.random.randn
11-
9+
import pandas.tseries.frequencies as frequencies
10+
import pandas.lib as lib
11+
import pandas.tslib as tslib
12+
import pandas.index as _index
13+
import pandas as pd
1214
from pandas import (Index, Series, DataFrame,
1315
isnull, date_range, Timestamp, Period, DatetimeIndex,
1416
Int64Index, to_datetime, bdate_range, Float64Index,
15-
TimedeltaIndex, NaT, timedelta_range, Timedelta)
17+
NaT, timedelta_range, Timedelta)
1618

1719
import pandas.core.datetools as datetools
1820
import pandas.tseries.offsets as offsets
1921
import pandas.tseries.tools as tools
20-
import pandas.tseries.frequencies as frequencies
21-
import pandas as pd
2222

23-
from pandas.util.testing import assert_series_equal, assert_almost_equal
24-
import pandas.util.testing as tm
2523

26-
from pandas.tslib import NaT, iNaT
27-
import pandas.lib as lib
28-
import pandas.tslib as tslib
24+
from pandas.util.testing import assert_series_equal, assert_almost_equal,\
25+
_skip_if_has_locale
26+
import pandas.util.testing as tm
2927

30-
import pandas.index as _index
28+
from pandas.tslib import iNaT
3129

3230
from pandas.compat import range, long, StringIO, lrange, lmap, zip, product
3331
from numpy.random import rand
@@ -40,12 +38,7 @@
4038

4139
from numpy.testing.decorators import slow
4240

43-
44-
def _skip_if_has_locale():
45-
import locale
46-
lang, _ = locale.getlocale()
47-
if lang is not None:
48-
raise nose.SkipTest("Specific locale is set {0}".format(lang))
41+
randn = np.random.randn
4942

5043

5144
class TestTimeSeriesDuplicates(tm.TestCase):
@@ -93,7 +86,8 @@ def test_index_unique(self):
9386
self.assertEqual(idx.nunique(), 20)
9487
self.assertEqual(idx.nunique(dropna=False), 21)
9588

96-
arr = [ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT]
89+
arr = [Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for
90+
t in range(20) ] + [NaT]
9791
idx = DatetimeIndex(arr * 3)
9892
self.assertTrue(idx.unique().equals(DatetimeIndex(arr)))
9993
self.assertEqual(idx.nunique(), 20)
@@ -258,23 +252,29 @@ def test_indexing(self):
258252
assert_series_equal(expected, result)
259253

260254
# GH3546 (not including times on the last day)
261-
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', freq='H')
255+
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00',
256+
freq='H')
262257
ts = Series(lrange(len(idx)), index=idx)
263258
expected = ts['2013-05']
264259
assert_series_equal(expected, ts)
265260

266-
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', freq='S')
261+
idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59',
262+
freq='S')
267263
ts = Series(lrange(len(idx)), index=idx)
268264
expected = ts['2013-05']
269265
assert_series_equal(expected,ts)
270266

271-
idx = [ Timestamp('2013-05-31 00:00'), Timestamp(datetime(2013,5,31,23,59,59,999999))]
272-
ts = Series(lrange(len(idx)), index=idx)
267+
idx = [Timestamp('2013-05-31 00:00'),
268+
Timestamp(datetime(2013,5,31,23,59,59,999999))]
269+
ts = Series(lrange(len(idx)), index=idx)
273270
expected = ts['2013']
274271
assert_series_equal(expected,ts)
275272

276273
# GH 3925, indexing with a seconds resolution string / datetime object
277-
df = DataFrame(randn(5,5),columns=['open','high','low','close','volume'],index=date_range('2012-01-02 18:01:00',periods=5,tz='US/Central',freq='s'))
274+
df = DataFrame(randn(5,5),
275+
columns=['open', 'high', 'low', 'close', 'volume'],
276+
index=date_range('2012-01-02 18:01:00',
277+
periods=5, tz='US/Central', freq='s'))
278278
expected = df.loc[[df.index[2]]]
279279
result = df['2012-01-02 18:01:02']
280280
assert_frame_equal(result,expected)
@@ -283,14 +283,16 @@ def test_indexing(self):
283283
self.assertRaises(KeyError, df.__getitem__, df.index[2],)
284284

285285
def test_recreate_from_data(self):
286-
freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N', 'C']
286+
freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T',
287+
'S', 'L', 'U', 'H', 'N', 'C']
287288

288289
for f in freqs:
289290
org = DatetimeIndex(start='2001/02/01 09:00', freq=f, periods=1)
290291
idx = DatetimeIndex(org, freq=f)
291292
self.assertTrue(idx.equals(org))
292293

293-
org = DatetimeIndex(start='2001/02/01 09:00', freq=f, tz='US/Pacific', periods=1)
294+
org = DatetimeIndex(start='2001/02/01 09:00', freq=f,
295+
tz='US/Pacific', periods=1)
294296
idx = DatetimeIndex(org, freq=f, tz='US/Pacific')
295297
self.assertTrue(idx.equals(org))
296298

@@ -459,7 +461,8 @@ def _check_rng(rng):
459461
self.assertEqual(x.tzinfo, stamp.tzinfo)
460462

461463
rng = date_range('20090415', '20090519')
462-
rng_eastern = date_range('20090415', '20090519', tz=pytz.timezone('US/Eastern'))
464+
rng_eastern = date_range('20090415', '20090519',
465+
tz=pytz.timezone('US/Eastern'))
463466
rng_utc = date_range('20090415', '20090519', tz=pytz.utc)
464467

465468
_check_rng(rng)
@@ -479,7 +482,8 @@ def _check_rng(rng):
479482
self.assertEqual(x.tzinfo, stamp.tzinfo)
480483

481484
rng = date_range('20090415', '20090519')
482-
rng_eastern = date_range('20090415', '20090519', tz='dateutil/US/Eastern')
485+
rng_eastern = date_range('20090415', '20090519',
486+
tz='dateutil/US/Eastern')
483487
rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc())
484488

485489
_check_rng(rng)
@@ -1524,6 +1528,38 @@ def test_between_time_frame(self):
15241528
else:
15251529
self.assertTrue((t < etime) or (t >= stime))
15261530

1531+
def test_between_time_types(self):
1532+
# GH11818
1533+
rng = date_range('1/1/2000', '1/5/2000', freq='5min')
1534+
self.assertRaises(ValueError, rng.indexer_between_time,
1535+
datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
1536+
1537+
frame = DataFrame({'A': 0}, index=rng)
1538+
self.assertRaises(ValueError, frame.between_time,
1539+
datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
1540+
1541+
series = Series(0, index=rng)
1542+
self.assertRaises(ValueError, series.between_time,
1543+
datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
1544+
1545+
def test_between_time_formats(self):
1546+
# GH11818
1547+
_skip_if_has_locale()
1548+
1549+
rng = date_range('1/1/2000', '1/5/2000', freq='5min')
1550+
ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
1551+
1552+
strings = [("2:00", "2:30"), ("0200", "0230"),
1553+
("2:00am", "2:30am"), ("0200am", "0230am"),
1554+
("2:00:00", "2:30:00"), ("020000", "023000"),
1555+
("2:00:00am", "2:30:00am"), ("020000am", "023000am")]
1556+
expected_length = 28
1557+
1558+
for time_string in strings:
1559+
self.assertEqual(len(ts.between_time(*time_string)),
1560+
expected_length,
1561+
"%s - %s" % time_string)
1562+
15271563
def test_dti_constructor_preserve_dti_freq(self):
15281564
rng = date_range('1/1/2000', '1/2/2000', freq='5min')
15291565

pandas/tseries/tests/test_tslib.py

+36-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@
1515
import pandas.tseries.offsets as offsets
1616
import pandas.util.testing as tm
1717
import pandas.compat as compat
18-
from pandas.util.testing import assert_series_equal
19-
import pandas.compat as compat
18+
from pandas.util.testing import assert_series_equal, _skip_if_has_locale
2019

2120

2221
class TestTimestamp(tm.TestCase):
@@ -617,6 +616,41 @@ def test_parsers_timestring(self):
617616
self.assertEqual(result4, exp_now)
618617
self.assertEqual(result5, exp_now)
619618

619+
def test_parsers_time(self):
620+
# GH11818
621+
_skip_if_has_locale()
622+
strings = ["14:15", "1415", "2:15pm", "0215pm", "14:15:00", "141500",
623+
"2:15:00pm", "021500pm", datetime.time(14, 15)]
624+
expected = datetime.time(14, 15)
625+
626+
for time_string in strings:
627+
self.assertEqual(tools.to_time(time_string), expected)
628+
629+
new_string = "14.15"
630+
self.assertRaises(ValueError, tools.to_time, new_string)
631+
self.assertEqual(tools.to_time(new_string, format="%H.%M"), expected)
632+
tools.add_time_format("%H.%M")
633+
self.assertEqual(tools.to_time(new_string), expected)
634+
635+
arg = ["14:15", "20:20"]
636+
expected_arr = [datetime.time(14, 15), datetime.time(20, 20)]
637+
self.assertEqual(tools.to_time(arg), expected_arr)
638+
self.assertEqual(tools.to_time(arg, format="%H:%M"), expected_arr)
639+
self.assertEqual(tools.to_time(arg, infer_time_format=True),
640+
expected_arr)
641+
self.assertEqual(tools.to_time(arg, format="%I:%M%p", errors="coerce"),
642+
[None, None])
643+
self.assert_numpy_array_equal(tools.to_time(arg, format="%I:%M%p",
644+
errors="ignore"),
645+
np.array(arg))
646+
self.assertRaises(ValueError, lambda: tools.to_time(arg,
647+
format="%I:%M%p",
648+
errors="raise"))
649+
self.assert_series_equal(tools.to_time(Series(arg, name="test")),
650+
Series(expected_arr, name="test"))
651+
self.assert_numpy_array_equal(tools.to_time(np.array(arg)),
652+
np.array(expected_arr))
653+
620654
def test_parsers_monthfreq(self):
621655
cases = {'201101': datetime.datetime(2011, 1, 1, 0, 0),
622656
'200005': datetime.datetime(2000, 5, 1, 0, 0)}

0 commit comments

Comments
 (0)