Skip to content

Commit 828a218

Browse files
committed
ENH: add lite frequency inference to DatetimeIndex setops, close #998
1 parent 6532671 commit 828a218

File tree

3 files changed

+63
-24
lines changed

3 files changed

+63
-24
lines changed

pandas/tests/test_frame.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -2511,7 +2511,9 @@ def test_to_csv_multiindex(self):
25112511
# empty
25122512
tsframe[:0].to_csv(path)
25132513
recons = DataFrame.from_csv(path)
2514-
assert_frame_equal(recons, tsframe[:0])
2514+
exp = tsframe[:0]
2515+
exp.index = []
2516+
assert_frame_equal(recons, exp)
25152517

25162518
def test_to_csv_float32_nanrep(self):
25172519
df = DataFrame(np.random.randn(1, 4).astype(np.float32))

pandas/tseries/index.py

+38-23
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@
44
import numpy as np
55

66
from pandas.core.index import Index, Int64Index
7-
from pandas.tseries.frequencies import infer_freq
8-
from pandas.tseries.tools import parse_time_string
7+
from pandas.tseries.frequencies import infer_freq, to_offset
8+
from pandas.tseries.offsets import DateOffset, generate_range, Tick
9+
from pandas.tseries.tools import parse_time_string, normalize_date
910
from pandas.util.decorators import cache_readonly
1011
import pandas.core.common as com
11-
import pandas.core.datetools as datetools
1212
import pandas.tseries.tools as tools
1313

1414
from pandas._engines import DatetimeEngine
@@ -170,19 +170,19 @@ def __new__(cls, data=None,
170170
freq = kwds['offset']
171171
warn = True
172172

173-
if not isinstance(freq, datetools.DateOffset):
174-
freq = datetools.to_offset(freq)
173+
if not isinstance(freq, DateOffset):
174+
freq = to_offset(freq)
175175

176176
if warn:
177177
import warnings
178178
warnings.warn("parameter 'offset' is deprecated, "
179179
"please use 'freq' instead",
180180
FutureWarning)
181181
if isinstance(freq, basestring):
182-
freq = datetools.get_offset(freq)
182+
freq = to_offset(freq)
183183
else:
184184
if isinstance(freq, basestring):
185-
freq = datetools.to_offset(freq)
185+
freq = to_offset(freq)
186186

187187
offset = freq
188188

@@ -200,7 +200,7 @@ def __new__(cls, data=None,
200200
% start)
201201

202202
if normalize:
203-
start = datetools.normalize_date(start)
203+
start = normalize_date(start)
204204
_normalized = True
205205
else:
206206
_normalized = _normalized and start.time() == _midnight
@@ -212,7 +212,7 @@ def __new__(cls, data=None,
212212
% end)
213213

214214
if normalize:
215-
end = datetools.normalize_date(end)
215+
end = normalize_date(end)
216216
_normalized = True
217217
else:
218218
_normalized = _normalized and end.time() == _midnight
@@ -221,7 +221,7 @@ def __new__(cls, data=None,
221221

222222
if (offset._should_cache() and
223223
not (offset._normalize_cache and not _normalized) and
224-
datetools._naive_in_cache_range(start, end)):
224+
_naive_in_cache_range(start, end)):
225225
index = cls._cached_range(start, end, periods=periods,
226226
offset=offset, name=name)
227227
else:
@@ -310,8 +310,8 @@ def _cached_range(cls, start=None, end=None, periods=None, offset=None,
310310

311311
drc = _daterange_cache
312312
if offset not in _daterange_cache:
313-
xdr = datetools.generate_range(offset=offset,
314-
start=_CACHE_START, end=_CACHE_END)
313+
xdr = generate_range(offset=offset, start=_CACHE_START,
314+
end=_CACHE_END)
315315

316316
arr = np.array(_to_m8_array(list(xdr)),
317317
dtype='M8[us]', copy=False)
@@ -418,7 +418,7 @@ def __setstate__(self, state):
418418
def __add__(self, other):
419419
if isinstance(other, Index):
420420
return self.union(other)
421-
elif isinstance(other, (datetools.DateOffset, timedelta)):
421+
elif isinstance(other, (DateOffset, timedelta)):
422422
new_values = self.astype('O') + other
423423
return DatetimeIndex(new_values, tz=self.tz)
424424
else:
@@ -494,7 +494,7 @@ def snap(self, freq='S'):
494494
495495
"""
496496
# Superdumb, punting on any optimizing
497-
freq = datetools.to_offset(freq)
497+
freq = to_offset(freq)
498498

499499
snapped = np.empty(len(self), dtype='M8[us]')
500500

@@ -580,7 +580,10 @@ def union(self, other):
580580
return this._fast_union(other)
581581
else:
582582
result = Index.union(this, other)
583-
result.tz = self.tz
583+
if isinstance(result, DatetimeIndex):
584+
result.tz = self.tz
585+
if result.freq is None:
586+
result.offset = to_offset(result.inferred_freq)
584587
return result
585588

586589
def join(self, other, how='left', level=None, return_indexers=False):
@@ -695,10 +698,19 @@ def intersection(self, other):
695698
other = DatetimeIndex(other)
696699
except TypeError:
697700
pass
698-
return Index.intersection(self, other)
701+
result = Index.intersection(self, other)
702+
if isinstance(result, DatetimeIndex):
703+
if result.freq is None:
704+
result.offset = to_offset(result.inferred_freq)
705+
return result
706+
699707
elif other.offset != self.offset or (not self.is_monotonic or
700708
not other.is_monotonic):
701-
return Index.intersection(self, other)
709+
result = Index.intersection(self, other)
710+
if isinstance(result, DatetimeIndex):
711+
if result.freq is None:
712+
result.offset = to_offset(result.inferred_freq)
713+
return result
702714

703715
# to make our life easier, "sort" the two ranges
704716
if self[0] <= other[0]:
@@ -756,7 +768,7 @@ def get_value(self, series, key):
756768
except KeyError:
757769

758770
try:
759-
asdt, parsed, reso = datetools.parse_time_string(key)
771+
asdt, parsed, reso = parse_time_string(key)
760772
key = asdt
761773
loc = self._partial_date_slice(reso, parsed)
762774
return series[loc]
@@ -792,7 +804,7 @@ def get_loc(self, key):
792804
raise KeyError(stamp)
793805

794806
def _get_string_slice(self, key):
795-
asdt, parsed, reso = datetools.parse_time_string(key)
807+
asdt, parsed, reso = parse_time_string(key)
796808
key = asdt
797809
loc = self._partial_date_slice(reso, parsed)
798810
return loc
@@ -858,7 +870,10 @@ def freq(self):
858870

859871
@cache_readonly
860872
def inferred_freq(self):
861-
return infer_freq(self)
873+
try:
874+
return infer_freq(self)
875+
except ValueError:
876+
return None
862877

863878
@property
864879
def freqstr(self):
@@ -1020,7 +1035,7 @@ def tz_validate(self):
10201035

10211036
# See if there are any DST resolution problems
10221037
try:
1023-
lib.tz_localize_array(self.asi8, self.tz)
1038+
lib.tz_localize(self.asi8, self.tz)
10241039
except:
10251040
return False
10261041

@@ -1031,7 +1046,7 @@ def _generate_regular_range(start, end, periods, offset):
10311046
if com._count_not_none(start, end, periods) < 2:
10321047
raise ValueError('Must specify two of start, end, or periods')
10331048

1034-
if isinstance(offset, datetools.Tick):
1049+
if isinstance(offset, Tick):
10351050
stride = offset.us_stride()
10361051
if periods is None:
10371052
b = Timestamp(start).value
@@ -1049,7 +1064,7 @@ def _generate_regular_range(start, end, periods, offset):
10491064
data = np.arange(b, e, stride, dtype=np.int64)
10501065
data = data.view('M8[us]')
10511066
else:
1052-
xdr = datetools.generate_range(start=start, end=end,
1067+
xdr = generate_range(start=start, end=end,
10531068
periods=periods, offset=offset)
10541069

10551070
data = np.array(list(xdr), dtype='M8[us]')

pandas/tseries/tests/test_timeseries.py

+22
Original file line number | Diff line number | Diff line change
@@ -725,6 +725,27 @@ def test_shift_multiple_of_same_base(self):
725725

726726
self.assert_(result.index.equals(exp_index))
727727

728+
def test_setops_preserve_freq(self):
729+
rng = date_range('1/1/2000', '1/1/2002')
730+
731+
result = rng[:50].union(rng[50:100])
732+
self.assert_(result.freq == rng.freq)
733+
734+
result = rng[:50].union(rng[30:100])
735+
self.assert_(result.freq == rng.freq)
736+
737+
result = rng[:50].union(rng[60:100])
738+
self.assert_(result.freq is None)
739+
740+
result = rng[:50].intersection(rng[25:75])
741+
self.assert_(result.freqstr == 'D')
742+
743+
nofreq = DatetimeIndex(list(rng[25:75]))
744+
result = rng[:50].union(nofreq)
745+
self.assert_(result.freq == rng.freq)
746+
747+
result = rng[:50].intersection(nofreq)
748+
self.assert_(result.freq == rng.freq)
728749

729750
class TestTimeZones(unittest.TestCase):
730751

@@ -797,6 +818,7 @@ def test_intersection(self):
797818
result = left.intersection(right)
798819
self.assert_(result.tz == left.tz)
799820

821+
800822
class TestLegacyCompat(unittest.TestCase):
801823

802824
def setUp(self):

0 commit comments

Comments
 (0)