Skip to content

Commit 04d4609

Browse files
committed
REF: more nanosecond support fixes, test suite passes #1238
1 parent af2a46f commit 04d4609

20 files changed

+180
-79
lines changed

pandas/core/algorithms.py

+8
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
108108
Returns
109109
-------
110110
"""
111+
values = np.asarray(values)
112+
is_datetime = com.is_datetime64_dtype(values)
111113
hash_klass, values = _get_data_algo(values, _hashtables)
112114

113115
uniques = []
@@ -129,6 +131,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
129131
uniques = uniques.take(sorter)
130132
counts = counts.take(sorter)
131133

134+
if is_datetime:
135+
uniques = np.array(uniques, dtype='M8[ns]')
136+
132137
return labels, uniques, counts
133138

134139
def value_counts(values, sort=True, ascending=False):
@@ -179,6 +184,9 @@ def _get_data_algo(values, func_map):
179184
if com.is_float_dtype(values):
180185
f = func_map['float64']
181186
values = com._ensure_float64(values)
187+
elif com.is_datetime64_dtype(values):
188+
f = func_map['int64']
189+
values = values.view('i8')
182190
elif com.is_integer_dtype(values):
183191
f = func_map['int64']
184192
values = com._ensure_int64(values)

pandas/core/common.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan):
171171
'int64' : _algos.take_1d_int64,
172172
'object' : _algos.take_1d_object,
173173
'bool' : _view_wrapper(_algos.take_1d_bool, np.uint8),
174-
'datetime64[us]' : _view_wrapper(_algos.take_1d_int64, np.int64,
174+
'datetime64[ns]' : _view_wrapper(_algos.take_1d_int64, np.int64,
175175
na_override=lib.NaT),
176176
}
177177

@@ -181,7 +181,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan):
181181
'int64' : _algos.take_2d_axis0_int64,
182182
'object' : _algos.take_2d_axis0_object,
183183
'bool' : _view_wrapper(_algos.take_2d_axis0_bool, np.uint8),
184-
'datetime64[us]' : _view_wrapper(_algos.take_2d_axis0_int64, np.int64,
184+
'datetime64[ns]' : _view_wrapper(_algos.take_2d_axis0_int64, np.int64,
185185
na_override=lib.NaT),
186186
}
187187

@@ -191,7 +191,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan):
191191
'int64' : _algos.take_2d_axis1_int64,
192192
'object' : _algos.take_2d_axis1_object,
193193
'bool' : _view_wrapper(_algos.take_2d_axis1_bool, np.uint8),
194-
'datetime64[us]' : _view_wrapper(_algos.take_2d_axis1_int64, np.int64,
194+
'datetime64[ns]' : _view_wrapper(_algos.take_2d_axis1_int64, np.int64,
195195
na_override=lib.NaT),
196196
}
197197

@@ -201,7 +201,7 @@ def wrapper(arr, indexer, out, fill_value=np.nan):
201201
'int64' : _algos.take_2d_multi_int64,
202202
'object' : _algos.take_2d_multi_object,
203203
'bool' : _view_wrapper(_algos.take_2d_multi_bool, np.uint8),
204-
'datetime64[us]' : _view_wrapper(_algos.take_2d_multi_int64, np.int64,
204+
'datetime64[ns]' : _view_wrapper(_algos.take_2d_multi_int64, np.int64,
205205
na_override=lib.NaT),
206206
}
207207

@@ -246,7 +246,7 @@ def take_1d(arr, indexer, out=None, fill_value=np.nan):
246246
out.dtype)
247247
out = _maybe_upcast(out)
248248
np.putmask(out, mask, fill_value)
249-
elif dtype_str in ('float64', 'object', 'datetime64[us]'):
249+
elif dtype_str in ('float64', 'object', 'datetime64[ns]'):
250250
if out is None:
251251
out = np.empty(n, dtype=arr.dtype)
252252
take_f(arr, _ensure_int64(indexer), out=out, fill_value=fill_value)
@@ -284,7 +284,7 @@ def take_2d_multi(arr, row_idx, col_idx, fill_value=np.nan):
284284
_ensure_int64(col_idx), out=out,
285285
fill_value=fill_value)
286286
return out
287-
elif dtype_str in ('float64', 'object', 'datetime64[us]'):
287+
elif dtype_str in ('float64', 'object', 'datetime64[ns]'):
288288
out = np.empty(out_shape, dtype=arr.dtype)
289289
take_f(arr, _ensure_int64(row_idx), _ensure_int64(col_idx), out=out,
290290
fill_value=fill_value)
@@ -326,7 +326,7 @@ def take_2d(arr, indexer, out=None, mask=None, needs_masking=None, axis=0,
326326
take_f = _get_take2d_function(dtype_str, axis=axis)
327327
take_f(arr, _ensure_int64(indexer), out=out, fill_value=fill_value)
328328
return out
329-
elif dtype_str in ('float64', 'object', 'datetime64[us]'):
329+
elif dtype_str in ('float64', 'object', 'datetime64[ns]'):
330330
if out is None:
331331
out = np.empty(out_shape, dtype=arr.dtype)
332332
take_f = _get_take2d_function(dtype_str, axis=axis)

pandas/core/factor.py

+11-23
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,17 @@ class Factor(np.ndarray):
1818
* levels : ndarray
1919
"""
2020
def __new__(cls, data):
21-
data = np.asarray(data, dtype=object)
22-
levels, factor = unique_with_labels(data)
23-
factor = factor.view(Factor)
24-
factor.levels = levels
25-
return factor
21+
from pandas.core.index import _ensure_index
22+
from pandas.core.algorithms import factorize
23+
24+
try:
25+
labels, levels, _ = factorize(data, sort=True)
26+
except TypeError:
27+
labels, levels, _ = factorize(data, sort=False)
28+
29+
labels = labels.view(Factor)
30+
labels.levels = _ensure_index(levels)
31+
return labels
2632

2733
levels = None
2834

@@ -51,21 +57,3 @@ def __getitem__(self, key):
5157
else:
5258
return np.ndarray.__getitem__(self, key)
5359

54-
55-
def unique_with_labels(values):
56-
from pandas.core.index import Index
57-
rizer = lib.Factorizer(len(values))
58-
labels, _ = rizer.factorize(values, sort=False)
59-
uniques = Index(rizer.uniques)
60-
labels = com._ensure_platform_int(labels)
61-
try:
62-
sorter = uniques.argsort()
63-
reverse_indexer = np.empty(len(sorter), dtype=np.int_)
64-
reverse_indexer.put(sorter, np.arange(len(sorter)))
65-
labels = reverse_indexer.take(labels)
66-
uniques = uniques.take(sorter)
67-
except TypeError:
68-
pass
69-
70-
return uniques, labels
71-

pandas/core/format.py

+20-6
Original file line numberDiff line numberDiff line change
@@ -571,16 +571,30 @@ def get_result(self):
571571
if self.formatter:
572572
formatter = self.formatter
573573
else:
574-
def formatter(x):
575-
if isnull(x):
576-
return 'NaT'
577-
else:
578-
return str(x)
574+
formatter = _format_datetime64
579575

580576
fmt_values = [formatter(x) for x in self.values]
581-
582577
return _make_fixed_width(fmt_values, self.justify)
583578

579+
def _format_datetime64(x):
    """
    Render a single datetime64-like value as a string.

    Null values (as judged by ``isnull``) are shown as ``'NaT'``.  Anything
    else is boxed with ``lib.Timestamp`` and formatted as
    ``YYYY-MM-DD HH:MM:SS``; when the sub-second part is non-zero it is
    appended after a ``.`` with trailing zeros removed.

    Parameters
    ----------
    x : object
        Value accepted by ``isnull`` and ``lib.Timestamp``

    Returns
    -------
    formatted : str
    """
    if isnull(x):
        return 'NaT'

    stamp = lib.Timestamp(x)
    base = stamp.strftime('%Y-%m-%d %H:%M:%S')

    # Sub-second part as a count of nanoseconds
    fraction = stamp.microsecond * 1000 + stamp.nanosecond
    if fraction == 0:
        return base

    # Zero-pad to nine digits, then drop trailing zeros.  Doing this on the
    # string keeps everything in integer/str land, so the result does not
    # depend on whether `/` is classic or true division.
    return base + '.' + ('%09d' % fraction).rstrip('0')
597+
584598

585599
def _make_fixed_width(strings, justify='right'):
586600
if len(strings) == 0:

pandas/core/index.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -691,8 +691,8 @@ def get_indexer(self, target, method=None, limit=None):
691691
return pself.get_indexer(ptarget, method=method, limit=limit)
692692

693693
if self.dtype != target.dtype:
694-
this = Index(self, dtype=object)
695-
target = Index(target, dtype=object)
694+
this = self.astype(object)
695+
target = target.astype(object)
696696
return this.get_indexer(target, method=method, limit=limit)
697697

698698
if not self.is_unique:
@@ -1172,8 +1172,12 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None):
11721172
levels = [_ensure_index(lev) for lev in levels]
11731173
labels = [np.asarray(labs, dtype=np.int_) for labs in labels]
11741174

1175-
values = [ndtake(np.asarray(lev), lab)
1175+
values = [ndtake(lev.values, lab)
11761176
for lev, lab in zip(levels, labels)]
1177+
1178+
# Need to box timestamps, etc.
1179+
values = _clean_arrays(values)
1180+
11771181
subarr = lib.fast_zip(values).view(cls)
11781182

11791183
subarr.levels = levels
@@ -2372,3 +2376,13 @@ def _maybe_box_dtindex(idx):
23722376
return Index(_dt_box_array(idx.asi8), dtype='object')
23732377
return idx
23742378

2379+
def _clean_arrays(values):
2380+
result = []
2381+
for arr in values:
2382+
if np.issubdtype(arr.dtype, np.datetime_):
2383+
result.append(lib.map_infer(arr, lib.Timestamp))
2384+
else:
2385+
result.append(arr)
2386+
return result
2387+
2388+

pandas/core/nanops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ def unique1d(values):
405405
uniques = np.array(table.unique(com._ensure_int64(values)),
406406
dtype=np.int64)
407407

408-
if values.dtype == np.datetime64:
408+
if issubclass(values.dtype.type, np.datetime_):
409409
uniques = uniques.view('M8[ns]')
410410
else:
411411
table = lib.PyObjectHashTable(len(values))

pandas/io/pytables.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -839,8 +839,7 @@ def _read_panel_table(self, group, where=None):
839839

840840
columns = _maybe_convert(sel.values['column'],
841841
table._v_attrs.columns_kind)
842-
index = _maybe_convert(sel.values['index'],
843-
table._v_attrs.index_kind)
842+
index = _maybe_convert(sel.values['index'], table._v_attrs.index_kind)
844843
values = sel.values['values']
845844

846845
major = Factor(index)
@@ -995,7 +994,7 @@ def _maybe_convert(values, val_kind):
995994

996995
def _get_converter(kind):
997996
if kind == 'datetime64':
998-
return lambda x: np.datetime64(x)
997+
return lambda x: np.array(x, dtype='M8[ns]')
999998
if kind == 'datetime':
1000999
return lib.convert_timestamps
10011000
else: # pragma: no cover
@@ -1069,7 +1068,7 @@ def generate(self, where):
10691068
field = c['field']
10701069

10711070
if field == 'index' and self.index_kind == 'datetime64':
1072-
val = np.datetime64(value).view('i8')
1071+
val = lib.Timestamp(value).value
10731072
self.conditions.append('(%s %s %s)' % (field,op,val))
10741073
elif field == 'index' and isinstance(value, datetime):
10751074
value = time.mktime(value.timetuple())

pandas/io/tests/test_parsers.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,8 @@ def test_parse_dates_column_list(self):
376376
lev = expected.index.levels[0]
377377
expected.index.levels[0] = lev.to_datetime(dayfirst=True)
378378
expected['aux_date'] = to_datetime(expected['aux_date'],
379-
dayfirst=True).astype('O')
379+
dayfirst=True)
380+
expected['aux_date'] = map(Timestamp, expected['aux_date'])
380381
self.assert_(isinstance(expected['aux_date'][0], datetime))
381382

382383
df = read_csv(StringIO(data), sep=";", index_col = range(4),

pandas/sparse/frame.py

+17
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,23 @@ def apply(self, func, axis=0, broadcast=False):
741741
else:
742742
return self._apply_broadcast(func, axis)
743743

744+
def applymap(self, func):
    """
    Apply a function to a DataFrame that is intended to operate
    elementwise, i.e. like calling func on every value of each series in
    the DataFrame

    Parameters
    ----------
    func : function
        Python function, returns a single value from a single value

    Returns
    -------
    applied : DataFrame
    """
    # Build a concrete list for each series: on Python 3 the map() builtin
    # returns a lazy iterator, which is not a sequence of results.
    return self.apply(lambda x: [func(v) for v in x])
760+
744761
@Appender(DataFrame.fillna.__doc__)
745762
def fillna(self, value=None, method='pad', inplace=False, limit=None):
746763
new_series = {}

pandas/src/datetime.pyx

+41-8
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,11 @@ class Timestamp(_Timestamp):
136136
conv = tz.normalize(self)
137137
return Timestamp(conv)
138138

139+
def replace(self, **kwds):
    """
    Return a new Timestamp with the given datetime fields replaced.

    The keyword arguments are forwarded unchanged to ``datetime.replace``;
    the result is wrapped in a Timestamp carrying this object's ``offset``.
    """
    replaced = datetime.replace(self, **kwds)
    return Timestamp(replaced, offset=self.offset)
142+
143+
139144
cdef inline bint is_timestamp(object o):
140145
return isinstance(o, Timestamp)
141146

@@ -194,10 +199,38 @@ def apply_offset(ndarray[object] values, object offset):
194199
# (see Timestamp class above). This will serve as a C extension type that
195200
# shadows the python class, where we do any heavy lifting.
196201
cdef class _Timestamp(datetime):
197-
cdef public:
202+
cdef readonly:
198203
int64_t value, nanosecond
199204
object offset # frequency reference
200205

206+
def __richcmp__(_Timestamp self, object other, int op):
    """
    Rich comparison of this timestamp against ``other``.

    ``other`` may be a _Timestamp, or a datetime (converted through
    ``Timestamp(other)``); the comparison is then made on the integer
    ``value`` of both sides.  For any other operand, ``==`` is False and
    ``!=`` is True, while the ordering comparisons raise TypeError.

    ``op`` codes handled: 0 ``<``, 1 ``<=``, 2 ``==``, 3 ``!=``, 4 ``>``,
    5 ``>=``.
    """
    cdef _Timestamp ots
    cdef int64_t lhs, rhs

    if isinstance(other, _Timestamp):
        ots = other
    elif isinstance(other, datetime):
        ots = Timestamp(other)
    elif op == 2:
        # equality with an unrelated type is simply False
        return False
    elif op == 3:
        return True
    else:
        raise TypeError('Cannot compare Timestamp with %s' % str(other))

    lhs = self.value
    rhs = ots.value

    if op == 0:  # <
        return lhs < rhs
    elif op == 1:  # <=
        return lhs <= rhs
    elif op == 2:  # ==
        return lhs == rhs
    elif op == 3:  # !=
        return lhs != rhs
    elif op == 4:  # >
        return lhs > rhs
    elif op == 5:  # >=
        return lhs >= rhs
233+
201234
def __add__(self, other):
202235
if is_integer_object(other):
203236
if self.offset is None:
@@ -313,6 +346,7 @@ cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts):
313346
dts.min = PyDateTime_DATE_GET_MINUTE(val)
314347
dts.sec = PyDateTime_DATE_GET_SECOND(val)
315348
dts.us = PyDateTime_DATE_GET_MICROSECOND(val)
349+
dts.ps = dts.as = 0
316350
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)
317351

318352
cdef inline int64_t _dtlike_to_datetime64(object val,
@@ -324,17 +358,16 @@ cdef inline int64_t _dtlike_to_datetime64(object val,
324358
dts.min = val.minute
325359
dts.sec = val.second
326360
dts.us = val.microsecond
361+
dts.ps = dts.as = 0
327362
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)
328363

329364
cdef inline int64_t _date_to_datetime64(object val,
330365
pandas_datetimestruct *dts):
331366
dts.year = PyDateTime_GET_YEAR(val)
332367
dts.month = PyDateTime_GET_MONTH(val)
333368
dts.day = PyDateTime_GET_DAY(val)
334-
dts.hour = 0
335-
dts.min = 0
336-
dts.sec = 0
337-
dts.us = 0
369+
dts.hour = dts.min = dts.sec = dts.us = 0
370+
dts.ps = dts.as = 0
338371
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)
339372

340373

@@ -928,7 +961,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo):
928961
arr = np.empty(sz, dtype='i8')
929962

930963
for i in range(sz):
931-
arr[i] = int(total_seconds(transinfo[i][0])) * 1000000
964+
arr[i] = int(total_seconds(transinfo[i][0])) * 1000000000
932965

933966
return arr
934967

@@ -1243,7 +1276,7 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq):
12431276
for i in range(l):
12441277
pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts)
12451278
out[i] = get_period_ordinal(dts.year, dts.month, dts.day,
1246-
dts.hour, dts.min, dts.sec, freq)
1279+
dts.hour, dts.min, dts.sec, freq)
12471280
return out
12481281

12491282
def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq):
@@ -1338,7 +1371,7 @@ cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq):
13381371
dts.hour = dinfo.hour
13391372
dts.min = dinfo.minute
13401373
dts.sec = int(dinfo.second)
1341-
dts.us = 0
1374+
dts.us = dts.ps = 0
13421375

13431376
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
13441377

0 commit comments

Comments
 (0)