Skip to content

Commit c1b7ac0

Browse files
committed
Merge pull request #6083 from jreback/sparc_fix3
BUG: not converting scalars properly to M8/m8 on assignment (GH6079)
2 parents 70b1ad4 + 4392ae3 commit c1b7ac0

File tree

9 files changed

+104
-98
lines changed

9 files changed

+104
-98
lines changed

pandas/core/algorithms.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
154154
uniques = uniques.take(sorter)
155155

156156
if is_datetime:
157-
uniques = uniques.view('M8[ns]')
157+
uniques = uniques.astype('M8[ns]')
158158
if isinstance(values, PeriodIndex):
159159
uniques = PeriodIndex(ordinal=uniques, freq=values.freq)
160160

@@ -279,6 +279,7 @@ def rank(values, axis=0, method='average', na_option='keep',
279279
f, values = _get_data_algo(values, _rank2d_functions)
280280
ranks = f(values, axis=axis, ties_method=method,
281281
ascending=ascending, na_option=na_option)
282+
282283
return ranks
283284

284285

@@ -364,12 +365,22 @@ def _interpolate(a, b, fraction):
364365

365366

366367
def _get_data_algo(values, func_map):
368+
mask = None
367369
if com.is_float_dtype(values):
368370
f = func_map['float64']
369371
values = com._ensure_float64(values)
370372
elif com.is_datetime64_dtype(values):
371-
f = func_map['int64']
372-
values = values.view('i8')
373+
374+
# if we have NaT, punt to object dtype
375+
mask = com.isnull(values)
376+
if mask.ravel().any():
377+
f = func_map['generic']
378+
values = com._ensure_object(values)
379+
values[mask] = np.nan
380+
else:
381+
f = func_map['int64']
382+
values = values.view('i8')
383+
373384
elif com.is_integer_dtype(values):
374385
f = func_map['int64']
375386
values = com._ensure_int64(values)

pandas/core/array.py

-15
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,3 @@
3535

3636
NA = np.nan
3737

38-
# a series-like ndarray ####
39-
40-
41-
class SNDArray(Array):
42-
43-
def __new__(cls, data, index=None, name=None):
44-
data = data.view(SNDArray)
45-
data.index = index
46-
data.name = name
47-
48-
return data
49-
50-
@property
51-
def values(self):
52-
return self.view(Array)

pandas/core/common.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import codecs
99
import csv
1010
import types
11+
from datetime import datetime, timedelta
1112

1213
from numpy.lib.format import read_array, write_array
1314
import numpy as np
@@ -39,7 +40,7 @@ class AmbiguousIndexError(PandasError, KeyError):
3940
pass
4041

4142

42-
_POSSIBLY_CAST_DTYPES = set([np.dtype(t)
43+
_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
4344
for t in ['M8[ns]', '>M8[ns]', '<M8[ns]',
4445
'm8[ns]', '>m8[ns]', '<m8[ns]',
4546
'O', 'int8',
@@ -867,11 +868,14 @@ def _infer_dtype_from_scalar(val):
867868

868869
dtype = np.object_
869870

870-
elif isinstance(val, np.datetime64):
871-
# ugly hacklet
871+
elif isinstance(val, (np.datetime64, datetime)) and getattr(val,'tz',None) is None:
872872
val = lib.Timestamp(val).value
873873
dtype = np.dtype('M8[ns]')
874874

875+
elif isinstance(val, (np.timedelta64, timedelta)):
876+
val = tslib.convert_to_timedelta(val,'ns')
877+
dtype = np.dtype('m8[ns]')
878+
875879
elif is_bool(val):
876880
dtype = np.bool_
877881

@@ -1608,7 +1612,7 @@ def _possibly_convert_objects(values, convert_dates=True,
16081612

16091613

16101614
def _possibly_castable(arr):
1611-
return arr.dtype not in _POSSIBLY_CAST_DTYPES
1615+
return arr.dtype.name not in _POSSIBLY_CAST_DTYPES
16121616

16131617

16141618
def _possibly_convert_platform(values):

pandas/core/frame.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -4696,9 +4696,14 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None):
46964696
raise AssertionError('%d columns passed, passed data had %s '
46974697
'columns' % (len(columns), len(content)))
46984698

4699-
arrays = [lib.maybe_convert_objects(arr, try_float=coerce_float)
4700-
if dtype != object and dtype != np.object else arr
4701-
for arr in content]
4699+
# provide soft conversion of object dtypes
4700+
def convert(arr):
4701+
if dtype != object and dtype != np.object:
4702+
arr = lib.maybe_convert_objects(arr, try_float=coerce_float)
4703+
arr = com._possibly_cast_to_datetime(arr, dtype)
4704+
return arr
4705+
4706+
arrays = [ convert(arr) for arr in content ]
47024707

47034708
return arrays, columns
47044709

pandas/io/tests/test_stata.py

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ def test_read_dta1(self):
7171
def test_read_dta2(self):
7272
if LooseVersion(sys.version) < '2.7':
7373
raise nose.SkipTest('datetime interp under 2.6 is faulty')
74+
skip_if_not_little_endian()
7475

7576
expected = DataFrame.from_records(
7677
[

pandas/src/inference.pyx

+8-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,14 @@ def infer_dtype(object _values):
5656
if n == 0:
5757
return 'empty'
5858

59-
val = util.get_value_1d(values, 0)
59+
# make contiguous
60+
values = values.ravel()
61+
62+
# try to use a valid value
63+
for i in range(n):
64+
val = util.get_value_1d(values, i)
65+
if not is_null_datetimelike(val):
66+
break
6067

6168
if util.is_datetime64_object(val) or val is NaT:
6269
if is_datetime64_array(values):

pandas/src/reduce.pyx

+24-60
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from numpy cimport *
33
import numpy as np
44

5-
from pandas.core.array import SNDArray
65
from distutils.version import LooseVersion
76

87
is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2'
@@ -114,8 +113,8 @@ cdef class Reducer:
114113

115114
# use the cached_typ if possible
116115
if cached_typ is not None:
117-
cached_typ._data._block.values = chunk
118-
cached_typ.name = name
116+
object.__setattr__(cached_typ._data._block, 'values', chunk)
117+
object.__setattr__(cached_typ, 'name', name)
119118
res = self.f(cached_typ)
120119
else:
121120
res = self.f(chunk)
@@ -164,7 +163,7 @@ cdef class SeriesBinGrouper:
164163
bint passed_dummy
165164

166165
cdef public:
167-
object arr, index, dummy_arr, dummy_index, values, f, bins, typ, ityp, name
166+
object arr, index, dummy_arr, dummy_index, values, f, bins, typ, name
168167

169168
def __init__(self, object series, object f, object bins, object dummy):
170169
n = len(series)
@@ -178,7 +177,6 @@ cdef class SeriesBinGrouper:
178177
self.arr = values
179178
self.index = series.index
180179
self.typ = type(series)
181-
self.ityp = type(series.index)
182180
self.name = getattr(series,'name',None)
183181

184182
self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
@@ -210,9 +208,10 @@ cdef class SeriesBinGrouper:
210208
ndarray[int64_t] counts
211209
Py_ssize_t i, n, group_size
212210
object res
213-
bint initialized = 0, needs_typ = 1, try_typ = 0
211+
bint initialized = 0
214212
Slider vslider, islider
215-
object gin, typ, ityp, name
213+
object gin, typ, name
214+
object cached_typ = None
216215

217216
counts = np.zeros(self.ngroups, dtype=np.int64)
218217

@@ -226,45 +225,29 @@ cdef class SeriesBinGrouper:
226225

227226
group_size = 0
228227
n = len(self.arr)
229-
typ = self.typ
230-
ityp = self.ityp
231228
name = self.name
232229

233230
vslider = Slider(self.arr, self.dummy_arr)
234231
islider = Slider(self.index, self.dummy_index)
235232

236233
gin = self.dummy_index._engine
237234

238-
# old numpy issue, need to always create and pass the Series
239-
if is_numpy_prior_1_6_2:
240-
try_typ = 1
241-
needs_typ = 1
242-
243235
try:
244236
for i in range(self.ngroups):
245237
group_size = counts[i]
246238

247239
islider.set_length(group_size)
248240
vslider.set_length(group_size)
249241

250-
# see if we need to create the object proper
251-
if try_typ:
252-
if needs_typ:
253-
res = self.f(typ(vslider.buf, index=islider.buf,
254-
name=name, fastpath=True))
255-
else:
256-
res = self.f(SNDArray(vslider.buf,islider.buf,name=name))
242+
if cached_typ is None:
243+
cached_typ = self.typ(vslider.buf, index=islider.buf,
244+
name=name)
257245
else:
258-
try:
259-
res = self.f(SNDArray(vslider.buf,islider.buf,name=name))
260-
needs_typ = 0
261-
except:
262-
res = self.f(typ(vslider.buf, index=islider.buf,
263-
name=name, fastpath=True))
264-
needs_typ = 1
265-
266-
try_typ = 1
246+
object.__setattr__(cached_typ._data._block, 'values', vslider.buf)
247+
object.__setattr__(cached_typ, '_index', islider.buf)
248+
object.__setattr__(cached_typ, 'name', name)
267249

250+
res = self.f(cached_typ)
268251
res = _extract_result(res)
269252
if not initialized:
270253
result = self._get_result_array(res)
@@ -309,7 +292,7 @@ cdef class SeriesGrouper:
309292
bint passed_dummy
310293

311294
cdef public:
312-
object arr, index, dummy_arr, dummy_index, f, labels, values, typ, ityp, name
295+
object arr, index, dummy_arr, dummy_index, f, labels, values, typ, name
313296

314297
def __init__(self, object series, object f, object labels,
315298
Py_ssize_t ngroups, object dummy):
@@ -324,7 +307,6 @@ cdef class SeriesGrouper:
324307
self.arr = values
325308
self.index = series.index
326309
self.typ = type(series)
327-
self.ityp = type(series.index)
328310
self.name = getattr(series,'name',None)
329311

330312
self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
@@ -351,28 +333,22 @@ cdef class SeriesGrouper:
351333
ndarray[int64_t] labels, counts
352334
Py_ssize_t i, n, group_size, lab
353335
object res
354-
bint initialized = 0, needs_typ = 1, try_typ = 0
336+
bint initialized = 0
355337
Slider vslider, islider
356-
object gin, typ, ityp, name
338+
object gin, typ, name
339+
object cached_typ = None
357340

358341
labels = self.labels
359342
counts = np.zeros(self.ngroups, dtype=np.int64)
360343
group_size = 0
361344
n = len(self.arr)
362-
typ = self.typ
363-
ityp = self.ityp
364345
name = self.name
365346

366347
vslider = Slider(self.arr, self.dummy_arr)
367348
islider = Slider(self.index, self.dummy_index)
368349

369350
gin = self.dummy_index._engine
370351

371-
# old numpy issue, need to always create and pass the Series
372-
if is_numpy_prior_1_6_2:
373-
try_typ = 1
374-
needs_typ = 1
375-
376352
try:
377353
for i in range(n):
378354
group_size += 1
@@ -389,27 +365,15 @@ cdef class SeriesGrouper:
389365
islider.set_length(group_size)
390366
vslider.set_length(group_size)
391367

392-
# see if we need to create the object proper
393-
# try on the first go around
394-
if try_typ:
395-
if needs_typ:
396-
res = self.f(typ(vslider.buf, index=islider.buf,
397-
name=name, fastpath=True))
398-
else:
399-
res = self.f(SNDArray(vslider.buf,islider.buf,name=name))
368+
if cached_typ is None:
369+
cached_typ = self.typ(vslider.buf, index=islider.buf,
370+
name=name)
400371
else:
372+
object.__setattr__(cached_typ._data._block, 'values', vslider.buf)
373+
object.__setattr__(cached_typ, '_index', islider.buf)
374+
object.__setattr__(cached_typ, 'name', name)
401375

402-
# try with a numpy array directly
403-
try:
404-
res = self.f(SNDArray(vslider.buf,islider.buf,name=name))
405-
needs_typ = 0
406-
except (Exception), detail:
407-
res = self.f(typ(vslider.buf, index=islider.buf,
408-
name=name, fastpath=True))
409-
needs_typ = 1
410-
411-
try_typ = 1
412-
376+
res = self.f(cached_typ)
413377
res = _extract_result(res)
414378
if not initialized:
415379
result = self._get_result_array(res)

pandas/tests/test_frame.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -10489,13 +10489,17 @@ def test_rank2(self):
1048910489
[datetime(2000, 1, 2), datetime(2000, 1, 3),
1049010490
datetime(2000, 1, 1)]]
1049110491
df = DataFrame(data)
10492+
10493+
# check the rank
1049210494
expected = DataFrame([[2., nan, 1.],
1049310495
[2., 3., 1.]])
1049410496
result = df.rank(1, numeric_only=False)
1049510497
assert_frame_equal(result, expected)
1049610498

1049710499
# mixed-type frames
10498-
self.mixed_frame['foo'] = datetime.now()
10500+
self.mixed_frame['datetime'] = datetime.now()
10501+
self.mixed_frame['timedelta'] = timedelta(days=1,seconds=1)
10502+
1049910503
result = self.mixed_frame.rank(1)
1050010504
expected = self.mixed_frame.rank(1, numeric_only=True)
1050110505
assert_frame_equal(result, expected)
@@ -11087,6 +11091,31 @@ def test_constructor_with_convert(self):
1108711091
None], np.object_))
1108811092
assert_series_equal(result, expected)
1108911093

11094+
def test_construction_with_mixed(self):
11095+
# test construction edge cases with mixed types
11096+
11097+
# f7u12, this does not work without extensive workaround
11098+
data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)],
11099+
[datetime(2000, 1, 2), datetime(2000, 1, 3),
11100+
datetime(2000, 1, 1)]]
11101+
df = DataFrame(data)
11102+
11103+
# check dtypes
11104+
result = df.get_dtype_counts().order()
11105+
expected = Series({ 'datetime64[ns]' : 3 })
11106+
11107+
# mixed-type frames
11108+
self.mixed_frame['datetime'] = datetime.now()
11109+
self.mixed_frame['timedelta'] = timedelta(days=1,seconds=1)
11110+
self.assert_(self.mixed_frame['datetime'].dtype == 'M8[ns]')
11111+
self.assert_(self.mixed_frame['timedelta'].dtype == 'm8[ns]')
11112+
result = self.mixed_frame.get_dtype_counts().order()
11113+
expected = Series({ 'float64' : 4,
11114+
'object' : 1,
11115+
'datetime64[ns]' : 1,
11116+
'timedelta64[ns]' : 1}).order()
11117+
assert_series_equal(result,expected)
11118+
1109011119
def test_constructor_frame_copy(self):
1109111120
cop = DataFrame(self.frame, copy=True)
1109211121
cop['A'] = 5

0 commit comments

Comments
 (0)