Skip to content

Commit 132d90d

Browse files
committed
BUG: various bug fixes for DataFrame/Series construction related to:
0 and 1 len ndarrays datetimes that are single objects mixed datetimes and objects (GH pandas-dev#2751) astype now converts correctly with a datetime64 type to object, NaT are converted to np.nan _get_numeric_data with empty mixed-type returning empty, but index was missing DOC: release notes updated, added missing_data section to docs, whatsnew 0.10.2
1 parent 3ba3119 commit 132d90d

13 files changed

+260
-48
lines changed

RELEASE.rst

+14
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,20 @@ Where to get it
2222
* Binary installers on PyPI: http://pypi.python.org/pypi/pandas
2323
* Documentation: http://pandas.pydata.org
2424

25+
**API Changes**
26+
27+
- Series will now automatically try to set the correct dtype based on passed datetimelike objects (datetime/Timestamp)
28+
- timedelta64 are returned in appropriate cases (e.g. Series - Series, when both are datetime64)
29+
- mixed datetimes and objects (GH2751_) in a constructor will be cast correctly
30+
- astype on datetimes to object is now handled (as well as NaT conversions to np.nan)
31+
32+
**Bug fixes**
33+
34+
- Single element ndarrays of datetimelike objects are handled (e.g. np.array(datetime(2001,1,1,0,0))), w/o dtype being passed
35+
- 0-dim ndarrays with a passed dtype are handled correctly (e.g. np.array(0.,dtype='float32'))
36+
37+
.. _GH2751: https://github.com/pydata/pandas/issues/2751
38+
2539
pandas 0.10.1
2640
=============
2741

doc/source/missing_data.rst

+17
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,23 @@ pandas provides the :func:`~pandas.core.common.isnull` and
8080
missing by the ``isnull`` and ``notnull`` functions. ``inf`` and
8181
``-inf`` are no longer considered missing by default.
8282

83+
Datetimes
84+
---------
85+
86+
For datetime64[ns] types, ``NaT`` represents missing values. This is a pseudo-native
87+
sentinel value that can be represented by numpy in a singular dtype (datetime64[ns]).
88+
Pandas objects provide intercompatibility between ``NaT`` and ``NaN``.
89+
90+
.. ipython:: python
91+
92+
df2 = df.copy()
93+
df2['timestamp'] = Timestamp('20120101')
94+
df2
95+
df2.ix[['a','c','h'],['one','timestamp']] = np.nan
96+
df2
97+
df2.get_dtype_counts()
98+
99+
83100
Calculations with missing data
84101
------------------------------
85102

doc/source/v0.10.2.txt

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
.. _whatsnew_0102:
2+
3+
v0.10.2 (February ??, 2013)
4+
---------------------------
5+
6+
This is a minor release from 0.10.1 and includes many new features and
7+
enhancements along with a large number of bug fixes. There are also a number of
8+
important API changes that long-time pandas users should pay close attention
9+
to.
10+
11+
API changes
12+
~~~~~~~~~~~
13+
14+
Datetime64[ns] columns in a DataFrame (or a Series) allow the use of ``np.nan`` to indicate a nan value, in addition to the traditional ``NaT``, or not-a-time. This allows convenient nan setting in a generic way. Furthermore, datetime64[ns] columns are created by default when passed datetimelike objects (*this change was introduced in 0.10.1*)
15+
16+
.. ipython:: python
17+
18+
df = DataFrame(randn(6,2),date_range('20010102',periods=6),columns=['A','B'])
19+
df['timestamp'] = Timestamp('20010103')
20+
df
21+
22+
# datetime64[ns] out of the box
23+
df.get_dtype_counts()
24+
25+
# use the traditional nan, which is mapped to NaT internally
26+
df.ix[2:4,['A','timestamp']] = np.nan
27+
df
28+
29+
Astype conversion on datetime64[ns] to object implicitly converts ``NaT`` to ``np.nan``
30+
31+
32+
.. ipython:: python
33+
34+
import datetime
35+
s = Series([datetime.datetime(2001, 1, 2, 0, 0) for i in range(3)])
36+
s.dtype
37+
s[1] = np.nan
38+
s
39+
s.dtype
40+
s = s.astype('O')
41+
s
42+
s.dtype
43+
44+
New features
45+
~~~~~~~~~~~~
46+
47+
**Enhancements**
48+
49+
**Bug Fixes**
50+
51+
See the `full release notes
52+
<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
53+
on GitHub for a complete list.
54+

doc/source/whatsnew.rst

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ What's New
1616

1717
These are new features and improvements of note in each release.
1818

19+
.. include:: v0.10.2.txt
20+
1921
.. include:: v0.10.1.txt
2022

2123
.. include:: v0.10.0.txt

pandas/core/common.py

+14
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,20 @@ def _possibly_cast_to_datetime(value, dtype):
654654
except:
655655
pass
656656

657+
elif dtype is None:
658+
# we might have a array (or single object) that is datetime like, and no dtype is passed
659+
# don't change the value unless we find a datetime set
660+
v = value
661+
if not (is_list_like(v) or hasattr(v,'len')):
662+
v = [ v ]
663+
if len(v):
664+
inferred_type = lib.infer_dtype(v)
665+
if inferred_type == 'datetime':
666+
try:
667+
value = tslib.array_to_datetime(np.array(v))
668+
except:
669+
pass
670+
657671
return value
658672

659673

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -4289,7 +4289,7 @@ def applymap(self, func):
42894289

42904290
# if we have a dtype == 'M8[ns]', provide boxed values
42914291
def infer(x):
4292-
if x.dtype == 'M8[ns]':
4292+
if com.is_datetime64_dtype(x):
42934293
x = lib.map_infer(x, lib.Timestamp)
42944294
return lib.map_infer(x, func)
42954295
return self.apply(infer)
@@ -4980,7 +4980,7 @@ def _get_agg_axis(self, axis_num):
49804980
def _get_numeric_data(self):
49814981
if self._is_mixed_type:
49824982
num_data = self._data.get_numeric_data()
4983-
return DataFrame(num_data, copy=False)
4983+
return DataFrame(num_data, index=self.index, copy=False)
49844984
else:
49854985
if (self.values.dtype != np.object_ and
49864986
not issubclass(self.values.dtype.type, np.datetime64)):
@@ -4991,7 +4991,7 @@ def _get_numeric_data(self):
49914991
def _get_bool_data(self):
49924992
if self._is_mixed_type:
49934993
bool_data = self._data.get_bool_data()
4994-
return DataFrame(bool_data, copy=False)
4994+
return DataFrame(bool_data, index=self.index, copy=False)
49954995
else: # pragma: no cover
49964996
if self.values.dtype == np.bool_:
49974997
return self

pandas/core/series.py

+47-11
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,28 @@ def na_op(x, y):
7272

7373
def wrapper(self, other):
7474
from pandas.core.frame import DataFrame
75+
dtype = None
7576
wrap_results = lambda x: x
7677

7778
lvalues, rvalues = self, other
7879

79-
if (com.is_datetime64_dtype(self) and
80-
com.is_datetime64_dtype(other)):
80+
if com.is_datetime64_dtype(self):
81+
82+
if not isinstance(rvalues, np.ndarray):
83+
rvalues = np.array([rvalues])
84+
85+
# rhs is either a timedelta or a series/ndarray
86+
if lib.is_timedelta_array(rvalues):
87+
rvalues = np.array([ np.timedelta64(v) for v in rvalues ],dtype='timedelta64[ns]')
88+
dtype = 'M8[ns]'
89+
elif com.is_datetime64_dtype(rvalues):
90+
dtype = 'timedelta64[ns]'
91+
else:
92+
raise ValueError("cannot operate on a series with out a rhs of a series/ndarray of type datetime64[ns] or a timedelta")
93+
8194
lvalues = lvalues.view('i8')
8295
rvalues = rvalues.view('i8')
8396

84-
wrap_results = lambda rs: rs.astype('timedelta64[ns]')
85-
8697
if isinstance(rvalues, Series):
8798
lvalues = lvalues.values
8899
rvalues = rvalues.values
@@ -91,7 +102,7 @@ def wrapper(self, other):
91102
if self.index.equals(other.index):
92103
name = _maybe_match_name(self, other)
93104
return Series(wrap_results(na_op(lvalues, rvalues)),
94-
index=self.index, name=name)
105+
index=self.index, name=name, dtype=dtype)
95106

96107
join_idx, lidx, ridx = self.index.join(other.index, how='outer',
97108
return_indexers=True)
@@ -105,13 +116,13 @@ def wrapper(self, other):
105116
arr = na_op(lvalues, rvalues)
106117

107118
name = _maybe_match_name(self, other)
108-
return Series(arr, index=join_idx, name=name)
119+
return Series(arr, index=join_idx, name=name,dtype=dtype)
109120
elif isinstance(other, DataFrame):
110121
return NotImplemented
111122
else:
112123
# scalars
113124
return Series(na_op(lvalues.values, rvalues),
114-
index=self.index, name=self.name)
125+
index=self.index, name=self.name, dtype=dtype)
115126
return wrapper
116127

117128

@@ -777,7 +788,7 @@ def astype(self, dtype):
777788
See numpy.ndarray.astype
778789
"""
779790
casted = com._astype_nansafe(self.values, dtype)
780-
return self._constructor(casted, index=self.index, name=self.name)
791+
return self._constructor(casted, index=self.index, name=self.name, dtype=casted.dtype)
781792

782793
def convert_objects(self, convert_dates=True):
783794
"""
@@ -1195,7 +1206,7 @@ def tolist(self):
11951206
Overrides numpy.ndarray.tolist
11961207
"""
11971208
if com.is_datetime64_dtype(self):
1198-
return self.astype(object).values.tolist()
1209+
return list(self)
11991210
return self.values.tolist()
12001211

12011212
def to_dict(self):
@@ -3083,8 +3094,12 @@ def _try_cast(arr):
30833094
raise TypeError('Cannot cast datetime64 to %s' % dtype)
30843095
else:
30853096
subarr = _try_cast(data)
3086-
elif copy:
3097+
else:
3098+
subarr = _try_cast(data)
3099+
3100+
if copy:
30873101
subarr = data.copy()
3102+
30883103
elif isinstance(data, list) and len(data) > 0:
30893104
if dtype is not None:
30903105
try:
@@ -3094,12 +3109,15 @@ def _try_cast(arr):
30943109
raise
30953110
subarr = np.array(data, dtype=object, copy=copy)
30963111
subarr = lib.maybe_convert_objects(subarr)
3112+
subarr = com._possibly_cast_to_datetime(subarr, dtype)
30973113
else:
30983114
subarr = lib.list_to_object_array(data)
30993115
subarr = lib.maybe_convert_objects(subarr)
3116+
subarr = com._possibly_cast_to_datetime(subarr, dtype)
31003117
else:
31013118
subarr = _try_cast(data)
31023119

3120+
# scalar like
31033121
if subarr.ndim == 0:
31043122
if isinstance(data, list): # pragma: no cover
31053123
subarr = np.array(data, dtype=object)
@@ -3115,7 +3133,14 @@ def _try_cast(arr):
31153133
dtype = np.object_
31163134

31173135
if dtype is None:
3118-
value, dtype = _dtype_from_scalar(value)
3136+
3137+
# a 1-element ndarray
3138+
if isinstance(value, np.ndarray):
3139+
dtype = value.dtype
3140+
value = value.item()
3141+
else:
3142+
value, dtype = _dtype_from_scalar(value)
3143+
31193144
subarr = np.empty(len(index), dtype=dtype)
31203145
else:
31213146
# need to possibly convert the value here
@@ -3124,6 +3149,17 @@ def _try_cast(arr):
31243149
subarr.fill(value)
31253150
else:
31263151
return subarr.item()
3152+
3153+
# the result that we want
3154+
elif subarr.ndim == 1:
3155+
if index is not None:
3156+
3157+
# a 1-element ndarray
3158+
if len(subarr) != len(index) and len(subarr) == 1:
3159+
value = subarr[0]
3160+
subarr = np.empty(len(index), dtype=subarr.dtype)
3161+
subarr.fill(value)
3162+
31273163
elif subarr.ndim > 1:
31283164
if isinstance(data, np.ndarray):
31293165
raise Exception('Data must be 1-dimensional')

pandas/src/inference.pyx

+11
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,17 @@ def is_datetime64_array(ndarray values):
265265
return False
266266
return True
267267

268+
def is_timedelta_array(ndarray values):
269+
import datetime
270+
cdef int i, n = len(values)
271+
if n == 0:
272+
return False
273+
for i in range(n):
274+
if not isinstance(values[i],datetime.timedelta):
275+
return False
276+
return True
277+
278+
268279
def is_date_array(ndarray[object] values):
269280
cdef int i, n = len(values)
270281
if n == 0:

pandas/tests/test_frame.py

+36-3
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ def _skip_if_no_scipy():
4747

4848
JOIN_TYPES = ['inner', 'outer', 'left', 'right']
4949

50-
5150
class CheckIndexing(object):
5251

5352
_multiprocess_can_split_ = True
@@ -6484,14 +6483,18 @@ def test_get_X_columns(self):
64846483
['a', 'e']))
64856484

64866485
def test_get_numeric_data(self):
6487-
df = DataFrame({'a': 1., 'b': 2, 'c': 'foo'},
6486+
6487+
df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', 'f' : Timestamp('20010102')},
64886488
index=np.arange(10))
6489+
result = df.get_dtype_counts()
6490+
expected = Series({'int64': 1, 'float64' : 1, 'datetime64[ns]': 1, 'object' : 1})
6491+
assert_series_equal(result, expected)
64896492

64906493
result = df._get_numeric_data()
64916494
expected = df.ix[:, ['a', 'b']]
64926495
assert_frame_equal(result, expected)
64936496

6494-
only_obj = df.ix[:, ['c']]
6497+
only_obj = df.ix[:, ['c','f']]
64956498
result = only_obj._get_numeric_data()
64966499
expected = df.ix[:, []]
64976500
assert_frame_equal(result, expected)
@@ -7367,6 +7370,36 @@ def test_as_matrix_numeric_cols(self):
73677370
values = self.frame.as_matrix(['A', 'B', 'C', 'D'])
73687371
self.assert_(values.dtype == np.float64)
73697372

7373+
7374+
def test_constructor_with_datetimes(self):
7375+
7376+
# single item
7377+
df = DataFrame({'A' : 1, 'B' : 'foo', 'C' : 'bar', 'D' : Timestamp("20010101"), 'E' : datetime(2001,1,2,0,0) },
7378+
index=np.arange(10))
7379+
result = df.get_dtype_counts()
7380+
expected = Series({'int64': 1, 'datetime64[ns]': 2, 'object' : 2})
7381+
assert_series_equal(result, expected)
7382+
7383+
# check with ndarray construction ndim==0 (e.g. we are passing a ndim 0 ndarray with a dtype specified)
7384+
df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', 'float64' : np.array(1.,dtype='float64'),
7385+
'int64' : np.array(1,dtype='int64')}, index=np.arange(10))
7386+
result = df.get_dtype_counts()
7387+
expected = Series({'int64': 2, 'float64' : 2, 'object' : 1})
7388+
assert_series_equal(result, expected)
7389+
7390+
# check with ndarray construction ndim>0
7391+
df = DataFrame({'a': 1., 'b': 2, 'c': 'foo', 'float64' : np.array([1.]*10,dtype='float64'),
7392+
'int64' : np.array([1]*10,dtype='int64')}, index=np.arange(10))
7393+
result = df.get_dtype_counts()
7394+
expected = Series({'int64': 2, 'float64' : 2, 'object' : 1})
7395+
assert_series_equal(result, expected)
7396+
7397+
# GH #2751 (construction with no index specified)
7398+
df = DataFrame({'a':[1,2,4,7], 'b':[1.2, 2.3, 5.1, 6.3], 'c':list('abcd'), 'd':[datetime(2000,1,1) for i in range(4)] })
7399+
result = df.get_dtype_counts()
7400+
expected = Series({'int64': 1, 'float64' : 1, 'datetime64[ns]': 1, 'object' : 1})
7401+
assert_series_equal(result, expected)
7402+
73707403
def test_constructor_frame_copy(self):
73717404
cop = DataFrame(self.frame, copy=True)
73727405
cop['A'] = 5

0 commit comments

Comments
 (0)