Skip to content

Commit fc53067

Browse files
committed
Merge PR #2752
2 parents 8ad9598 + 132d90d commit fc53067

File tree

12 files changed

+255
-67
lines changed

12 files changed

+255
-67
lines changed

RELEASE.rst

+14-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ pandas 0.10.2
4444
- Do not automatically upcast numeric specified dtypes to ``int64`` or ``float64`` (GH622_ and GH797_)
4545
- Guarantee that ``convert_objects()`` for Series/DataFrame always returns a copy
4646
- groupby operations will respect dtypes for numeric float operations (float32/float64); other types will be operated on,
47-
and will try to cast back to the input dtype (e.g. if an int is passed, as long as the output doesn't have nans,
47+
and will try to cast back to the input dtype (e.g. if an int is passed, as long as the output doesn't have nans,
4848
then an int will be returned)
4949
- backfill/pad/take/diff/ohlc will now support ``float32/int16/int8`` operations
5050
- Integer block types will upcast as needed in where operations (GH2793_)
@@ -53,10 +53,23 @@ pandas 0.10.2
5353

5454
- Fix seg fault on empty data frame when fillna with ``pad`` or ``backfill`` (GH2778_)
5555

56+
**API Changes**
57+
58+
- Series will now automatically try to set the correct dtype based on passed datetimelike objects (datetime/Timestamp)
59+
- timedelta64 are returned in appropriate cases (e.g. Series - Series, when both are datetime64)
60+
- mixed datetimes and objects (GH2751_) in a constructor will be cast correctly
61+
- astype on datetimes to object is now handled (as well as NaT conversions to np.nan)
62+
63+
**Bug fixes**
64+
65+
- Single element ndarrays of datetimelike objects are handled (e.g. np.array(datetime(2001,1,1,0,0))), w/o dtype being passed
66+
- 0-dim ndarrays with a passed dtype are handled correctly (e.g. np.array(0.,dtype='float32'))
67+
5668
.. _GH622: https://github.com/pydata/pandas/issues/622
5769
.. _GH797: https://github.com/pydata/pandas/issues/797
5870
.. _GH2778: https://github.com/pydata/pandas/issues/2778
5971
.. _GH2793: https://github.com/pydata/pandas/issues/2793
72+
.. _GH2751: https://github.com/pydata/pandas/issues/2751
6073

6174
pandas 0.10.1
6275
=============

doc/source/missing_data.rst

+17
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,23 @@ pandas provides the :func:`~pandas.core.common.isnull` and
8080
missing by the ``isnull`` and ``notnull`` functions. ``inf`` and
8181
``-inf`` are no longer considered missing by default.
8282

83+
Datetimes
84+
---------
85+
86+
For datetime64[ns] types, ``NaT`` represents missing values. This is a pseudo-native
87+
sentinel value that can be represented by numpy in a singular dtype (datetime64[ns]).
88+
Pandas objects provide intercompatibility between ``NaT`` and ``NaN``.
89+
90+
.. ipython:: python
91+
92+
df2 = df.copy()
93+
df2['timestamp'] = Timestamp('20120101')
94+
df2
95+
df2.ix[['a','c','h'],['one','timestamp']] = np.nan
96+
df2
97+
df2.get_dtype_counts()
98+
99+
83100
Calculations with missing data
84101
------------------------------
85102

doc/source/v0.10.2.txt

+31-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ Numeric dtypes will propagate and can coexist in DataFrames. If a dtype is passed
5050
df3.dtypes
5151

5252
# forcing date coercion
53-
s = Series([datetime(2001,1,1,0,0), 'foo', 1.0, 1,
53+
s = Series([datetime(2001,1,1,0,0), 'foo', 1.0, 1,
5454
Timestamp('20010104'), '20010105'],dtype='O')
5555
s.convert_objects(convert_dates='coerce')
5656

@@ -93,3 +93,33 @@ See the `full release notes
9393
<https://github.com/pydata/pandas/blob/master/RELEASE.rst>`__ or issue tracker
9494
on GitHub for a complete list.
9595

96+
97+
Datetime64[ns] columns in a DataFrame (or a Series) allow the use of ``np.nan`` to indicate a nan value, in addition to the traditional ``NaT``, or not-a-time. This allows convenient nan setting in a generic way. Furthermore, datetime64 columns are created by default when passed datetimelike objects (*this change was introduced in 0.10.1*).
98+
99+
.. ipython:: python
100+
101+
df = DataFrame(randn(6,2),date_range('20010102',periods=6),columns=['A','B'])
102+
df['timestamp'] = Timestamp('20010103')
103+
df
104+
105+
# datetime64[ns] out of the box
106+
df.get_dtype_counts()
107+
108+
# use the traditional nan, which is mapped to NaT internally
109+
df.ix[2:4,['A','timestamp']] = np.nan
110+
df
111+
112+
Astype conversion on datetime64[ns] to object implicitly converts ``NaT`` to ``np.nan``
113+
114+
115+
.. ipython:: python
116+
117+
import datetime
118+
s = Series([datetime.datetime(2001, 1, 2, 0, 0) for i in range(3)])
119+
s.dtype
120+
s[1] = np.nan
121+
s
122+
s.dtype
123+
s = s.astype('O')
124+
s
125+
s.dtype

pandas/core/common.py

+14
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,20 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
720720
except:
721721
pass
722722

723+
elif dtype is None:
724+
# we might have a array (or single object) that is datetime like, and no dtype is passed
725+
# don't change the value unless we find a datetime set
726+
v = value
727+
if not (is_list_like(v) or hasattr(v,'len')):
728+
v = [ v ]
729+
if len(v):
730+
inferred_type = lib.infer_dtype(v)
731+
if inferred_type == 'datetime':
732+
try:
733+
value = tslib.array_to_datetime(np.array(v))
734+
except:
735+
pass
736+
723737
return value
724738

725739

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -4300,7 +4300,7 @@ def applymap(self, func):
43004300

43014301
# if we have a dtype == 'M8[ns]', provide boxed values
43024302
def infer(x):
4303-
if x.dtype == 'M8[ns]':
4303+
if com.is_datetime64_dtype(x):
43044304
x = lib.map_infer(x, lib.Timestamp)
43054305
return lib.map_infer(x, func)
43064306
return self.apply(infer)
@@ -4991,7 +4991,7 @@ def _get_agg_axis(self, axis_num):
49914991
def _get_numeric_data(self):
49924992
if self._is_mixed_type:
49934993
num_data = self._data.get_numeric_data()
4994-
return DataFrame(num_data, copy=False)
4994+
return DataFrame(num_data, index=self.index, copy=False)
49954995
else:
49964996
if (self.values.dtype != np.object_ and
49974997
not issubclass(self.values.dtype.type, np.datetime64)):
@@ -5002,7 +5002,7 @@ def _get_numeric_data(self):
50025002
def _get_bool_data(self):
50035003
if self._is_mixed_type:
50045004
bool_data = self._data.get_bool_data()
5005-
return DataFrame(bool_data, copy=False)
5005+
return DataFrame(bool_data, index=self.index, copy=False)
50065006
else: # pragma: no cover
50075007
if self.values.dtype == np.bool_:
50085008
return self

pandas/core/series.py

+47-11
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,28 @@ def na_op(x, y):
7272

7373
def wrapper(self, other):
7474
from pandas.core.frame import DataFrame
75+
dtype = None
7576
wrap_results = lambda x: x
7677

7778
lvalues, rvalues = self, other
7879

79-
if (com.is_datetime64_dtype(self) and
80-
com.is_datetime64_dtype(other)):
80+
if com.is_datetime64_dtype(self):
81+
82+
if not isinstance(rvalues, np.ndarray):
83+
rvalues = np.array([rvalues])
84+
85+
# rhs is either a timedelta or a series/ndarray
86+
if lib.is_timedelta_array(rvalues):
87+
rvalues = np.array([ np.timedelta64(v) for v in rvalues ],dtype='timedelta64[ns]')
88+
dtype = 'M8[ns]'
89+
elif com.is_datetime64_dtype(rvalues):
90+
dtype = 'timedelta64[ns]'
91+
else:
92+
raise ValueError("cannot operate on a series with out a rhs of a series/ndarray of type datetime64[ns] or a timedelta")
93+
8194
lvalues = lvalues.view('i8')
8295
rvalues = rvalues.view('i8')
8396

84-
wrap_results = lambda rs: rs.astype('timedelta64[ns]')
85-
8697
if isinstance(rvalues, Series):
8798
lvalues = lvalues.values
8899
rvalues = rvalues.values
@@ -91,7 +102,7 @@ def wrapper(self, other):
91102
if self.index.equals(other.index):
92103
name = _maybe_match_name(self, other)
93104
return Series(wrap_results(na_op(lvalues, rvalues)),
94-
index=self.index, name=name)
105+
index=self.index, name=name, dtype=dtype)
95106

96107
join_idx, lidx, ridx = self.index.join(other.index, how='outer',
97108
return_indexers=True)
@@ -105,13 +116,13 @@ def wrapper(self, other):
105116
arr = na_op(lvalues, rvalues)
106117

107118
name = _maybe_match_name(self, other)
108-
return Series(arr, index=join_idx, name=name)
119+
return Series(arr, index=join_idx, name=name,dtype=dtype)
109120
elif isinstance(other, DataFrame):
110121
return NotImplemented
111122
else:
112123
# scalars
113124
return Series(na_op(lvalues.values, rvalues),
114-
index=self.index, name=self.name)
125+
index=self.index, name=self.name, dtype=dtype)
115126
return wrapper
116127

117128

@@ -777,7 +788,7 @@ def astype(self, dtype):
777788
See numpy.ndarray.astype
778789
"""
779790
casted = com._astype_nansafe(self.values, dtype)
780-
return self._constructor(casted, index=self.index, name=self.name)
791+
return self._constructor(casted, index=self.index, name=self.name, dtype=casted.dtype)
781792

782793
def convert_objects(self, convert_dates=True, convert_numeric=True):
783794
"""
@@ -1201,7 +1212,7 @@ def tolist(self):
12011212
Overrides numpy.ndarray.tolist
12021213
"""
12031214
if com.is_datetime64_dtype(self):
1204-
return self.astype(object).values.tolist()
1215+
return list(self)
12051216
return self.values.tolist()
12061217

12071218
def to_dict(self):
@@ -3103,8 +3114,12 @@ def _try_cast(arr):
31033114
raise TypeError('Cannot cast datetime64 to %s' % dtype)
31043115
else:
31053116
subarr = _try_cast(data)
3106-
elif copy:
3117+
else:
3118+
subarr = _try_cast(data)
3119+
3120+
if copy:
31073121
subarr = data.copy()
3122+
31083123
elif isinstance(data, list) and len(data) > 0:
31093124
if dtype is not None:
31103125
try:
@@ -3114,12 +3129,15 @@ def _try_cast(arr):
31143129
raise
31153130
subarr = np.array(data, dtype=object, copy=copy)
31163131
subarr = lib.maybe_convert_objects(subarr)
3132+
subarr = com._possibly_cast_to_datetime(subarr, dtype)
31173133
else:
31183134
subarr = lib.list_to_object_array(data)
31193135
subarr = lib.maybe_convert_objects(subarr)
3136+
subarr = com._possibly_cast_to_datetime(subarr, dtype)
31203137
else:
31213138
subarr = _try_cast(data)
31223139

3140+
# scalar like
31233141
if subarr.ndim == 0:
31243142
if isinstance(data, list): # pragma: no cover
31253143
subarr = np.array(data, dtype=object)
@@ -3135,7 +3153,14 @@ def _try_cast(arr):
31353153
dtype = np.object_
31363154

31373155
if dtype is None:
3138-
value, dtype = _dtype_from_scalar(value)
3156+
3157+
# a 1-element ndarray
3158+
if isinstance(value, np.ndarray):
3159+
dtype = value.dtype
3160+
value = value.item()
3161+
else:
3162+
value, dtype = _dtype_from_scalar(value)
3163+
31393164
subarr = np.empty(len(index), dtype=dtype)
31403165
else:
31413166
# need to possibly convert the value here
@@ -3144,6 +3169,17 @@ def _try_cast(arr):
31443169
subarr.fill(value)
31453170
else:
31463171
return subarr.item()
3172+
3173+
# the result that we want
3174+
elif subarr.ndim == 1:
3175+
if index is not None:
3176+
3177+
# a 1-element ndarray
3178+
if len(subarr) != len(index) and len(subarr) == 1:
3179+
value = subarr[0]
3180+
subarr = np.empty(len(index), dtype=subarr.dtype)
3181+
subarr.fill(value)
3182+
31473183
elif subarr.ndim > 1:
31483184
if isinstance(data, np.ndarray):
31493185
raise Exception('Data must be 1-dimensional')

pandas/src/inference.pyx

+11
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,17 @@ def is_datetime64_array(ndarray values):
265265
return False
266266
return True
267267

268+
def is_timedelta_array(ndarray values):
269+
import datetime
270+
cdef int i, n = len(values)
271+
if n == 0:
272+
return False
273+
for i in range(n):
274+
if not isinstance(values[i],datetime.timedelta):
275+
return False
276+
return True
277+
278+
268279
def is_date_array(ndarray[object] values):
269280
cdef int i, n = len(values)
270281
if n == 0:

0 commit comments

Comments
 (0)