Skip to content

Commit 8c9192c

Browse files
committed
Merge pull request #11153 from sinhrks/fillna_tzbug
(WIP) BUG: DatetimeTZBlock.fillna raises TypeError
2 parents d8182e1 + a7c705a commit 8c9192c

File tree

6 files changed

+201
-7
lines changed

6 files changed

+201
-7
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1157,3 +1157,4 @@ Bug Fixes
11571157
- Bug in ``Index`` where the dtype may not be applied properly (:issue:`11017`)
11581158
- Bug in ``io.gbq`` when testing for minimum google api client version (:issue:`10652`)
11591159
- Bug in ``DataFrame`` construction from nested ``dict`` with ``timedelta`` keys (:issue:`11129`)
1160+
- Bug in ``.fillna`` which may raise ``TypeError`` when the data contains a datetime dtype (:issue:`7095`, :issue:`11153`)

pandas/core/dtypes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def construct_from_string(cls, string):
181181

182182
def __unicode__(self):
183183
# format the tz
184-
return "datetime64[{unit}, {tz}]".format(unit=self.unit,tz=self.tz)
184+
return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz)
185185

186186
@property
187187
def name(self):

pandas/core/internals.py

+51-6
Original file line numberDiff line numberDiff line change
@@ -1947,20 +1947,42 @@ def _try_fill(self, value):
19471947
def fillna(self, value, limit=None,
19481948
inplace=False, downcast=None):
19491949

1950-
# straight putmask here
1951-
values = self.values if inplace else self.values.copy()
19521950
mask = isnull(self.values)
19531951
value = self._try_fill(value)
1952+
19541953
if limit is not None:
19551954
if self.ndim > 2:
19561955
raise NotImplementedError("number of dimensions for 'fillna' "
19571956
"is currently limited to 2")
19581957
mask[mask.cumsum(self.ndim-1)>limit]=False
19591958

1960-
np.putmask(values, mask, value)
1961-
return [self if inplace else
1962-
self.make_block(values,
1963-
fastpath=True)]
1959+
if mask.any():
1960+
try:
1961+
return self._fillna_mask(mask, value, inplace=inplace)
1962+
except TypeError:
1963+
pass
1964+
# _fillna_mask raises TypeError when it fails
1965+
# cannot perform inplace op because of object coercion
1966+
values = self.get_values(dtype=object)
1967+
np.putmask(values, mask, value)
1968+
return [self.make_block(values, fastpath=True)]
1969+
else:
1970+
return [self if inplace else self.copy()]
1971+
1972+
def _fillna_mask(self, mask, value, inplace=False):
1973+
if getattr(value, 'tzinfo', None) is None:
1974+
# Series comes to this path
1975+
values = self.values
1976+
if not inplace:
1977+
values = values.copy()
1978+
try:
1979+
np.putmask(values, mask, value)
1980+
return [self if inplace else
1981+
self.make_block(values, fastpath=True)]
1982+
except (ValueError, TypeError):
1983+
# scalar causes ValueError, and array causes TypeError
1984+
pass
1985+
raise TypeError
19641986

19651987
def to_native_types(self, slicer=None, na_rep=None, date_format=None,
19661988
quoting=None, **kwargs):
@@ -2033,6 +2055,29 @@ def get_values(self, dtype=None):
20332055
.reshape(self.values.shape)
20342056
return self.values
20352057

2058+
def _fillna_mask(self, mask, value, inplace=False):
2059+
# cannot perform inplace op for internal DatetimeIndex
2060+
my_tz = tslib.get_timezone(self.values.tz)
2061+
value_tz = tslib.get_timezone(getattr(value, 'tzinfo', None))
2062+
2063+
if (my_tz == value_tz or self.dtype == getattr(value, 'dtype', None)):
2064+
if my_tz == value_tz:
2065+
# hack for PY2.6 / numpy 1.7.1.
2066+
# Other versions can directly use self.values.putmask
2067+
# --------------------------------------
2068+
try:
2069+
value = value.asm8
2070+
except AttributeError:
2071+
value = tslib.Timestamp(value).asm8
2072+
### ------------------------------------
2073+
2074+
try:
2075+
values = self.values.putmask(mask, value)
2076+
return [self.make_block(values, fastpath=True)]
2077+
except ValueError:
2078+
pass
2079+
raise TypeError
2080+
20362081
def _slice(self, slicer):
20372082
""" return a slice of my values """
20382083
if isinstance(slicer, tuple):

pandas/tests/test_dtypes.py

+14
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,20 @@ def test_basic(self):
137137
self.assertFalse(is_datetimetz(np.dtype('float64')))
138138
self.assertFalse(is_datetimetz(1.0))
139139

140+
def test_dst(self):
141+
142+
dr1 = date_range('2013-01-01', periods=3, tz='US/Eastern')
143+
s1 = Series(dr1, name='A')
144+
self.assertTrue(is_datetimetz(s1))
145+
146+
dr2 = date_range('2013-08-01', periods=3, tz='US/Eastern')
147+
s2 = Series(dr2, name='A')
148+
self.assertTrue(is_datetimetz(s2))
149+
self.assertEqual(s1.dtype, s2.dtype)
150+
151+
152+
153+
140154
if __name__ == '__main__':
141155
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
142156
exit=False)

pandas/tests/test_frame.py

+28
Original file line numberDiff line numberDiff line change
@@ -8740,6 +8740,34 @@ def test_fillna_dtype_conversion(self):
87408740
result = df.fillna(v)
87418741
assert_frame_equal(result, expected)
87428742

8743+
def test_fillna_datetime_columns(self):
8744+
# GH 7095
8745+
df = pd.DataFrame({'A': [-1, -2, np.nan],
8746+
'B': date_range('20130101', periods=3),
8747+
'C': ['foo', 'bar', None],
8748+
'D': ['foo2', 'bar2', None]},
8749+
index=date_range('20130110', periods=3))
8750+
result = df.fillna('?')
8751+
expected = pd.DataFrame({'A': [-1, -2, '?'],
8752+
'B': date_range('20130101', periods=3),
8753+
'C': ['foo', 'bar', '?'],
8754+
'D': ['foo2', 'bar2', '?']},
8755+
index=date_range('20130110', periods=3))
8756+
self.assert_frame_equal(result, expected)
8757+
8758+
df = pd.DataFrame({'A': [-1, -2, np.nan],
8759+
'B': [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'), pd.NaT],
8760+
'C': ['foo', 'bar', None],
8761+
'D': ['foo2', 'bar2', None]},
8762+
index=date_range('20130110', periods=3))
8763+
result = df.fillna('?')
8764+
expected = pd.DataFrame({'A': [-1, -2, '?'],
8765+
'B': [pd.Timestamp('2013-01-01'), pd.Timestamp('2013-01-02'), '?'],
8766+
'C': ['foo', 'bar', '?'],
8767+
'D': ['foo2', 'bar2', '?']},
8768+
index=date_range('20130110', periods=3))
8769+
self.assert_frame_equal(result, expected)
8770+
87438771
def test_ffill(self):
87448772
self.tsframe['A'][:5] = nan
87458773
self.tsframe['A'][-5:] = nan

pandas/tests/test_series.py

+106
Original file line numberDiff line numberDiff line change
@@ -3937,6 +3937,89 @@ def test_datetime64_fillna(self):
39373937
result = s.fillna(method='backfill')
39383938
assert_series_equal(result, expected)
39393939

3940+
def test_datetime64_tz_fillna(self):
3941+
for tz in ['US/Eastern', 'Asia/Tokyo']:
3942+
# DatetimeBlock
3943+
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
3944+
Timestamp('2011-01-03 10:00'), pd.NaT])
3945+
result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
3946+
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00'),
3947+
Timestamp('2011-01-03 10:00'), Timestamp('2011-01-02 10:00')])
3948+
self.assert_series_equal(expected, result)
3949+
3950+
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
3951+
expected = Series([Timestamp('2011-01-01 10:00'),
3952+
Timestamp('2011-01-02 10:00', tz=tz),
3953+
Timestamp('2011-01-03 10:00'),
3954+
Timestamp('2011-01-02 10:00', tz=tz)])
3955+
self.assert_series_equal(expected, result)
3956+
3957+
result = s.fillna('AAA')
3958+
expected = Series([Timestamp('2011-01-01 10:00'), 'AAA',
3959+
Timestamp('2011-01-03 10:00'), 'AAA'], dtype=object)
3960+
self.assert_series_equal(expected, result)
3961+
3962+
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3963+
3: pd.Timestamp('2011-01-04 10:00')})
3964+
expected = Series([Timestamp('2011-01-01 10:00'),
3965+
Timestamp('2011-01-02 10:00', tz=tz),
3966+
Timestamp('2011-01-03 10:00'),
3967+
Timestamp('2011-01-04 10:00')])
3968+
self.assert_series_equal(expected, result)
3969+
3970+
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'),
3971+
3: pd.Timestamp('2011-01-04 10:00')})
3972+
expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00'),
3973+
Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')])
3974+
self.assert_series_equal(expected, result)
3975+
3976+
# DatetimeTZBlock
3977+
idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
3978+
'2011-01-03 10:00', pd.NaT], tz=tz)
3979+
s = pd.Series(idx)
3980+
result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
3981+
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
3982+
Timestamp('2011-01-02 10:00'),
3983+
Timestamp('2011-01-03 10:00', tz=tz),
3984+
Timestamp('2011-01-02 10:00')])
3985+
self.assert_series_equal(expected, result)
3986+
3987+
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
3988+
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
3989+
'2011-01-03 10:00', '2011-01-02 10:00'],
3990+
tz=tz)
3991+
expected = Series(idx)
3992+
self.assert_series_equal(expected, result)
3993+
3994+
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz).to_pydatetime())
3995+
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
3996+
'2011-01-03 10:00', '2011-01-02 10:00'],
3997+
tz=tz)
3998+
expected = Series(idx)
3999+
self.assert_series_equal(expected, result)
4000+
4001+
result = s.fillna('AAA')
4002+
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA',
4003+
Timestamp('2011-01-03 10:00', tz=tz), 'AAA'],
4004+
dtype=object)
4005+
self.assert_series_equal(expected, result)
4006+
4007+
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
4008+
3: pd.Timestamp('2011-01-04 10:00')})
4009+
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
4010+
Timestamp('2011-01-02 10:00', tz=tz),
4011+
Timestamp('2011-01-03 10:00', tz=tz),
4012+
Timestamp('2011-01-04 10:00')])
4013+
self.assert_series_equal(expected, result)
4014+
4015+
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
4016+
3: pd.Timestamp('2011-01-04 10:00', tz=tz)})
4017+
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
4018+
Timestamp('2011-01-02 10:00', tz=tz),
4019+
Timestamp('2011-01-03 10:00', tz=tz),
4020+
Timestamp('2011-01-04 10:00', tz=tz)])
4021+
self.assert_series_equal(expected, result)
4022+
39404023
def test_fillna_int(self):
39414024
s = Series(np.random.randint(-100, 100, 50))
39424025
s.fillna(method='ffill', inplace=True)
@@ -5022,6 +5105,29 @@ def test_dropna_empty(self):
50225105
# invalid axis
50235106
self.assertRaises(ValueError, s.dropna, axis=1)
50245107

5108+
5109+
def test_datetime64_tz_dropna(self):
5110+
# DatetimeBlock
5111+
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
5112+
Timestamp('2011-01-03 10:00'), pd.NaT])
5113+
result = s.dropna()
5114+
expected = Series([Timestamp('2011-01-01 10:00'),
5115+
Timestamp('2011-01-03 10:00')], index=[0, 2])
5116+
self.assert_series_equal(result, expected)
5117+
5118+
# DatetimeTZBlock
5119+
idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
5120+
'2011-01-03 10:00', pd.NaT],
5121+
tz='Asia/Tokyo')
5122+
s = pd.Series(idx)
5123+
self.assertEqual(s.dtype, 'datetime64[ns, Asia/Tokyo]')
5124+
result = s.dropna()
5125+
expected = Series([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
5126+
Timestamp('2011-01-03 10:00', tz='Asia/Tokyo')],
5127+
index=[0, 2])
5128+
self.assertEqual(result.dtype, 'datetime64[ns, Asia/Tokyo]')
5129+
self.assert_series_equal(result, expected)
5130+
50255131
def test_axis_alias(self):
50265132
s = Series([1, 2, np.nan])
50275133
assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))

0 commit comments

Comments
 (0)