From 6a5c216dd36a037323e351315031a50414017359 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 23 Dec 2018 18:57:06 -0800 Subject: [PATCH 01/14] implement astype portion of #24024 --- pandas/core/arrays/datetimelike.py | 51 +++++++++++++++++-- pandas/core/arrays/datetimes.py | 34 ++++++++++++- pandas/core/arrays/period.py | 43 +++------------- pandas/core/arrays/timedeltas.py | 25 ++++++++- pandas/core/dtypes/missing.py | 6 ++- pandas/core/indexes/datetimelike.py | 42 +++++++-------- pandas/core/indexes/datetimes.py | 27 ++++------ pandas/core/indexes/period.py | 11 ++-- pandas/core/indexes/timedeltas.py | 16 +++--- pandas/tests/arrays/test_datetimes.py | 14 +++++ pandas/tests/arrays/test_period.py | 18 ++++--- pandas/tests/indexes/datetimes/test_astype.py | 15 +++++- .../tests/indexes/timedeltas/test_astype.py | 15 +++++- 13 files changed, 211 insertions(+), 106 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index f82004747f0d0..272afc621af0e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -17,10 +17,11 @@ from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.core.dtypes.common import ( - is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype, - is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike, - is_period_dtype, is_timedelta64_dtype, needs_i8_conversion) + is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, + is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype, + is_list_like, is_object_dtype, is_offsetlike, is_period_dtype, + is_string_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -403,9 +404,49 @@ def __getitem__(self, key): return self._simple_new(result, **attribs) def astype(self, dtype, copy=True): + # Some notes on cases we don't have to handle here in the base class: + # 1. PeriodArray.astype handles period -> period + # 2. DatetimeArray.astype handles conversion between tz. + # 3. DatetimeArray.astype handles datetime -> period + from pandas import Categorical + dtype = pandas_dtype(dtype) + if is_object_dtype(dtype): return self._box_values(self.asi8) - return super(DatetimeLikeArrayMixin, self).astype(dtype, copy) + elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): + return self._format_native_types() + elif is_integer_dtype(dtype): + # we deliberately ignore int32 vs. int64 here. + # See https://github.com/pandas-dev/pandas/issues/24381 for more. + values = self.asi8 + if copy: + values = values.copy() + return values + elif (is_datetime_or_timedelta_dtype(dtype) and + not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): + # disallow conversion between datetime/timedelta, + # and conversions for any datetimelike to float + msg = 'Cannot cast {name} to dtype {dtype}' + raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) + elif is_categorical_dtype(dtype): + return Categorical(self, dtype=dtype) + else: + return np.asarray(self, dtype=dtype) + + def view(self, dtype=None): + """ + New view on this array with the same data. + + Parameters + ---------- + dtype : numpy dtype, optional + + Returns + ------- + ndarray + With the specified `dtype`. + """ + return self._data.view(dtype=dtype) # ------------------------------------------------------------------ # ExtensionArray Interface diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a933f41faab67..08cdda998c68d 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -15,8 +15,9 @@ from pandas.core.dtypes.common import ( _INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype, - is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype, - is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype) + is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, + is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, + is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -469,6 +470,35 @@ def __iter__(self): for v in converted: yield v + def astype(self, dtype, copy=True): + # We handle + # --> datetime + # --> period + # DatetimeLikeArrayMixin Super handles the rest. + dtype = pandas_dtype(dtype) + + if (is_datetime64_ns_dtype(dtype) and + not is_dtype_equal(dtype, self.dtype)): + # GH#18951: datetime64_ns dtype but not equal means different tz + new_tz = getattr(dtype, 'tz', None) + if getattr(self.dtype, 'tz', None) is None: + return self.tz_localize(new_tz) + result = self.tz_convert(new_tz) + if new_tz is None: + # Do we want .astype('datetime64[ns]') to be an ndarray. + # The astype in Block._astype expects this to return an + # ndarray, but we could maybe work around it there. + result = result._data + return result + elif is_datetime64tz_dtype(self.dtype) and is_dtype_equal(self.dtype, + dtype): + if copy: + return self.copy() + return self + elif is_period_dtype(dtype): + return self.to_period(freq=dtype.freq) + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) + # ---------------------------------------------------------------- # ExtensionArray Interface diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5f4d98a81e5f2..5ece399fa2159 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -15,10 +15,8 @@ from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.common import ( - _TD_DTYPE, ensure_object, is_array_like, is_categorical_dtype, - is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, - is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, - is_period_dtype, is_string_dtype, pandas_dtype) + _TD_DTYPE, ensure_object, is_array_like, is_datetime64_dtype, + is_float_dtype, is_list_like, is_period_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries from pandas.core.dtypes.missing import isna, notna @@ -593,42 +591,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # ------------------------------------------------------------------ def astype(self, dtype, copy=True): - # TODO: Figure out something better here... - # We have DatetimeLikeArrayMixin -> - # super(...), which ends up being... DatetimeIndexOpsMixin? - # this is complicated. - # need a pandas_astype(arr, dtype). - from pandas import Categorical - + # We handle Period[T] -> Period[U] + # Our parent handles everything else. dtype = pandas_dtype(dtype) - if is_object_dtype(dtype): - return np.asarray(self, dtype=object) - elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): - return self._format_native_types() - elif is_integer_dtype(dtype): - values = self._data - - if values.dtype != dtype: - # int32 vs. int64 - values = values.astype(dtype) - - elif copy: - values = values.copy() - - return values - elif (is_datetime_or_timedelta_dtype(dtype) and - not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): - # disallow conversion between datetime/timedelta, - # and conversions for any datetimelike to float - msg = 'Cannot cast {name} to dtype {dtype}' - raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) - elif is_categorical_dtype(dtype): - return Categorical(self, dtype=dtype) - elif is_period_dtype(dtype): + if is_period_dtype(dtype): return self.asfreq(dtype.freq) - else: - return np.asarray(self, dtype=dtype) + return super(PeriodArray, self).astype(dtype, copy=copy) @property def flags(self): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 06a9627a290c6..17ae1c184c57e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -17,7 +17,8 @@ from pandas.core.dtypes.common import ( _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar, - is_string_dtype, is_timedelta64_dtype) + is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, + pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) @@ -231,6 +232,28 @@ def _validate_fill_value(self, fill_value): "Got '{got}'.".format(got=fill_value)) return fill_value + def astype(self, dtype, copy=True): + # We handle + # --> timedelta64[ns] + # --> timedelta64 + # DatetimeLikeArrayMixin super call handles other cases + dtype = pandas_dtype(dtype) + + if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype): + # essentially this is division + result = self._data.astype(dtype, copy=copy) + if self._hasnans: + values = self._maybe_mask_results(result, + fill_value=None, + convert='float64') + return values + return result.astype('i8') + elif is_timedelta64_ns_dtype(dtype): + if copy: + return self.copy() + return self + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) + # ---------------------------------------------------------------- # Rendering Methods diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 809dcbd054ea0..0226de34d1fac 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -14,7 +14,8 @@ is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) from .generic import ( - ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries) + ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries, + ABCDatetimeArray, ABCTimedeltaArray) from .inference import is_list_like isposinf_scalar = libmissing.isposinf_scalar @@ -108,7 +109,8 @@ def _isna_new(obj): elif isinstance(obj, ABCMultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, - ABCExtensionArray)): + ABCExtensionArray, + ABCDatetimeArray, ABCTimedeltaArray)): return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isna(func=isna)) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8a319a65314dd..bc6754ee9f896 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -13,10 +13,10 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - ensure_int64, is_bool_dtype, is_categorical_dtype, - is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype, - is_integer, is_integer_dtype, is_list_like, is_object_dtype, - is_period_dtype, is_scalar, is_string_dtype) + ensure_int64, is_bool_dtype, pandas_dtype, + is_dtype_equal, is_float, + is_integer, is_integer_dtype, is_list_like, + is_period_dtype, is_scalar) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core import algorithms, ops @@ -40,6 +40,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): # override DatetimeLikeArrayMixin method copy = Index.copy unique = Index.unique + view = Index.view # DatetimeLikeArrayMixin assumes subclasses are mutable, so these are # properties there. They can be made into cache_readonly for Index @@ -527,24 +528,23 @@ def _maybe_box_as_values(self, values, **attribs): # - sort_values return values + @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): - if is_object_dtype(dtype): - return self._box_values_as_index() - elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): - return Index(self.format(), name=self.name, dtype=object) - elif is_integer_dtype(dtype): - # TODO(DatetimeArray): use self._values here. - # Can't use ._values currently, because that returns a - # DatetimeIndex, which throws us in an infinite loop. - return Index(self.values.astype('i8', copy=copy), name=self.name, - dtype='i8') - elif (is_datetime_or_timedelta_dtype(dtype) and - not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): - # disallow conversion between datetime/timedelta, - # and conversions for any datetimelike to float - msg = 'Cannot cast {name} to dtype {dtype}' - raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) - return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy) + if is_dtype_equal(self.dtype, dtype) and copy is False: + # Ensure that self.astype(self.dtype) is self + return self + + new_values = self._eadata.astype(dtype, copy=copy) + + # we pass `dtype` to the Index constructor, for cases like + # dtype=object to disable inference. But, DTA.astype ignores + # integer sign and size, so we need to detect that case and + # just choose int64. + dtype = pandas_dtype(dtype) + if is_integer_dtype(dtype): + dtype = np.dtype("int64") + + return Index(new_values, dtype=dtype, name=self.name) @Appender(DatetimeLikeArrayMixin._time_shift.__doc__) def _time_shift(self, periods, freq=None): diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 09e741af363da..62dac601c8677 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -14,9 +14,8 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( - _NS_DTYPE, ensure_int64, is_datetime64_ns_dtype, is_dtype_equal, is_float, - is_integer, is_list_like, is_period_dtype, is_scalar, is_string_like, - pandas_dtype) + _NS_DTYPE, ensure_int64, is_float, + is_integer, is_list_like, is_scalar, is_string_like) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna @@ -24,7 +23,7 @@ DatetimeArrayMixin as DatetimeArray, _to_m8) from pandas.core.base import _shared_docs import pandas.core.common as com -from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.base import Index from pandas.core.indexes.datetimelike import ( DatetimeIndexOpsMixin, wrap_array_method, wrap_field_accessor) from pandas.core.indexes.numeric import Int64Index @@ -603,20 +602,6 @@ def intersection(self, other): # -------------------------------------------------------------------- - @Appender(_index_shared_docs['astype']) - def astype(self, dtype, copy=True): - dtype = pandas_dtype(dtype) - if (is_datetime64_ns_dtype(dtype) and - not is_dtype_equal(dtype, self.dtype)): - # GH 18951: datetime64_ns dtype but not equal means different tz - new_tz = getattr(dtype, 'tz', None) - if getattr(self.dtype, 'tz', None) is None: - return self.tz_localize(new_tz) - return self.tz_convert(new_tz) - elif is_period_dtype(dtype): - return self.to_period(freq=dtype.freq) - return super(DatetimeIndex, self).astype(dtype, copy=copy) - def _get_time_micros(self): values = self.asi8 if self.tz is not None and not timezones.is_utc(self.tz): @@ -1089,10 +1074,16 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # -------------------------------------------------------------------- # Wrapping DatetimeArray + @property + def _eadata(self): + return DatetimeArray._simple_new(self._data, + tz=self.tz, freq=self.freq) + # Compat for frequency inference, see GH#23789 _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique + astype = DatetimeIndexOpsMixin.astype _timezone = cache_readonly(DatetimeArray._timezone.fget) is_normalized = cache_readonly(DatetimeArray.is_normalized.fget) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b15604a57fb81..5f1f136f4abfd 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -247,6 +247,10 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): # ------------------------------------------------------------------------ # Data + @property + def _eadata(self): + return self._data + @property def _ndarray_values(self): return self._data._ndarray_values @@ -539,16 +543,13 @@ def asof_locs(self, where, mask): def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) - # We have a few special-cases for `dtype`. - # Failing those, we fall back to astyping the values - if is_datetime64_any_dtype(dtype): # 'how' is index-speicifc, isn't part of the EA interface. tz = getattr(dtype, 'tz', None) return self.to_timestamp(how=how).tz_localize(tz) - result = self._data.astype(dtype, copy=copy) - return Index(result, name=self.name, dtype=dtype, copy=False) + # TODO: should probably raise on `how` here, so we don't ignore it. + return super(PeriodIndex, self).astype(dtype, copy=copy) @Substitution(klass='PeriodIndex') @Appender(_shared_docs['searchsorted']) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 47f7f7cf860fc..3d5dc2dfa2bd0 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -237,6 +237,10 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # ------------------------------------------------------------------- # Wrapping TimedeltaArray + @property + def _eadata(self): + return TimedeltaArray._simple_new(self._data, freq=self.freq) + __mul__ = _make_wrapped_arith_op("__mul__") __rmul__ = _make_wrapped_arith_op("__rmul__") __floordiv__ = _make_wrapped_arith_op("__floordiv__") @@ -284,14 +288,14 @@ def __rtruediv__(self, other): def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype): - # return an index (essentially this is division) - result = self.values.astype(dtype, copy=copy) + # Have to repeat the check for 'timedelta64' (not ns) dtype + # so that we can return a numeric index, since pandas will return + # a TimedeltaIndex when dtype='timedelta' + result = self._eadata.astype(dtype, copy=copy) if self.hasnans: - values = self._maybe_mask_results(result, fill_value=None, - convert='float64') - return Index(values, name=self.name) + return Index(result, name=self.name) return Index(result.astype('i8'), name=self.name) - return super(TimedeltaIndex, self).astype(dtype, copy=copy) + return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy) def union(self, other): """ diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 2b630b98b69a2..4a304194437b1 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -5,9 +5,11 @@ import operator import numpy as np +import pytest import pandas as pd from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray +from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas.util.testing as tm @@ -42,3 +44,15 @@ def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators): result = op(other, arr) tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeArray(object): + def test_astype_to_same(self): + arr = DatetimeArray._from_sequence(['2000'], tz='US/Central') + result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) + assert result is arr + + def test_tz_setter_raises(self): + arr = DatetimeArray._from_sequence(['2000'], tz='US/Central') + with pytest.raises(AttributeError, match='tz_localize'): + arr.tz = 'UTC' diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 4425cc8eb1139..f06a4b4e5c715 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -88,21 +88,25 @@ def test_take_raises(): arr.take([0, -1], allow_fill=True, fill_value='foo') -@pytest.mark.parametrize('dtype', [int, np.int32, np.int64]) +@pytest.mark.parametrize('dtype', [int, np.int32, np.int64, 'uint']) def test_astype(dtype): - # Need to ensure ordinals are astyped correctly for both - # int32 and 64 + # We choose to ignore the sign and size of integers for + # Period/Datetime/Timedelta astype arr = period_array(['2000', '2001', None], freq='D') result = arr.astype(dtype) - # need pandas_dtype to handle int32 vs. int64 correctly - expected = pandas_dtype(dtype) - assert result.dtype == expected + expected_dtype = np.dtype('int64') + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) def test_astype_copies(): arr = period_array(['2000', '2001', None], freq='D') result = arr.astype(np.int64, copy=False) - assert result is arr._data + # Add the `.base`, since we now use `.asi8` which returns a view. + # We could maybe override it in PeriodArray to return ._data directly. + assert result.base is arr._data result = arr.astype(np.int64, copy=True) assert result is not arr._data diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index d47d1016ee653..6c4eceae0b8cc 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -168,7 +168,7 @@ def test_astype_object_with_nat(self): def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) - msg = 'Cannot cast DatetimeIndex to dtype' + msg = 'Cannot cast DatetimeArrayMixin to dtype' with pytest.raises(TypeError, match=msg): idx.astype(dtype) @@ -301,6 +301,19 @@ def test_to_period_nofreq(self): assert idx.freqstr is None tm.assert_index_equal(idx.to_period(), expected) + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_astype_category(self, tz): + obj = pd.date_range("2000", periods=2, tz=tz) + result = obj.astype('category') + expected = pd.CategoricalIndex([pd.Timestamp('2000-01-01', tz=tz), + pd.Timestamp('2000-01-02', tz=tz)]) + tm.assert_index_equal(result, expected) + + # TODO: use \._data following composition changeover + result = obj._eadata.astype('category') + expected = expected.values + tm.assert_categorical_equal(result, expected) + @pytest.mark.parametrize('tz', [None, 'US/Central']) def test_astype_array_fallback(self, tz): obj = pd.date_range("2000", periods=2, tz=tz) diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index 6afbe9cff42c2..aa0017324e078 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -75,10 +75,23 @@ def test_astype_timedelta64(self): def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) - msg = 'Cannot cast TimedeltaIndex to dtype' + msg = 'Cannot cast TimedeltaArrayMixin to dtype' with pytest.raises(TypeError, match=msg): idx.astype(dtype) + @pytest.mark.parametrize('tz', [None, 'US/Central']) + def test_astype_category(self, tz): + obj = pd.date_range("2000", periods=2, tz=tz) + result = obj.astype('category') + expected = pd.CategoricalIndex([pd.Timestamp('2000-01-01', tz=tz), + pd.Timestamp('2000-01-02', tz=tz)]) + tm.assert_index_equal(result, expected) + + # TODO: Use \._data following composition changeover + result = obj._eadata.astype('category') + expected = expected.values + tm.assert_categorical_equal(result, expected) + def test_astype_array_fallback(self): obj = pd.timedelta_range("1H", periods=2) result = obj.astype(bool) From 1a9f30b5f47e1539cfbb3d1bfa19912a89a6d33c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 23 Dec 2018 19:51:58 -0800 Subject: [PATCH 02/14] fixup unused import --- pandas/tests/arrays/test_period.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index f06a4b4e5c715..cd0b670fb8cd2 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -4,7 +4,6 @@ from pandas._libs.tslibs import iNaT from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import PeriodDtype import pandas as pd From 1b109b8a2bff3526e07c77ef601ac3841104fd26 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 24 Dec 2018 08:35:29 -0800 Subject: [PATCH 03/14] isort fixup --- pandas/core/dtypes/missing.py | 4 ++-- pandas/core/indexes/datetimelike.py | 6 ++---- pandas/core/indexes/datetimes.py | 4 ++-- pandas/tests/arrays/test_datetimes.py | 3 ++- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 0226de34d1fac..1d0ea03455949 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -14,8 +14,8 @@ is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) from .generic import ( - ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries, - ABCDatetimeArray, ABCTimedeltaArray) + ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass, + ABCMultiIndex, ABCSeries, ABCTimedeltaArray) from .inference import is_list_like isposinf_scalar = libmissing.isposinf_scalar diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index bc6754ee9f896..e01c259b62b97 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -13,10 +13,8 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( - ensure_int64, is_bool_dtype, pandas_dtype, - is_dtype_equal, is_float, - is_integer, is_integer_dtype, is_list_like, - is_period_dtype, is_scalar) + ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer, + is_integer_dtype, is_list_like, is_period_dtype, is_scalar, pandas_dtype) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core import algorithms, ops diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 62dac601c8677..16bc16a1dba66 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -14,8 +14,8 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( - _NS_DTYPE, ensure_int64, is_float, - is_integer, is_list_like, is_scalar, is_string_like) + _NS_DTYPE, ensure_int64, is_float, is_integer, is_list_like, is_scalar, + is_string_like) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.missing import isna diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 4a304194437b1..615ffe8ae8e38 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -7,9 +7,10 @@ import numpy as np import pytest +from pandas.core.dtypes.dtypes import DatetimeTZDtype + import pandas as pd from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray -from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas.util.testing as tm From 5615b9f1f5195d03ad1c483ea987d8917d517b29 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 24 Dec 2018 14:45:50 -0800 Subject: [PATCH 04/14] pass copy kwarg --- pandas/core/arrays/datetimelike.py | 2 +- pandas/tests/arrays/test_period.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 272afc621af0e..e7b48be12cdb3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -431,7 +431,7 @@ def astype(self, dtype, copy=True): elif is_categorical_dtype(dtype): return Categorical(self, dtype=dtype) else: - return np.asarray(self, dtype=dtype) + return np.asarray(self, dtype=dtype, copy=copy) def view(self, dtype=None): """ diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index cd0b670fb8cd2..079ff416b7899 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -109,6 +109,7 @@ def test_astype_copies(): result = arr.astype(np.int64, copy=True) assert result is not arr._data + tm.assert_numpy_array_equal(result, arr._data.view('i8')) def test_astype_categorical(): From 184f59fe68d9a312b0b151c3c82da3547aab04e1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 25 Dec 2018 07:15:22 -0800 Subject: [PATCH 05/14] revert change that brokethe world --- pandas/core/arrays/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e7b48be12cdb3..272afc621af0e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -431,7 +431,7 @@ def astype(self, dtype, copy=True): elif is_categorical_dtype(dtype): return Categorical(self, dtype=dtype) else: - return np.asarray(self, dtype=dtype, copy=copy) + return np.asarray(self, dtype=dtype) def view(self, dtype=None): """ From e41068aeb44c4a9e728305dc4f5c7e47efdeec12 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 25 Dec 2018 12:58:48 -0800 Subject: [PATCH 06/14] comments, typo --- pandas/core/arrays/timedeltas.py | 4 +++- pandas/core/indexes/datetimelike.py | 4 +++- pandas/core/indexes/period.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 17ae1c184c57e..435e29f8a708c 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -240,7 +240,9 @@ def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype): - # essentially this is division + # by pandas convention, converting to non-nano timedelta64 + # returns an int64-dtyped array with ints representing multiples + # of the desired timedelta unit. This is essentially division result = self._data.astype(dtype, copy=copy) if self._hasnans: values = self._maybe_mask_results(result, diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index e01c259b62b97..2532b43d382b6 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -542,7 +542,9 @@ def astype(self, dtype, copy=True): if is_integer_dtype(dtype): dtype = np.dtype("int64") - return Index(new_values, dtype=dtype, name=self.name) + # pass copy=False because any copying will be done in the + # _eadata.astype call above + return Index(new_values, dtype=dtype, name=self.name, copy=False) @Appender(DatetimeLikeArrayMixin._time_shift.__doc__) def _time_shift(self, periods, freq=None): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 5f1f136f4abfd..f42fe3af7b742 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -544,7 +544,7 @@ def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) if is_datetime64_any_dtype(dtype): - # 'how' is index-speicifc, isn't part of the EA interface. + # 'how' is index-specific, isn't part of the EA interface. tz = getattr(dtype, 'tz', None) return self.to_timestamp(how=how).tz_localize(tz) From 6f108ddd84550091f9f57f0c90e9c1a0103a8752 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 25 Dec 2018 13:01:11 -0800 Subject: [PATCH 07/14] avoid double-copy --- pandas/core/arrays/timedeltas.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 435e29f8a708c..53c67c9295ff7 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -243,12 +243,14 @@ def astype(self, dtype, copy=True): # by pandas convention, converting to non-nano timedelta64 # returns an int64-dtyped array with ints representing multiples # of the desired timedelta unit. This is essentially division - result = self._data.astype(dtype, copy=copy) if self._hasnans: + # avoid double-copying + result = self._data.astype(dtype, copy=False) values = self._maybe_mask_results(result, fill_value=None, convert='float64') return values + result = self._data.astype(dtype, copy=copy) return result.astype('i8') elif is_timedelta64_ns_dtype(dtype): if copy: From 04efd450a9ef6fd92029d99044cc10a5ebf33914 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 27 Dec 2018 20:18:11 -0800 Subject: [PATCH 08/14] sidestep int sign/size astype issues --- pandas/core/indexes/datetimelike.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 184e3f65354a5..eb6bdf41a1286 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -557,17 +557,10 @@ def astype(self, dtype, copy=True): new_values = self._eadata.astype(dtype, copy=copy) - # we pass `dtype` to the Index constructor, for cases like - # dtype=object to disable inference. But, DTA.astype ignores - # integer sign and size, so we need to detect that case and - # just choose int64. - dtype = pandas_dtype(dtype) - if is_integer_dtype(dtype): - dtype = np.dtype("int64") - # pass copy=False because any copying will be done in the # _eadata.astype call above - return Index(new_values, dtype=dtype, name=self.name, copy=False) + return Index(new_values, + dtype=new_values.dtype, name=self.name, copy=False) @Appender(DatetimeLikeArrayMixin._time_shift.__doc__) def _time_shift(self, periods, freq=None): From 3fca8108a832fa7b80f317a75994ff8722b3743a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 10:31:56 -0600 Subject: [PATCH 09/14] Implement UInt64 handling, tests, and docs --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/arrays/datetimelike.py | 8 +++++++- pandas/core/indexes/base.py | 5 +++-- pandas/tests/indexes/datetimes/test_astype.py | 9 +++++++++ pandas/tests/indexes/period/test_astype.py | 6 ++++++ pandas/tests/indexes/timedeltas/test_astype.py | 9 +++++++++ 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index d6bda5fde44d9..216e234770d8e 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1324,6 +1324,7 @@ Datetimelike - Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`) - Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`) - Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`) +- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`). Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 4270c0ac72d1a..39b359457585c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -21,7 +21,8 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike, is_period_dtype, - is_string_dtype, is_timedelta64_dtype, needs_i8_conversion, pandas_dtype) + is_string_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype, + needs_i8_conversion, pandas_dtype) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries from pandas.core.dtypes.missing import isna @@ -419,6 +420,11 @@ def astype(self, dtype, copy=True): # we deliberately ignore int32 vs. int64 here. # See https://github.com/pandas-dev/pandas/issues/24381 for more. values = self.asi8 + + if is_unsigned_integer_dtype(dtype): + # Again, we ignore int32 vs. int64 + values = values.view("uint64") + if copy: values = values.copy() return values diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2f29561af943b..8dfc03486f777 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -739,8 +739,9 @@ def view(self, cls=None): Parameters ---------- dtype : numpy dtype or pandas type - Note that any integer `dtype` is treated as ``'int64'``, - regardless of the sign and size. + Note that any signed integer `dtype` is treated as ``'int64'``, + and any unsigned integer `dtype` is treated as ``'uint64'``, + regardless of the size. copy : bool, default True By default, astype always returns a newly allocated object. If copy is set to False and internal requirements on dtype are diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 6c4eceae0b8cc..cda7a005c40c7 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -33,6 +33,15 @@ def test_astype(self): tm.assert_index_equal(result, Index(rng.asi8)) tm.assert_numpy_array_equal(result.values, rng.asi8) + def test_astype_uint(self): + arr = date_range('2000', periods=2) + expected = pd.UInt64Index( + np.array([946684800000000000, 946771200000000000], dtype="uint64") + ) + + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + def test_astype_with_tz(self): # with tz diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index 68c338c6cb688..6abdf5962d6cf 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -41,6 +41,12 @@ def test_astype_conversion(self): tm.assert_index_equal(result, Index(idx.asi8)) tm.assert_numpy_array_equal(result.values, idx.asi8) + def test_astype_uint(self): + arr = period_range('2000', periods=2) + expected = pd.UInt64Index(np.array([10957, 10958], dtype='uint64')) + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + def test_astype_object(self): idx = pd.PeriodIndex([], freq='M') diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py index aa0017324e078..088322d9f9a97 100644 --- a/pandas/tests/indexes/timedeltas/test_astype.py +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -54,6 +54,15 @@ def test_astype(self): tm.assert_index_equal(result, Index(rng.asi8)) tm.assert_numpy_array_equal(rng.asi8, result.values) + def test_astype_uint(self): + arr = timedelta_range('1H', periods=2) + expected = pd.UInt64Index( + np.array([3600000000000, 90000000000000], dtype="uint64") + ) + + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + def test_astype_timedelta64(self): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) From 5fa32e9848406fc2f9f5a1608a7e3545df70b06c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 10:40:46 -0600 Subject: [PATCH 10/14] Handle uint in astype tests --- pandas/tests/arrays/test_datetimes.py | 17 +++++++++++++++++ pandas/tests/arrays/test_period.py | 10 ++++++++-- pandas/tests/arrays/test_timedeltas.py | 17 +++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 615ffe8ae8e38..871bc440825bf 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -53,6 +53,23 @@ def test_astype_to_same(self): result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) assert result is arr + @pytest.mark.parametrize("dtype", [ + int, np.int32, np.int64, 'uint32', 'uint64', + ]) + def test_astype_int(self, dtype): + arr = DatetimeArray._from_sequence([pd.Timestamp('2000'), + pd.Timestamp('2001')]) + result = arr.astype(dtype) + + if np.dtype(dtype).kind == 'u': + expected_dtype = np.dtype('uint64') + else: + expected_dtype = np.dtype('int64') + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + def test_tz_setter_raises(self): arr = DatetimeArray._from_sequence(['2000'], tz='US/Central') with pytest.raises(AttributeError, match='tz_localize'): diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 079ff416b7899..7fb88640e5fb4 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -87,13 +87,19 @@ def test_take_raises(): arr.take([0, -1], allow_fill=True, fill_value='foo') -@pytest.mark.parametrize('dtype', [int, np.int32, np.int64, 'uint']) +@pytest.mark.parametrize('dtype', [ + int, np.int32, np.int64, 'uint32', 'uint64', +]) def test_astype(dtype): # We choose to ignore the sign and size of integers for # Period/Datetime/Timedelta astype arr = period_array(['2000', '2001', None], freq='D') result = arr.astype(dtype) - expected_dtype = np.dtype('int64') + + if np.dtype(dtype).kind == 'u': + expected_dtype = np.dtype('uint64') + else: + expected_dtype = np.dtype('int64') expected = arr.astype(expected_dtype) assert result.dtype == expected_dtype diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 97ac3fce07088..287079165284b 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -55,3 +55,20 @@ def test_neg_freq(self): result = -arr tm.assert_timedelta_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", [ + int, np.int32, np.int64, 'uint32', 'uint64', + ]) + def test_astype_int(self, dtype): + arr = TimedeltaArray._from_sequence([pd.Timedelta('1H'), + pd.Timedelta('2H')]) + result = arr.astype(dtype) + + if np.dtype(dtype).kind == 'u': + expected_dtype = np.dtype('uint64') + else: + expected_dtype = np.dtype('int64') + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) From 5d718e66d86ab8fc6707ece5bb062086cc36424b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 11:02:50 -0600 Subject: [PATCH 11/14] Fixed TimedeltaArray._format_native_types --- pandas/core/arrays/timedeltas.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index b00cefb9e88f8..a08a7cf2dc718 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -268,8 +268,11 @@ def _formatter(self, boxed=False): from pandas.io.formats.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) - def _format_native_types(self): - return self.astype(object) + def _format_native_types(self, na_rep=u'NaT', date_format=None): + from pandas.io.formats.format import _get_format_timedelta64 + + formatter = _get_format_timedelta64(self._data, na_rep) + return np.array([formatter(x) for x in self._data]) # ---------------------------------------------------------------- # Arithmetic Methods From 33b543484adc47042eebbe2ebdc6e5578428f869 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 11:48:21 -0600 Subject: [PATCH 12/14] Linting --- pandas/core/indexes/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index eb6bdf41a1286..2e13f4959484a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -14,7 +14,7 @@ from pandas.core.dtypes.common import ( ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer, - is_integer_dtype, is_list_like, is_period_dtype, is_scalar, pandas_dtype) + is_list_like, is_period_dtype, is_scalar) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core import algorithms, ops From e29d8984897c665051c0cc5b62929a43f219dd9e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 12:38:17 -0600 Subject: [PATCH 13/14] Change default to str This makes the default na repr match the expected type for the formatter. --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/period.py | 2 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 39b359457585c..2acb08b696506 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -317,7 +317,7 @@ def _ndarray_values(self): # ---------------------------------------------------------------- # Rendering Methods - def _format_native_types(self, na_rep=u'NaT', date_format=None): + def _format_native_types(self, na_rep='NaT', date_format=None): """ Helper method for astype when converting to strings. diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 46ba9a64bf54e..966511d048421 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -525,7 +525,7 @@ def _validate_fill_value(self, fill_value): # ----------------------------------------------------------------- # Rendering Methods - def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): from pandas.io.formats.format import _get_format_datetime64_from_values fmt = _get_format_datetime64_from_values(self, date_format) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d45cd9402d45b..f7632255f26e0 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -572,7 +572,7 @@ def asfreq(self, freq=None, how='E'): # ------------------------------------------------------------------ # Rendering Methods - def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): """ actually format my specific types """ diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a08a7cf2dc718..2c7187f85517f 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -268,7 +268,7 @@ def _formatter(self, boxed=False): from pandas.io.formats.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) - def _format_native_types(self, na_rep=u'NaT', date_format=None): + def _format_native_types(self, na_rep='NaT', date_format=None): from pandas.io.formats.format import _get_format_timedelta64 formatter = _get_format_timedelta64(self._data, na_rep) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e87b05110053e..0ae96bcb88249 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -395,7 +395,7 @@ def _maybe_convert_timedelta(self, other): # ------------------------------------------------------------------------ # Rendering Methods - def _format_native_types(self, na_rep=u'NaT', quoting=None, **kwargs): + def _format_native_types(self, na_rep='NaT', quoting=None, **kwargs): # just dispatch, return ndarray return self._data._format_native_types(na_rep=na_rep, quoting=quoting, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index c58150be7b5a4..8192b1a9ae2d0 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -224,7 +224,7 @@ def _formatter_func(self): from pandas.io.formats.format import _get_format_timedelta64 return _get_format_timedelta64(self, box=True) - def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): from pandas.io.formats.format import Timedelta64Formatter return Timedelta64Formatter(values=self, nat_rep=na_rep, From a3c42f0f696b29f7f2aac1344dc7195210815a24 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 13:27:25 -0600 Subject: [PATCH 14/14] revert for period --- pandas/core/arrays/period.py | 2 +- pandas/core/indexes/period.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f7632255f26e0..d45cd9402d45b 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -572,7 +572,7 @@ def asfreq(self, freq=None, how='E'): # ------------------------------------------------------------------ # Rendering Methods - def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): + def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): """ actually format my specific types """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0ae96bcb88249..e87b05110053e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -395,7 +395,7 @@ def _maybe_convert_timedelta(self, other): # ------------------------------------------------------------------------ # Rendering Methods - def _format_native_types(self, na_rep='NaT', quoting=None, **kwargs): + def _format_native_types(self, na_rep=u'NaT', quoting=None, **kwargs): # just dispatch, return ndarray return self._data._format_native_types(na_rep=na_rep, quoting=quoting,