diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index ceaf9e748fe5a..0d96732093a8c 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 
-from pandas._libs import NaT, iNaT, lib
+from pandas._libs import NaT, algos, iNaT, lib
 from pandas._libs.tslibs.period import (
     DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period)
 from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
@@ -155,6 +155,7 @@ class TimelikeOps(object):
           times
 
         .. versionadded:: 0.24.0
 
+    nonexistent : 'shift', 'NaT', default 'raise'
         A nonexistent time does not exist in a particular timezone
         where clocks moved forward due to DST.
@@ -246,7 +247,7 @@ def _round(self, freq, mode, ambiguous, nonexistent):
         if 'tz' in attribs:
             attribs['tz'] = None
         return self._ensure_localized(
-            self._shallow_copy(result, **attribs), ambiguous, nonexistent
+            self._simple_new(result, **attribs), ambiguous, nonexistent
         )
 
     @Appender((_round_doc + _round_example).format(op="round"))
@@ -310,6 +311,8 @@ def shape(self):
 
     @property
     def size(self):
+        # type: () -> int
+        """The number of elements in this array."""
         return np.prod(self.shape)
 
     def __len__(self):
@@ -554,6 +557,21 @@ def _validate_frequency(cls, index, freq, **kwargs):
                 'does not conform to passed frequency {passed}'
                 .format(infer=inferred, passed=freq.freqstr))
 
+    # monotonicity/uniqueness properties are called via frequencies.infer_freq,
+    # see GH#23789
+
+    @property
+    def _is_monotonic_increasing(self):
+        return algos.is_monotonic(self.asi8, timelike=True)[0]
+
+    @property
+    def _is_monotonic_decreasing(self):
+        return algos.is_monotonic(self.asi8, timelike=True)[1]
+
+    @property
+    def _is_unique(self):
+        return len(unique1d(self.asi8)) == len(self)
+
     # ------------------------------------------------------------------
     # Arithmetic Methods
 
@@ -661,9 +679,7 @@ def _add_nat(self):
         # and datetime dtypes
         result = np.zeros(len(self), dtype=np.int64)
         result.fill(iNaT)
-        if is_timedelta64_dtype(self):
-            return type(self)(result, freq=None)
-        return type(self)(result, tz=self.tz, freq=None)
+        return type(self)(result, dtype=self.dtype, freq=None)
 
     def _sub_nat(self):
         """
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 4849ee1e3e665..ff19cf101d275 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -165,10 +165,23 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
     _data
     """
     _typ = "datetimearray"
+
+    # define my properties & methods for delegation
     _bool_ops = ['is_month_start', 'is_month_end',
                  'is_quarter_start', 'is_quarter_end', 'is_year_start',
                  'is_year_end', 'is_leap_year']
     _object_ops = ['weekday_name', 'freq', 'tz']
+    _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
+                  'weekofyear', 'week', 'weekday', 'dayofweek',
+                  'dayofyear', 'quarter', 'days_in_month',
+                  'daysinmonth', 'microsecond',
+                  'nanosecond']
+    _other_ops = ['date', 'time', 'timetz']
+    _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops
+    _datetimelike_methods = ['to_period', 'tz_localize',
+                             'tz_convert',
+                             'normalize', 'strftime', 'round', 'floor',
+                             'ceil', 'month_name', 'day_name']
 
     # dummy attribute so that datetime.__eq__(DatetimeArray) defers
     # by returning NotImplemented
@@ -527,7 +540,7 @@ def _add_offset(self, offset):
                           "or DatetimeIndex", PerformanceWarning)
             result = self.astype('O') + offset
 
-        return type(self)(result, freq='infer')
+        return type(self)._from_sequence(result, freq='infer')
 
     def _sub_datetimelike_scalar(self, other):
         # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]
@@ -562,8 +575,8 @@ def _add_delta(self, delta):
         -------
         result : DatetimeArray
         """
-        new_values = dtl.DatetimeLikeArrayMixin._add_delta(self, delta)
-        return type(self)(new_values, tz=self.tz, freq='infer')
+        new_values = super(DatetimeArrayMixin, self)._add_delta(delta)
+        return type(self)._from_sequence(new_values, tz=self.tz, freq='infer')
 
     # -----------------------------------------------------------------
     # Timezone Conversion and Localization Methods
@@ -866,14 +879,15 @@ def normalize(self):
                 dtype='datetime64[ns, Asia/Calcutta]', freq=None)
         """
         if self.tz is None or timezones.is_utc(self.tz):
-            not_null = self.notna()
+            not_null = ~self.isna()
             DAY_NS = ccalendar.DAY_SECONDS * 1000000000
             new_values = self.asi8.copy()
             adjustment = (new_values[not_null] % DAY_NS)
             new_values[not_null] = new_values[not_null] - adjustment
         else:
             new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz)
-        return type(self)(new_values, freq='infer').tz_localize(self.tz)
+        return type(self)._from_sequence(new_values,
+                                         freq='infer').tz_localize(self.tz)
 
     def to_period(self, freq=None):
         """
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index d9dde1c699761..f4bdd44b9ec39 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -336,7 +336,7 @@ def to_timestamp(self, freq=None, how='start'):
 
         new_data = self.asfreq(freq, how=how)
         new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
-        return DatetimeArrayMixin(new_data, freq='infer')
+        return DatetimeArrayMixin._from_sequence(new_data, freq='infer')
 
     # --------------------------------------------------------------------
     # Array-like / EA-Interface Methods
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 9b7e1986e4831..a5d074df338ee 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from pandas._libs import algos, lib, tslibs
+from pandas._libs import lib, tslibs
 from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
 from pandas._libs.tslibs.fields import get_timedelta_field
 from pandas._libs.tslibs.timedeltas import (
@@ -15,15 +15,16 @@
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import (
-    _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
+    _NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
     is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
     is_string_dtype, is_timedelta64_dtype)
+from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.generic import (
     ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import ops
-from pandas.core.algorithms import checked_add_with_arr, unique1d
+from pandas.core.algorithms import checked_add_with_arr
 import pandas.core.common as com
 
 from pandas.tseries.frequencies import to_offset
@@ -90,7 +91,7 @@ def wrapper(self, other):
 
         else:
             try:
-                other = type(self)(other)._data
+                other = type(self)._from_sequence(other)._data
             except (ValueError, TypeError):
                 return ops.invalid_comparison(self, other, op)
 
@@ -112,6 +113,14 @@ def wrapper(self, other):
 class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
     _typ = "timedeltaarray"
     __array_priority__ = 1000
+    # define my properties & methods for delegation
+    _other_ops = []
+    _bool_ops = []
+    _object_ops = ['freq']
+    _field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds']
+    _datetimelike_ops = _field_ops + _object_ops + _bool_ops
+    _datetimelike_methods = ["to_pytimedelta", "total_seconds",
+                             "round", "floor", "ceil"]
 
     # Needed so that NaT.__richcmp__(DateTimeArray) operates pointwise
     ndim = 1
@@ -222,21 +231,6 @@ def _validate_fill_value(self, fill_value):
                             "Got '{got}'.".format(got=fill_value))
         return fill_value
 
-    # monotonicity/uniqueness properties are called via frequencies.infer_freq,
-    # see GH#23789
-
-    @property
-    def _is_monotonic_increasing(self):
-        return algos.is_monotonic(self.asi8, timelike=True)[0]
-
-    @property
-    def _is_monotonic_decreasing(self):
-        return algos.is_monotonic(self.asi8, timelike=True)[1]
-
-    @property
-    def _is_unique(self):
-        return len(unique1d(self.asi8)) == len(self)
-
     # ----------------------------------------------------------------
     # Arithmetic Methods
 
@@ -262,8 +256,8 @@ def _add_delta(self, delta):
         -------
         result : TimedeltaArray
         """
-        new_values = dtl.DatetimeLikeArrayMixin._add_delta(self, delta)
-        return type(self)(new_values, freq='infer')
+        new_values = super(TimedeltaArrayMixin, self)._add_delta(delta)
+        return type(self)._from_sequence(new_values, freq='infer')
 
     def _add_datetime_arraylike(self, other):
         """
@@ -293,7 +287,8 @@ def _add_datetimelike_scalar(self, other):
         result = checked_add_with_arr(i8, other.value,
                                       arr_mask=self._isnan)
         result = self._maybe_mask_results(result)
-        return DatetimeArrayMixin(result, tz=other.tz, freq=self.freq)
+        dtype = DatetimeTZDtype(tz=other.tz) if other.tz else _NS_DTYPE
+        return DatetimeArrayMixin(result, dtype=dtype, freq=self.freq)
 
     def _addsub_offset_array(self, other, op):
         # Add or subtract Array-like of DateOffset objects
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index ee5f0820a7b3e..1c966ab58e8c4 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -191,32 +191,21 @@ def _join_i8_wrapper(joinf, **kwargs):
     _tz = None
     _freq = None
     _comparables = ['name', 'freqstr', 'tz']
-    _attributes = ['name', 'freq', 'tz']
+    _attributes = ['name', 'tz', 'freq']
 
     # dummy attribute so that datetime.__eq__(DatetimeArray) defers
     # by returning NotImplemented
     timetuple = None
 
-    # define my properties & methods for delegation
-    _bool_ops = ['is_month_start', 'is_month_end',
-                 'is_quarter_start', 'is_quarter_end', 'is_year_start',
-                 'is_year_end', 'is_leap_year']
-    _object_ops = ['weekday_name', 'freq', 'tz']
-    _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
-                  'weekofyear', 'week', 'weekday', 'dayofweek',
-                  'dayofyear', 'quarter', 'days_in_month',
-                  'daysinmonth', 'microsecond',
-                  'nanosecond']
-    _other_ops = ['date', 'time', 'timetz']
-    _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops
-    _datetimelike_methods = ['to_period', 'tz_localize',
-                             'tz_convert',
-                             'normalize', 'strftime', 'round', 'floor',
-                             'ceil', 'month_name', 'day_name']
-
     _is_numeric_dtype = False
     _infer_as_myclass = True
 
+    # some things like freq inference make use of these attributes.
+    _bool_ops = DatetimeArray._bool_ops
+    _object_ops = DatetimeArray._object_ops
+    _field_ops = DatetimeArray._field_ops
+    _datetimelike_ops = DatetimeArray._datetimelike_ops
+
     # --------------------------------------------------------------------
     # Constructors
 
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index acd89e9ec8492..9dc2692f276e3 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -873,6 +873,8 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
 
     if is_datetime64_dtype(values.dtype):
         fmt_klass = Datetime64Formatter
+    elif is_datetime64tz_dtype(values):
+        fmt_klass = Datetime64TZFormatter
     elif is_timedelta64_dtype(values.dtype):
         fmt_klass = Timedelta64Formatter
     elif is_extension_array_dtype(values.dtype):
@@ -881,8 +883,6 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
         fmt_klass = FloatArrayFormatter
     elif is_integer_dtype(values.dtype):
         fmt_klass = IntArrayFormatter
-    elif is_datetime64tz_dtype(values):
-        fmt_klass = Datetime64TZFormatter
     else:
         fmt_klass = GenericArrayFormatter
 
diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py
index 4b8ead71ed74c..a1916979ab536 100644
--- a/pandas/tests/indexes/datetimes/test_astype.py
+++ b/pandas/tests/indexes/datetimes/test_astype.py
@@ -299,3 +299,14 @@ def test_to_period_nofreq(self):
         idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
         assert idx.freqstr is None
         tm.assert_index_equal(idx.to_period(), expected)
+
+    @pytest.mark.parametrize('tz', [None, 'US/Central'])
+    def test_astype_array_fallback(self, tz):
+        obj = pd.date_range("2000", periods=2, tz=tz)
+        result = obj.astype(bool)
+        expected = pd.Index(np.array([True, True]))
+        tm.assert_index_equal(result, expected)
+
+        result = obj._data.astype(bool)
+        expected = np.array([True, True])
+        tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py
index 3c384eed0a848..68c338c6cb688 100644
--- a/pandas/tests/indexes/period/test_astype.py
+++ b/pandas/tests/indexes/period/test_astype.py
@@ -97,3 +97,24 @@ def test_astype_object2(self):
         for i in [0, 1, 3]:
             assert result_list[i] == expected_list[i]
         assert result_list[2] is pd.NaT
+
+    def test_astype_category(self):
+        obj = pd.period_range("2000", periods=2)
+        result = obj.astype('category')
+        expected = pd.CategoricalIndex([pd.Period('2000-01-01', freq="D"),
+                                        pd.Period('2000-01-02', freq="D")])
+        tm.assert_index_equal(result, expected)
+
+        result = obj._data.astype('category')
+        expected = expected.values
+        tm.assert_categorical_equal(result, expected)
+
+    def test_astype_array_fallback(self):
+        obj = pd.period_range("2000", periods=2)
+        result = obj.astype(bool)
+        expected = pd.Index(np.array([True, True]))
+        tm.assert_index_equal(result, expected)
+
+        result = obj._data.astype(bool)
+        expected = np.array([True, True])
+        tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py
index 1a0481b730618..6afbe9cff42c2 100644
--- a/pandas/tests/indexes/timedeltas/test_astype.py
+++ b/pandas/tests/indexes/timedeltas/test_astype.py
@@ -4,6 +4,7 @@
 import pytest
 
 import pandas.util.testing as tm
+import pandas as pd
 from pandas import (
     Float64Index, Index, Int64Index, NaT, Timedelta, TimedeltaIndex,
     timedelta_range
@@ -77,3 +78,13 @@ def test_astype_raises(self, dtype):
         msg = 'Cannot cast TimedeltaIndex to dtype'
         with pytest.raises(TypeError, match=msg):
             idx.astype(dtype)
+
+    def test_astype_array_fallback(self):
+        obj = pd.timedelta_range("1H", periods=2)
+        result = obj.astype(bool)
+        expected = pd.Index(np.array([True, True]))
+        tm.assert_index_equal(result, expected)
+
+        result = obj._data.astype(bool)
+        expected = np.array([True, True])
+        tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index ed954c76294b6..3fdf303ea2e8e 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1026,7 +1026,8 @@ def test_tz_range_is_utc(self):
         dti = pd.DatetimeIndex(tz_range)
         assert dumps(dti, iso_dates=True) == exp
         df = DataFrame({'DT': dti})
-        assert dumps(df, iso_dates=True) == dfexp
+        result = dumps(df, iso_dates=True)
+        assert result == dfexp
 
         tz_range = pd.date_range('2013-01-01 00:00:00', periods=2,
                                  tz='US/Eastern')
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index 5c3cf5450986a..0d617d5a26706 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -555,3 +555,13 @@ def test_setitem_with_string_index(self):
         x['Date'] = date.today()
         assert x.Date == date.today()
         assert x['Date'] == date.today()
+
+    def test_setitem_with_different_tz(self):
+        # GH#24024
+        ser = pd.Series(pd.date_range('2000', periods=2, tz="US/Central"))
+        ser[0] = pd.Timestamp("2000", tz='US/Eastern')
+        expected = pd.Series([
+            pd.Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"),
+            pd.Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"),
+        ], dtype=object)
+        tm.assert_series_equal(ser, expected)
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
index b9cf845ea47d7..ce464184cd8d6 100644
--- a/pandas/tests/series/test_timeseries.py
+++ b/pandas/tests/series/test_timeseries.py
@@ -1023,3 +1023,13 @@ def test_get_level_values_box(self):
         index = MultiIndex(levels=levels, codes=codes)
 
         assert isinstance(index.get_level_values(0)[0], Timestamp)
+
+    def test_view_tz(self):
+        # GH#24024
+        ser = pd.Series(pd.date_range('2000', periods=4, tz='US/Central'))
+        result = ser.view("i8")
+        expected = pd.Series([946706400000000000,
+                              946792800000000000,
+                              946879200000000000,
+                              946965600000000000])
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 9f0def034f976..f5d33c3e09a97 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -1237,21 +1237,7 @@ def test_values_consistent(array, expected_type, dtype):
     assert type(l_values) is expected_type
     assert type(l_values) is type(r_values)
 
-    if isinstance(l_values, np.ndarray):
-        tm.assert_numpy_array_equal(l_values, r_values)
-    elif isinstance(l_values, pd.Index):
-        tm.assert_index_equal(l_values, r_values)
-    elif pd.api.types.is_categorical(l_values):
-        tm.assert_categorical_equal(l_values, r_values)
-    elif pd.api.types.is_period_dtype(l_values):
-        tm.assert_period_array_equal(l_values, r_values)
-    elif pd.api.types.is_interval_dtype(l_values):
-        tm.assert_interval_array_equal(l_values, r_values)
-    else:
-        raise TypeError("Unexpected type {}".format(type(l_values)))
-
-    assert l_values.dtype == dtype
-    assert r_values.dtype == dtype
+    tm.assert_equal(l_values, r_values)
 
 
 @pytest.mark.parametrize('array, expected', [
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index faed4ccebd96b..56a3cda1ba89f 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -1661,9 +1661,9 @@ def to_array(obj):
     if is_period_dtype(obj):
        return period_array(obj)
     elif is_datetime64_dtype(obj) or is_datetime64tz_dtype(obj):
-        return DatetimeArray(obj)
+        return DatetimeArray._from_sequence(obj)
     elif is_timedelta64_dtype(obj):
-        return TimedeltaArray(obj)
+        return TimedeltaArray._from_sequence(obj)
     else:
         return np.array(obj)
 
diff --git a/setup.cfg b/setup.cfg
index 7f92882317927..a675ccc0d8fdc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -131,7 +131,7 @@ known_post_core=pandas.tseries,pandas.io,pandas.plotting
 sections=FUTURE,STDLIB,THIRDPARTY,PRE_CORE,DTYPES,FIRSTPARTY,POST_CORE,LOCALFOLDER
 
 known_first_party=pandas
-known_third_party=Cython,numpy,python-dateutil,pytz,pyarrow,pytest
+known_third_party=Cython,numpy,dateutil,python-dateutil,pytz,pyarrow,pytest
 multi_line_output=4
 force_grid_wrap=0
 combine_as_imports=True
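
For reference, a minimal usage sketch of the behavior pinned down by the new tests above. It assumes a pandas build containing this patch; the assertions simply mirror the test expectations rather than an independently verified API.

# Sketch only: mirrors the assertions in the new tests (astype fallback,
# setitem with a different tz, and view("i8")); not an official example.
import numpy as np
import pandas as pd

# astype(bool) on a datetime-like index falls back to a plain Index of bools,
# and the backing array (obj._data) falls back to a plain ndarray.
dti = pd.date_range("2000", periods=2, tz="US/Central")
assert dti.astype(bool).equals(pd.Index(np.array([True, True])))
assert (dti._data.astype(bool) == np.array([True, True])).all()

# Assigning a Timestamp with a different tz upcasts the Series to object
# dtype instead of silently converting the timezone (GH#24024).
ser = pd.Series(pd.date_range("2000", periods=2, tz="US/Central"))
ser[0] = pd.Timestamp("2000", tz="US/Eastern")
assert ser.dtype == object

# view("i8") still exposes the underlying int64 nanosecond values.
ser2 = pd.Series(pd.date_range("2000", periods=4, tz="US/Central"))
assert ser2.view("i8").dtype == "int64"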