Skip to content

Commit b485e5a

Browse files
committed
Large cleanup
1 parent ac734b3 commit b485e5a

File tree

15 files changed

+50
-63
lines changed

15 files changed

+50
-63
lines changed

doc/source/whatsnew/v0.24.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,7 @@ Previous Behavior:
678678
.. code-block:: ipython
679679
680680
In [2]: ser = pd.Series([pd.Timestamp('2000', tz='UTC'),
681-
pd.Timestamp('2000', tz='UTC')])
681+
...: pd.Timestamp('2000', tz='UTC')])
682682
In [3]: ser.unique()
683683
Out[3]: array([Timestamp('2000-01-01 00:00:00+0000', tz='UTC')], dtype=object)
684684
@@ -1329,6 +1329,7 @@ Datetimelike
13291329
- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`)
13301330
- Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`)
13311331
- Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`)
1332+
- Bug in :class:`DataFrame` when creating a new column from an ndarray of :class:`Timestamp` objects with timezones creating an object-dtype column, rather than datetime with timezone (:issue:`23932`)
13321333
- Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`)
13331334
- Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`)
13341335
- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`)

pandas/core/arrays/datetimelike.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,10 @@ def asi8(self):
394394
# do not cache or you'll create a memory leak
395395
return self._data.view('i8')
396396

397+
@property
398+
def _ndarray_values(self):
399+
return self._data
400+
397401
# ------------------------------------------------------------------
398402
# Formatting
399403

@@ -514,9 +518,6 @@ def __setitem__(
514518
self._check_compatible_with(value)
515519
value = self._unbox_scalar(value)
516520
elif isna(value) or value == iNaT:
517-
# TODO: Right now DatetimeTZBlock.fill_value is iNaT.
518-
# There's some confuction about whether Block.fill_value should
519-
# be the NA value or the storage value.
520521
value = iNaT
521522
else:
522523
msg = (
@@ -589,7 +590,6 @@ def astype(self, dtype, copy=True):
589590
# 3. DatetimeArray.astype handles datetime -> period
590591
from pandas import Categorical
591592
dtype = pandas_dtype(dtype)
592-
# TODO: handle PeriodDtype, perhaps other EAs.
593593

594594
if is_object_dtype(dtype):
595595
return self._box_values(self.asi8)
@@ -739,7 +739,7 @@ def repeat(self, repeats, *args, **kwargs):
739739
return type(self)(values, dtype=self.dtype)
740740

741741
def map(self, mapper):
742-
# TODO: remove this hack
742+
# TODO(GH-23179): Add ExtensionArray.map
743743
# Need to figure out if we want ExtensionArray.map first.
744744
# If so, then we can refactor IndexOpsMixin._map_values to
745745
# a standalone function and call from here.
@@ -1339,16 +1339,12 @@ def _reduce(self, name, skipna=True, **kwargs):
13391339
# Reductions
13401340

13411341
def any(self, skipna=True):
1342-
if skipna:
1343-
values = self[~self.isnan]
1344-
else:
1345-
values = self
1346-
1347-
# TODO: Should any period be considered Falsey?
1342+
values = self._values_for_reduction(skipna=skipna)
13481343
return len(values)
13491344

13501345
def all(self, skipna=True):
1351-
return not self.all(skipna=skipna)
1346+
values = self._values_for_reduction(skipna=skipna)
1347+
return len(values)
13521348

13531349
def _values_for_reduction(self, skipna=True):
13541350
if skipna:

pandas/core/arrays/datetimes.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -242,11 +242,11 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
242242
dtype = DatetimeTZDtype(tz=dtype.tz)
243243
elif dtz and values.tz:
244244
if not timezones.tz_compare(dtz, values.tz):
245-
# todo standard error message.
246245
msg = (
247-
"Timezones do not match. {} != {}."
246+
"Timezone of the array and 'dtype' do not match. "
247+
"'{}' != '{}'"
248248
)
249-
raise ValueError(msg.format(dtz, values.tz))
249+
raise TypeError(msg.format(dtz, values.tz))
250250
elif values.tz:
251251
dtype = values.dtype
252252
# freq = validate_values_freq(values, freq)
@@ -273,7 +273,13 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
273273
)
274274
raise ValueError(msg.format(values.dtype))
275275

276-
dtype = pandas_dtype(dtype) # TODO: profile
276+
# Performance note:
277+
# for a length 10,000 ndarray[datetime64[ns]], pandas_dtype() takes
278+
# ~15% of the runtime of __init__. It's only useful for converting
279+
# string aliases like 'M8[ns]' or 'datetime64[ns, tz]'.
280+
# We should consider requiring an actual dtype.
281+
282+
dtype = pandas_dtype(dtype)
277283
if (isinstance(dtype, np.dtype) and dtype != _NS_DTYPE
278284
or not isinstance(dtype, (np.dtype, DatetimeTZDtype))):
279285
msg = (
@@ -436,7 +442,6 @@ def _scalar_from_string(self, value):
436442
return Timestamp(value, tz=self.tz)
437443

438444
def _check_compatible_with(self, other):
439-
# TODO: verify this.
440445
if not timezones.tz_compare(self.tz, other.tz):
441446
raise IncompatibleTimeZoneError(
442447
"Timezone's don't match. '{} != {}'".format(self.tz, other.tz)
@@ -499,9 +504,9 @@ def _resolution(self):
499504
# Array-Like / EA-Interface Methods
500505

501506
def __array__(self, dtype=None):
502-
# TODO: Check PeriodArray.__array__ and push to parent
503-
# This may need to wait for the deprecation of np.array
504-
# on datetimetz data.
507+
# TODO(datetime-tz __array__): push to parent
508+
# If deprecating behavior for datetime-tz, we'll need to handle that
509+
# specially.
505510
if is_object_dtype(dtype):
506511
return np.array(list(self), dtype=object)
507512
elif is_int64_dtype(dtype):
@@ -535,16 +540,8 @@ def __iter__(self):
535540
# ----------------------------------------------------------------
536541
# ExtensionArray Interface
537542

538-
@property
539-
def _ndarray_values(self):
540-
# TODO: Move to parent
541-
return self._data
542-
543543
@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
544544
def _validate_fill_value(self, fill_value):
545-
# TODO: Right now DatetimeTZBlock.fill_value is iNaT.
546-
# There's some confuction about whether Block.fill_value should
547-
# be the NA value or the storage value.
548545
if isna(fill_value) or fill_value == iNaT:
549546
fill_value = iNaT
550547
elif isinstance(fill_value, (datetime, np.datetime64)):
@@ -1094,7 +1091,6 @@ def astype(self, dtype, copy=True):
10941091
result = result._data
10951092
return result
10961093
elif is_datetime64tz_dtype(self.dtype) and self.dtype == dtype:
1097-
# TODO: add specific tests for each of these cases to arrays.
10981094
if copy:
10991095
return self.copy()
11001096
return self
@@ -1634,13 +1630,13 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
16341630
data = data._data
16351631

16361632
if isinstance(data, DatetimeArrayMixin):
1637-
# TODO: verify this changes. This was done in haste.
1638-
if inferred_freq and data.freq:
1639-
assert inferred_freq == data.freq
1640-
16411633
if tz and data.tz:
16421634
if not timezones.tz_compare(tz, data.tz):
1643-
raise TypeError("TODO")
1635+
msg = (
1636+
"Timezone of the array and 'dtype' do not match. "
1637+
"'{}' != '{}'"
1638+
)
1639+
raise TypeError(msg.format(tz, data.tz))
16441640
tz = data.tz
16451641
tz = validate_tz_from_dtype(dtype, tz)
16461642

pandas/core/arrays/period.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -258,11 +258,6 @@ def _check_compatible_with(self, other):
258258
def dtype(self):
259259
return self._dtype
260260

261-
@property
262-
def _ndarray_values(self):
263-
# Ordinals
264-
return self._data
265-
266261
@property
267262
def freq(self):
268263
"""

pandas/core/arrays/timedeltas.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,8 @@ def _formatter(self, boxed=False):
285285
return _get_format_timedelta64(self, box=True)
286286

287287
def __array__(self, dtype=None):
288-
# https://github.com/pandas-dev/pandas/pull/23593
289-
# TODO: Check PeriodArray.__array__ and push to parent
288+
# TODO(https://github.com/pandas-dev/pandas/pull/23593)
289+
# Maybe push to parent once datetimetz __array__ is figured out.
290290
if is_object_dtype(dtype):
291291
return np.array(list(self), dtype=object)
292292
elif is_int64_dtype(dtype):
@@ -768,7 +768,6 @@ def astype(self, dtype, copy=True):
768768
return values
769769
return result.astype('i8')
770770
elif is_timedelta64_ns_dtype(dtype):
771-
# TODO: Figure out why this was needed.
772771
if copy:
773772
return self.copy()
774773
return self

pandas/core/frame.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4200,7 +4200,8 @@ def _maybe_casted_values(index, labels=None):
42004200
else:
42014201
values = values.take(labels)
42024202

4203-
# TODO: Push this into maybe_upcast_putmask?
4203+
# TODO(https://github.com/pandas-dev/pandas/issues/24206)
4204+
# Push this into maybe_upcast_putmask?
42044205
# We can't pass EAs there right now. Looks a bit
42054206
# complicated.
42064207
# So we unbox the ndarray_values, op, re-box.

pandas/core/indexes/datetimelike.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,6 @@ def _maybe_box_as_values(self, values, **attribs):
574574
return values
575575

576576
def _deepcopy_if_needed(self, orig, copy=False):
577-
# TODO: is this the right class?
578577
# Override Index._deepcopy_if_needed, since _data is not an ndarray.
579578
# what is orig here? ndarray or DatetimeArray, DatetimeIndex?
580579
if copy:
@@ -801,7 +800,6 @@ class DatelikeIndexMixin(object):
801800

802801
@property
803802
def freq(self):
804-
# TODO(DatetimeArray): remove
805803
# Can't simply use delegate_names since our base class is defining
806804
# freq
807805
return self._data.freq

pandas/core/indexes/datetimes.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,6 @@ def _join_i8_wrapper(joinf, **kwargs):
224224
_tz = None
225225
_freq = None
226226
_comparables = ['name', 'freqstr', 'tz']
227-
# TODO: decide whether freq is an attribute.
228-
# Keeping it in attributes breaks things like Index.__getitem__
229227
_attributes = ['name', 'tz', 'freq']
230228

231229
# dummy attribute so that datetime.__eq__(DatetimeArray) defers

pandas/core/indexes/timedeltas.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ def _join_i8_wrapper(joinf, **kwargs):
160160

161161
_freq = None
162162

163-
# TODO: Deduplicate with DatetimeIndex by doing these as props on base
164163
_box_func = TimedeltaArray._box_func
165164
_bool_ops = TimedeltaArray._bool_ops
166165
_object_ops = TimedeltaArray._object_ops
@@ -220,7 +219,6 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):
220219
# `dtype` is passed by _shallow_copy in corner cases, should always
221220
# be timedelta64[ns] if present
222221
if not isinstance(values, TimedeltaArray):
223-
# TODO: make TimedeltaArray._simple_new idempotent?
224222
values = TimedeltaArray._simple_new(values, dtype=dtype,
225223
freq=freq)
226224
assert isinstance(values, TimedeltaArray), type(values)

pandas/core/reshape/merge.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1590,7 +1590,6 @@ def _right_outer_join(x, y, max_groups):
15901590

15911591
def _factorize_keys(lk, rk, sort=True):
15921592
if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
1593-
# TODO: verify if we get just arrays here, or maybe series / index
15941593
lk = lk._data
15951594
rk = rk._data
15961595

pandas/tests/arithmetic/test_datetime64.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1471,7 +1471,6 @@ def check(get_ser, test_ser):
14711471
# with 'operate' (from core/ops.py) for the ops that are not
14721472
# defined
14731473
op = getattr(get_ser, op_str, None)
1474-
# TODO: error message changed. Do we care?
14751474
# Previously, _validate_for_numeric_binop in core/indexes/base.py
14761475
# did this for us.
14771476
with pytest.raises(TypeError,

pandas/tests/arrays/test_datetimes.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import pandas as pd
1515
from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
16+
from pandas.core.arrays.datetimes import sequence_to_dt64ns
1617
import pandas.util.testing as tm
1718

1819

@@ -21,7 +22,7 @@ def test_mismatched_timezone_raises(self):
2122
a = DatetimeArray(np.array(['2000-01-01T06:00:00'], dtype='M8[ns]'),
2223
dtype=DatetimeTZDtype(tz='US/Central'))
2324
dtype = DatetimeTZDtype(tz='US/Eastern')
24-
with pytest.raises(ValueError, match='Timezones'):
25+
with pytest.raises(TypeError, match='do not match'):
2526
DatetimeArray(a, dtype=dtype)
2627

2728
def test_non_array_raises(self):
@@ -105,3 +106,17 @@ def test_tz_setter_raises(self):
105106
arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
106107
with pytest.raises(AttributeError, match='tz_localize'):
107108
arr.tz = 'UTC'
109+
110+
111+
class TestSequenceToDT64NS(object):
112+
113+
def test_tz_dtype_mismatch_raises(self):
114+
arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
115+
with pytest.raises(TypeError, match='do not match'):
116+
sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC"))
117+
118+
def test_tz_dtype_matches(self):
119+
arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
120+
result, _, _ = sequence_to_dt64ns(
121+
arr, dtype=DatetimeTZDtype(tz="US/Central"))
122+
tm.assert_extension_array_equal(arr, result)

pandas/tests/extension/test_datetime.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from pandas.tests.extension import base
99

1010

11-
# TODO: figure out a way to test non-TZ
1211
@pytest.fixture(params=["US/Central"])
1312
def dtype(request):
1413
return DatetimeTZDtype(unit="ns", tz=request.param)
@@ -117,8 +116,6 @@ class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests):
117116
implements = {'__sub__', '__rsub__'}
118117

119118
def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
120-
# TODO: move this to the base class?
121-
# It's duplicated between Period and Datetime now
122119
if all_arithmetic_operators in self.implements:
123120
s = pd.Series(data)
124121
self.check_opname(s, all_arithmetic_operators, s.iloc[0],

pandas/tests/test_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,11 +1244,11 @@ def test_iter_box(self):
12441244
# Series[M8[ns]] and Series[m8[ns]] to return a DateLikeArray.
12451245
pytest.param(
12461246
pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]',
1247-
marks=[pytest.mark.xfail(reason="TODO", strict=True)]
1247+
marks=[pytest.mark.xfail(reason="datetime _values", strict=True)]
12481248
),
12491249
pytest.param(
12501250
pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]',
1251-
marks=[pytest.mark.xfail(reason="TODO", strict=True)]
1251+
marks=[pytest.mark.xfail(reason="timedelta _values", strict=True)]
12521252
),
12531253
12541254
])

pandas/tests/test_panel.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -470,11 +470,6 @@ def test_delitem_and_pop(self):
470470
def test_setitem(self):
471471
lp = self.panel.filter(['ItemA', 'ItemB']).to_frame()
472472

473-
# On master we go all the way down to
474-
# MultiIndex.from_tuples(DatetimeIndex), which raise a
475-
# ValueError: cannot include dtype 'M' in a buffer
476-
# Now we (correctly) raise a TypeError.
477-
# TODO: Add release note for this.
478473
with pytest.raises(TypeError):
479474
self.panel['ItemE'] = lp
480475

0 commit comments

Comments
 (0)