Skip to content

Commit 959cd72

Browse files
committed
Squashed commit of the following:
commit 23e5cfc Author: Tom Augspurger <[email protected]> Date: Tue Oct 2 13:10:41 2018 -0500 Use ._tshift internally for datetimelike ops In preparation for PeriodArray / DatetimeArray / TimedeltaArray. Index.shift has a different meaning from ExtensionArray.shift. - Index.shift pointwise shifts each element by some amount - ExtensionArray.shift shifts the *position* of each value in the array, padding the end with NA This is going to get confusing. This PR tries to avoid some of that by internally using a new `_tshift` method (time-shift) when we want to do pointwise shifting of each value. Places that know they want that behavior (like in the datetimelike ops) should use that. commit 1d9f76c Author: Joris Van den Bossche <[email protected]> Date: Tue Oct 2 17:11:11 2018 +0200 CLN: remove Index._to_embed (pandas-dev#22879) * CLN: remove Index._to_embed * pep8 commit 6247da0 Author: Tom Augspurger <[email protected]> Date: Tue Oct 2 08:50:41 2018 -0500 Provide default implementation for `data_repeated` (pandas-dev#22935) commit 5ce06b5 Author: Matthew Roeschke <[email protected]> Date: Mon Oct 1 14:22:20 2018 -0700 BUG: to_datetime preserves name of Index argument in the result (pandas-dev#22918) * BUG: to_datetime preserves name of Index argument in the result * correct test
1 parent 9d17fd2 commit 959cd72

File tree

13 files changed

+81
-88
lines changed

13 files changed

+81
-88
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,7 @@ Datetimelike
655655
- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`)
656656
- Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`)
657657
- Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`)
658+
- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`)
658659

659660
Timedelta
660661
^^^^^^^^^

pandas/core/arrays/datetimelike.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ def _sub_period_array(self, other):
455455
def _addsub_int_array(self, other, op):
456456
"""
457457
Add or subtract array-like of integers equivalent to applying
458-
`shift` pointwise.
458+
`_tshift` pointwise.
459459
460460
Parameters
461461
----------
@@ -555,6 +555,20 @@ def shift(self, periods, freq=None):
555555
return self._tshift(periods, freq=freq)
556556

557557
def _tshift(self, periods, freq=None):
558+
"""
559+
Shift each value by `periods`.
560+
561+
Note this is different from ExtensionArray.shift, which
562+
shifts the *position* of each element, padding the end with
563+
missing values.
564+
565+
Parameters
566+
----------
567+
periods : int
568+
Number of periods to shift by.
569+
freq : pandas.DateOffset, pandas.Timedelta, or string
570+
Frequency increment to shift by.
571+
"""
558572
if freq is not None and freq != self.freq:
559573
if isinstance(freq, compat.string_types):
560574
freq = frequencies.to_offset(freq)

pandas/core/arrays/period.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -572,22 +572,26 @@ def shift(self, periods=1):
572572
-------
573573
shifted : Period Array/Index
574574
"""
575-
# We have two kinds of shift.
576-
# 1. ExtensionArray.shift: move positions of each value,
577-
# fill NA on the end
578-
# 2. Datelike.tshift: move each value through time
579-
# Each Datelike array will implement both. It's up to the
580-
# caller to call the correct one.
581-
return self._ea_shift(periods=periods)
582-
583-
def _ea_shift(self, periods=1):
584-
# TODO: remove from DatetimeLikeArrayMixin
575+
# TODO(DatetimeArray): remove from DatetimeLikeArrayMixin
585576
# The semantics for Index.shift differ from EA.shift
586577
# then just call super.
587578
return ExtensionArray.shift(self, periods)
588579

589580
def _tshift(self, n, freq=None):
590-
# TODO: docs
581+
"""
582+
Shift each value by `periods`.
583+
584+
Note this is different from ExtensionArray.shift, which
585+
shifts the *position* of each element, padding the end with
586+
missing values.
587+
588+
Parameters
589+
----------
590+
periods : int
591+
Number of periods to shift by.
592+
freq : pandas.DateOffset, pandas.Timedelta, or string
593+
Frequency increment to shift by.
594+
"""
591595
values = self.values + n * self.freq.n
592596
if self.hasnans:
593597
values[self._isnan] = iNaT

pandas/core/indexes/base.py

+1-13
Original file line numberDiff line numberDiff line change
@@ -1124,7 +1124,7 @@ def to_series(self, index=None, name=None):
11241124
if name is None:
11251125
name = self.name
11261126

1127-
return Series(self._to_embed(), index=index, name=name)
1127+
return Series(self.values.copy(), index=index, name=name)
11281128

11291129
def to_frame(self, index=True, name=None):
11301130
"""
@@ -1187,18 +1187,6 @@ def to_frame(self, index=True, name=None):
11871187
result.index = self
11881188
return result
11891189

1190-
def _to_embed(self, keep_tz=False, dtype=None):
1191-
"""
1192-
*this is an internal non-public method*
1193-
1194-
return an array repr of this object, potentially casting to object
1195-
1196-
"""
1197-
if dtype is not None:
1198-
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
1199-
1200-
return self.values.copy()
1201-
12021190
_index_shared_docs['astype'] = """
12031191
Create an Index with values cast to dtypes. The class of a new Index
12041192
is determined by dtype. When conversion is impossible, a ValueError

pandas/core/indexes/datetimes.py

+4-14
Original file line numberDiff line numberDiff line change
@@ -665,23 +665,13 @@ def to_series(self, keep_tz=False, index=None, name=None):
665665
if name is None:
666666
name = self.name
667667

668-
return Series(self._to_embed(keep_tz), index=index, name=name)
669-
670-
def _to_embed(self, keep_tz=False, dtype=None):
671-
"""
672-
return an array repr of this object, potentially casting to object
673-
674-
This is for internal compat
675-
"""
676-
if dtype is not None:
677-
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
678-
679668
if keep_tz and self.tz is not None:
680-
681669
# preserve the tz & copy
682-
return self.copy(deep=True)
670+
values = self.copy(deep=True)
671+
else:
672+
values = self.values.copy()
683673

684-
return self.values.copy()
674+
return Series(values, index=index, name=name)
685675

686676
def to_period(self, freq=None):
687677
"""

pandas/core/indexes/period.py

-10
Original file line numberDiff line numberDiff line change
@@ -412,16 +412,6 @@ def __array_wrap__(self, result, context=None):
412412
# cannot pass _simple_new as it is
413413
return self._shallow_copy(result, freq=self.freq, name=self.name)
414414

415-
def _to_embed(self, keep_tz=False, dtype=None):
416-
"""
417-
return an array repr of this object, potentially casting to object
418-
"""
419-
420-
if dtype is not None:
421-
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
422-
423-
return self.astype(object).values
424-
425415
@property
426416
def _formatter_func(self):
427417
return lambda x: "'%s'" % x

pandas/core/tools/datetimes.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,13 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
9999
result = Series(arg).map(cache_array)
100100
if box:
101101
if errors == 'ignore':
102-
return Index(result)
102+
return Index(result, name=name)
103103
else:
104104
return DatetimeIndex(result, name=name)
105105
return result.values
106106

107107

108-
def _return_parsed_timezone_results(result, timezones, box, tz):
108+
def _return_parsed_timezone_results(result, timezones, box, tz, name):
109109
"""
110110
Return results from array_strptime if a %z or %Z directive was passed.
111111
@@ -119,6 +119,9 @@ def _return_parsed_timezone_results(result, timezones, box, tz):
119119
True boxes result as an Index-like, False returns an ndarray
120120
tz : object
121121
None or pytz timezone object
122+
name : string, default None
123+
Name for a DatetimeIndex
124+
122125
Returns
123126
-------
124127
tz_result : ndarray of parsed dates with timezone
@@ -136,7 +139,7 @@ def _return_parsed_timezone_results(result, timezones, box, tz):
136139
in zip(result, timezones)])
137140
if box:
138141
from pandas import Index
139-
return Index(tz_results)
142+
return Index(tz_results, name=name)
140143
return tz_results
141144

142145

@@ -209,7 +212,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
209212
if box:
210213
if errors == 'ignore':
211214
from pandas import Index
212-
return Index(result)
215+
return Index(result, name=name)
213216

214217
return DatetimeIndex(result, tz=tz, name=name)
215218
return result
@@ -252,7 +255,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
252255
arg, format, exact=exact, errors=errors)
253256
if '%Z' in format or '%z' in format:
254257
return _return_parsed_timezone_results(
255-
result, timezones, box, tz)
258+
result, timezones, box, tz, name)
256259
except tslibs.OutOfBoundsDatetime:
257260
if errors == 'raise':
258261
raise

pandas/tests/extension/conftest.py

+20
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,31 @@ def all_data(request, data, data_missing):
3232

3333
@pytest.fixture
3434
def data_repeated(data):
35+
<<<<<<< HEAD
3536
"""Return different versions of data for count times"""
3637
def gen(count):
3738
for _ in range(count):
3839
yield data
3940
yield gen
41+
=======
42+
"""
43+
Generate many datasets.
44+
45+
Parameters
46+
----------
47+
data : fixture implementing `data`
48+
49+
Returns
50+
-------
51+
Callable[[int], Generator]:
52+
A callable that takes a `count` argument and
53+
returns a generator yielding `count` datasets.
54+
"""
55+
def gen(count):
56+
for _ in range(count):
57+
yield data
58+
return gen
59+
>>>>>>> datetimelike-tshift
4060

4161

4262
@pytest.fixture

pandas/tests/extension/decimal/test_decimal.py

-8
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,6 @@ def data_missing():
3030
return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])
3131

3232

33-
@pytest.fixture
34-
def data_repeated():
35-
def gen(count):
36-
for _ in range(count):
37-
yield DecimalArray(make_data())
38-
yield gen
39-
40-
4133
@pytest.fixture
4234
def data_for_sorting():
4335
return DecimalArray([decimal.Decimal('1'),

pandas/tests/extension/test_categorical.py

-9
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,6 @@ def data_missing():
4545
return Categorical([np.nan, 'A'])
4646

4747

48-
@pytest.fixture
49-
def data_repeated():
50-
"""Return different versions of data for count times"""
51-
def gen(count):
52-
for _ in range(count):
53-
yield Categorical(make_data())
54-
yield gen
55-
56-
5748
@pytest.fixture
5849
def data_for_sorting():
5950
return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'],

pandas/tests/extension/test_integer.py

-8
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,6 @@ def data_missing(dtype):
4747
return integer_array([np.nan, 1], dtype=dtype)
4848

4949

50-
@pytest.fixture
51-
def data_repeated(data):
52-
def gen(count):
53-
for _ in range(count):
54-
yield data
55-
yield gen
56-
57-
5850
@pytest.fixture
5951
def data_for_sorting(dtype):
6052
return integer_array([1, 2, 0], dtype=dtype)

pandas/tests/extension/test_interval.py

-9
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,6 @@ def data_missing():
4747
return IntervalArray.from_tuples([None, (0, 1)])
4848

4949

50-
@pytest.fixture
51-
def data_repeated():
52-
"""Return different versions of data for count times"""
53-
def gen(count):
54-
for _ in range(count):
55-
yield IntervalArray(make_data())
56-
yield gen
57-
58-
5950
@pytest.fixture
6051
def data_for_sorting():
6152
return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)])

pandas/tests/indexes/datetimes/test_tools.py

+17
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,15 @@ def test_to_datetime_parse_timezone_malformed(self, offset):
233233
with pytest.raises(ValueError):
234234
pd.to_datetime([date], format=fmt)
235235

236+
def test_to_datetime_parse_timezone_keeps_name(self):
237+
# GH 21697
238+
fmt = '%Y-%m-%d %H:%M:%S %z'
239+
arg = pd.Index(['2010-01-01 12:00:00 Z'], name='foo')
240+
result = pd.to_datetime(arg, format=fmt)
241+
expected = pd.DatetimeIndex(['2010-01-01 12:00:00'], tz='UTC',
242+
name='foo')
243+
tm.assert_index_equal(result, expected)
244+
236245

237246
class TestToDatetime(object):
238247
def test_to_datetime_pydatetime(self):
@@ -765,6 +774,14 @@ def test_unit_rounding(self, cache):
765774
expected = pd.Timestamp('2015-06-19 19:55:31.877000093')
766775
assert result == expected
767776

777+
@pytest.mark.parametrize('cache', [True, False])
778+
def test_unit_ignore_keeps_name(self, cache):
779+
# GH 21697
780+
expected = pd.Index([15e9] * 2, name='name')
781+
result = pd.to_datetime(expected, errors='ignore', box=True, unit='s',
782+
cache=cache)
783+
tm.assert_index_equal(result, expected)
784+
768785
@pytest.mark.parametrize('cache', [True, False])
769786
def test_dataframe(self, cache):
770787

0 commit comments

Comments
 (0)