Skip to content

CLN: assorted #48385

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 12, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/offsets.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class BaseOffset:
@property
def freqstr(self) -> str: ...
def apply_index(self, dtindex: DatetimeIndex) -> DatetimeIndex: ...
def _apply(self, other): ...
def _apply_array(self, dtarr) -> None: ...
def rollback(self, dt: datetime) -> datetime: ...
def rollforward(self, dt: datetime) -> datetime: ...
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,9 @@ cdef class BaseOffset:
res = self._apply_array(dtindex)
return type(dtindex)(res)

def _apply(self, other):
raise NotImplementedError("implemented by subclasses")

@apply_array_wraps
def _apply_array(self, dtarr):
raise NotImplementedError(
Expand Down
58 changes: 42 additions & 16 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@

from pandas.tseries.frequencies import get_period_alias
from pandas.tseries.offsets import (
BDay,
Day,
Tick,
)
Expand Down Expand Up @@ -394,7 +393,9 @@ def _generate_range(
if isinstance(freq, Tick):
i8values = generate_regular_range(start, end, periods, freq)
else:
xdr = generate_range(start=start, end=end, periods=periods, offset=freq)
xdr = _generate_range(
start=start, end=end, periods=periods, offset=freq
)
i8values = np.array([x.value for x in xdr], dtype=np.int64)

endpoint_tz = start.tz if start is not None else end.tz
Expand Down Expand Up @@ -2493,18 +2494,23 @@ def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexis
return ts


def generate_range(start=None, end=None, periods=None, offset=BDay()):
def _generate_range(
start: Timestamp | None,
end: Timestamp | None,
periods: int | None,
offset: BaseOffset,
):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused: why was the default value for offset dropped?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because this is never called without passing offset (outside of tests, which this PR updates).

"""
Generates a sequence of dates corresponding to the specified time
offset. Similar to dateutil.rrule except uses pandas DateOffset
objects to represent time increments.

Parameters
----------
start : datetime, (default None)
end : datetime, (default None)
periods : int, (default None)
offset : DateOffset, (default BDay())
start : Timestamp or None
end : Timestamp or None
periods : int or None
offset : DateOffset,

Notes
-----
Expand All @@ -2519,26 +2525,46 @@ def generate_range(start=None, end=None, periods=None, offset=BDay()):
"""
offset = to_offset(offset)

start = Timestamp(start)
start = start if start is not NaT else None
end = Timestamp(end)
end = end if end is not NaT else None
# Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
# expected "Union[integer[Any], float, str, date, datetime64]"
start = Timestamp(start) # type: ignore[arg-type]
# Non-overlapping identity check (left operand type: "Timestamp", right
# operand type: "NaTType")
start = start if start is not NaT else None # type: ignore[comparison-overlap]
# Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
# expected "Union[integer[Any], float, str, date, datetime64]"
end = Timestamp(end) # type: ignore[arg-type]
# Non-overlapping identity check (left operand type: "Timestamp", right
# operand type: "NaTType")
end = end if end is not NaT else None # type: ignore[comparison-overlap]

if start and not offset.is_on_offset(start):
start = offset.rollforward(start)
# Incompatible types in assignment (expression has type "datetime",
# variable has type "Optional[Timestamp]")
start = offset.rollforward(start) # type: ignore[assignment]

elif end and not offset.is_on_offset(end):
end = offset.rollback(end)
# Incompatible types in assignment (expression has type "datetime",
# variable has type "Optional[Timestamp]")
end = offset.rollback(end) # type: ignore[assignment]

if periods is None and end < start and offset.n >= 0:
# Unsupported operand types for < ("Timestamp" and "None")
if periods is None and end < start and offset.n >= 0: # type: ignore[operator]
end = None
periods = 0

if end is None:
end = start + (periods - 1) * offset
# error: No overload variant of "__radd__" of "BaseOffset" matches
# argument type "None"
end = start + (periods - 1) * offset # type: ignore[operator]

if start is None:
start = end - (periods - 1) * offset
# error: No overload variant of "__radd__" of "BaseOffset" matches
# argument type "None"
start = end - (periods - 1) * offset # type: ignore[operator]

start = cast(Timestamp, start)
end = cast(Timestamp, end)

cur = start
if offset.n >= 0:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin):
_typ = "periodarray" # ABCPeriodArray
_internal_fill_value = np.int64(iNaT)
_recognized_scalars = (Period,)
_is_recognized_dtype = is_period_dtype
_is_recognized_dtype = is_period_dtype # check_compatible_with checks freq match
_infer_matches = ("period",)

@property
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1916,7 +1916,7 @@ def _drop_labels_or_levels(self, keys, axis: int = 0):
# Perform copy upfront and then use inplace operations below.
# This ensures that we always perform exactly one copy.
# ``copy`` and/or ``inplace`` options could be added in the future.
dropped = self.copy()
dropped = self.copy(deep=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this just to be explicit about the copy instead of relying on the signature defaults?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This improves performance by avoiding making an actual copy. It is safe since this method is private.


if axis == 0:
# Handle dropping index levels
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,8 +955,6 @@ def _getitem_lowerdim(self, tup: tuple):
# is equivalent.
# (see the other place where we call _handle_lowerdim_multi_index_axis0)
with suppress(IndexingError):
# error "_LocationIndexer" has no attribute
# "_handle_lowerdim_multi_index_axis0"
return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)

tup = self._validate_key_length(tup)
Expand Down Expand Up @@ -1013,8 +1011,6 @@ def _getitem_nested_tuple(self, tup: tuple):
# DataFrame, IndexingError is not raised when slice(None,None,None)
# with one row.
with suppress(IndexingError):
# error "_LocationIndexer" has no attribute
# "_handle_lowerdim_multi_index_axis0"
return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
tup
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
if upcasted_na is None and self.block.dtype.kind != "V":
# No upcasting is necessary
fill_value = self.block.fill_value
values = self.block.get_values()
values = self.block.values
else:
fill_value = upcasted_na

Expand Down
34 changes: 26 additions & 8 deletions pandas/tests/indexes/datetimes/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
offsets,
)
import pandas._testing as tm
from pandas.core.arrays.datetimes import generate_range
from pandas.core.arrays.datetimes import _generate_range as generate_range

START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)

Expand Down Expand Up @@ -840,27 +840,45 @@ def test_date_range_with_tz(self, tzstr):

class TestGenRangeGeneration:
def test_generate(self):
rng1 = list(generate_range(START, END, offset=BDay()))
rng2 = list(generate_range(START, END, offset="B"))
rng1 = list(generate_range(START, END, periods=None, offset=BDay()))
rng2 = list(generate_range(START, END, periods=None, offset="B"))
assert rng1 == rng2

def test_generate_cday(self):
rng1 = list(generate_range(START, END, offset=CDay()))
rng2 = list(generate_range(START, END, offset="C"))
rng1 = list(generate_range(START, END, periods=None, offset=CDay()))
rng2 = list(generate_range(START, END, periods=None, offset="C"))
assert rng1 == rng2

def test_1(self):
rng = list(generate_range(start=datetime(2009, 3, 25), periods=2))
rng = list(
generate_range(
start=datetime(2009, 3, 25), end=None, periods=2, offset=BDay()
)
)
expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)]
assert rng == expected

def test_2(self):
rng = list(generate_range(start=datetime(2008, 1, 1), end=datetime(2008, 1, 3)))
rng = list(
generate_range(
start=datetime(2008, 1, 1),
end=datetime(2008, 1, 3),
periods=None,
offset=BDay(),
)
)
expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)]
assert rng == expected

def test_3(self):
rng = list(generate_range(start=datetime(2008, 1, 5), end=datetime(2008, 1, 6)))
rng = list(
generate_range(
start=datetime(2008, 1, 5),
end=datetime(2008, 1, 6),
periods=None,
offset=BDay(),
)
)
expected = []
assert rng == expected

Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/plotting/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,11 @@ def test_custom_business_day_freq(self):

_check_plot_works(s.plot)

@pytest.mark.xfail(reason="GH#24426")
@pytest.mark.xfail(
reason="GH#24426, see also "
"github.com/pandas-dev/pandas/commit/"
"ef1bd69fa42bbed5d09dd17f08c44fc8bfc2b685#r61470674"
)
def test_plot_accessor_updates_on_inplace(self):
ser = Series([1, 2, 3, 4])
_, ax = self.plt.subplots()
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,17 +902,17 @@ def test_addsub_timedeltalike_non_nano(self, dt64, ts, td):
assert result._reso == ts._reso
assert result == expected

@pytest.mark.xfail(reason="tz_localize not yet implemented for non-nano")
def test_addsub_offset(self, ts_tz):
# specifically non-Tick offset
off = offsets.YearBegin(1)
off = offsets.YearEnd(1)
result = ts_tz + off

assert isinstance(result, Timestamp)
assert result._reso == ts_tz._reso
# If ts_tz is ever on the last day of the year, the year would be
# incremented by one
assert result.year == ts_tz.year
if ts_tz.month == 12 and ts_tz.day == 31:
assert result.year == ts_tz.year + 1
else:
assert result.year == ts_tz.year
assert result.day == 31
assert result.month == 12
assert tz_compare(result.tz, ts_tz.tz)
Expand Down