Skip to content

CLN: assorted #50644

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -546,9 +546,6 @@ cpdef array_to_datetime(
else:
# coerce
# we now need to parse this as if unit='ns'
# we can ONLY accept integers at this point
# if we have previously (or in future accept
# datetimes/strings, then we must coerce)
try:
iresult[i] = cast_from_unit(val, "ns")
except OverflowError:
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ cdef tzinfo convert_timezone(
)

cdef int64_t parse_pydatetime(
object val,
datetime val,
npy_datetimestruct *dts,
bint utc_convert,
) except? -1
15 changes: 7 additions & 8 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -413,8 +413,8 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,

Parameters
----------
dts: npy_datetimestruct
tzoffset: int
dts : npy_datetimestruct
tzoffset : int
tz : tzinfo or None
timezone for the timezone-aware output.
reso : NPY_DATETIMEUNIT, default NPY_FR_ns
Expand Down Expand Up @@ -463,7 +463,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
return obj


cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
cdef _TSObject _convert_str_to_tsobject(str ts, tzinfo tz, str unit,
bint dayfirst=False,
bint yearfirst=False):
"""
Expand Down Expand Up @@ -499,7 +499,6 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
NPY_DATETIMEUNIT out_bestunit, reso

if len(ts) == 0 or ts in nat_strings:
ts = NaT
obj = _TSObject()
obj.value = NPY_NAT
obj.tzinfo = tz
Expand Down Expand Up @@ -727,16 +726,16 @@ cdef tzinfo convert_timezone(


cdef int64_t parse_pydatetime(
object val,
npy_datetimestruct *dts,
bint utc_convert,
datetime val,
npy_datetimestruct *dts,
bint utc_convert,
) except? -1:
"""
Convert pydatetime to datetime64.

Parameters
----------
val
val : datetime
Element being processed.
dts : *npy_datetimestruct
Needed to use in pydatetime_to_dt64, which writes to it.
Expand Down
12 changes: 0 additions & 12 deletions pandas/_libs/tslibs/parsing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def parse_datetime_string(
date_string: str,
dayfirst: bool = ...,
yearfirst: bool = ...,
**kwargs,
) -> datetime: ...
def parse_time_string(
arg: str,
Expand All @@ -24,28 +23,17 @@ def quarter_to_myear(year: int, quarter: int, freq: str) -> tuple[int, int]: ...
def try_parse_dates(
values: npt.NDArray[np.object_], # object[:]
parser,
dayfirst: bool = ...,
default: datetime | None = ...,
) -> npt.NDArray[np.object_]: ...
def try_parse_year_month_day(
years: npt.NDArray[np.object_], # object[:]
months: npt.NDArray[np.object_], # object[:]
days: npt.NDArray[np.object_], # object[:]
) -> npt.NDArray[np.object_]: ...
def try_parse_datetime_components(
years: npt.NDArray[np.object_], # object[:]
months: npt.NDArray[np.object_], # object[:]
days: npt.NDArray[np.object_], # object[:]
hours: npt.NDArray[np.object_], # object[:]
minutes: npt.NDArray[np.object_], # object[:]
seconds: npt.NDArray[np.object_], # object[:]
) -> npt.NDArray[np.object_]: ...
def guess_datetime_format(
dt_str,
dayfirst: bool | None = ...,
) -> str | None: ...
def concat_date_cols(
date_cols: tuple,
keep_trivial_numbers: bool = ...,
) -> npt.NDArray[np.object_]: ...
def get_rule_month(source: str) -> str: ...
63 changes: 8 additions & 55 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst):
raise DateParseError(f"Invalid date specified ({month}/{day})")


cdef bint does_string_look_like_time(str parse_string):
cdef bint _does_string_look_like_time(str parse_string):
"""
Checks whether given string is a time: it has to start either from
H:MM or from HH:MM, and hour and minute values must be valid.
Expand Down Expand Up @@ -249,7 +249,6 @@ def parse_datetime_string(
str date_string,
bint dayfirst=False,
bint yearfirst=False,
**kwargs,
) -> datetime:
"""
Parse datetime string, only returns datetime.
Expand All @@ -266,10 +265,10 @@ def parse_datetime_string(
if not _does_string_look_like_datetime(date_string):
raise ValueError(f'Given date string "{date_string}" not likely a datetime')

if does_string_look_like_time(date_string):
if _does_string_look_like_time(date_string):
# use current datetime as default, not pass _DEFAULT_DATETIME
dt = du_parse(date_string, dayfirst=dayfirst,
yearfirst=yearfirst, **kwargs)
yearfirst=yearfirst)
return dt

dt, _ = _parse_delimited_date(date_string, dayfirst)
Expand All @@ -294,7 +293,7 @@ def parse_datetime_string(

try:
dt = du_parse(date_string, default=_DEFAULT_DATETIME,
dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
dayfirst=dayfirst, yearfirst=yearfirst)
except TypeError:
# following may be raised from dateutil
# TypeError: 'NoneType' object is not iterable
Expand Down Expand Up @@ -667,9 +666,7 @@ cdef dateutil_parse(
# Parsing for type-inference


def try_parse_dates(
object[:] values, parser, bint dayfirst=False, default=None,
) -> np.ndarray:
def try_parse_dates(object[:] values, parser) -> np.ndarray:
cdef:
Py_ssize_t i, n
object[::1] result
Expand Down Expand Up @@ -705,47 +702,6 @@ def try_parse_year_month_day(
return result.base # .base to access underlying ndarray


def try_parse_datetime_components(object[:] years,
                                  object[:] months,
                                  object[:] days,
                                  object[:] hours,
                                  object[:] minutes,
                                  object[:] seconds) -> np.ndarray:
    """
    Build one ``datetime`` per row from parallel arrays of components.

    Parameters
    ----------
    years, months, days, hours, minutes : object[:]
        Per-row components; each element is converted with ``int()``.
    seconds : object[:]
        Per-row seconds; each element is converted with ``float()``. The
        integer part is used as the seconds field and a positive fractional
        part is converted to microseconds.

    Returns
    -------
    np.ndarray[object]
        Array of ``datetime`` objects, one per row.

    Raises
    ------
    ValueError
        If the component arrays do not all have the same length as ``years``.
    """
    cdef:
        Py_ssize_t i, n
        object[::1] result
        int secs
        double float_secs
        double micros

    n = len(years)
    # TODO(cython3): Use len instead of `shape[0]`
    if (
        months.shape[0] != n
        or days.shape[0] != n
        or hours.shape[0] != n
        or minutes.shape[0] != n
        or seconds.shape[0] != n
    ):
        raise ValueError("Length of all datetime components must be equal")
    result = np.empty(n, dtype="O")

    for i in range(n):
        float_secs = float(seconds[i])
        secs = int(float_secs)

        micros = float_secs - secs
        if micros > 0:
            # Round to the nearest microsecond instead of truncating:
            # binary floats store e.g. 4.35 slightly below its decimal value,
            # so truncation would yield 349999 rather than 350000.
            # Cap at 999999 so rounding can never produce an out-of-range
            # microsecond value for the datetime constructor.
            micros = min(round(micros * 1000000), 999999)

        result[i] = datetime(int(years[i]), int(months[i]), int(days[i]),
                             int(hours[i]), int(minutes[i]), secs,
                             int(micros))

    return result.base  # .base to access underlying ndarray


# ----------------------------------------------------------------------
# Miscellaneous

Expand Down Expand Up @@ -1001,6 +957,7 @@ cdef str _fill_token(token: str, padding: int):
token_filled = f"{seconds}.{nanoseconds}"
return token_filled


cdef void _maybe_warn_about_dayfirst(format: str, bint dayfirst):
"""Warn if guessed datetime format doesn't respect dayfirst argument."""
cdef:
Expand Down Expand Up @@ -1062,16 +1019,13 @@ cdef object convert_to_unicode(object item, bint keep_trivial_numbers):

@cython.wraparound(False)
@cython.boundscheck(False)
def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndarray:
def concat_date_cols(tuple date_cols) -> np.ndarray:
"""
Concatenates elements from numpy arrays in `date_cols` into strings.

Parameters
----------
date_cols : tuple[ndarray]
keep_trivial_numbers : bool, default True
if True and len(date_cols) == 1, then
conversion (to string from integer/float zero) is not performed

Returns
-------
Expand Down Expand Up @@ -1110,8 +1064,7 @@ def concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True) -> np.ndar
it = <flatiter>PyArray_IterNew(array)
for row_idx in range(rows_count):
item = PyArray_GETITEM(array, PyArray_ITER_DATA(it))
result_view[row_idx] = convert_to_unicode(item,
keep_trivial_numbers)
result_view[row_idx] = convert_to_unicode(item, True)
PyArray_ITER_NEXT(it)
else:
# create fixed size list - more efficient memory allocation
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -764,7 +764,7 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:

Parameters
----------
dts: npy_datetimestruct*
dts : npy_datetimestruct*
freq : int

Returns
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
result.index = res_index

# infer dtypes
result = result.infer_objects()
result = result.infer_objects(copy=False)

return result

Expand Down
4 changes: 0 additions & 4 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,10 +471,6 @@ def _internal_fill_value(self) -> int:
dtype = self._ndarray.dtype
return dtype.type(-1)

@property
def _constructor(self) -> type[Categorical]:
return Categorical

@classmethod
def _from_sequence(
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1286,7 +1286,7 @@ def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_
return new_data

@final
def _addsub_object_array(self, other: np.ndarray, op):
def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
"""
Add or subtract array-like of DateOffset objects

Expand All @@ -1297,10 +1297,14 @@ def _addsub_object_array(self, other: np.ndarray, op):

Returns
-------
result : same class as self
np.ndarray[object]
Except in fastpath case with length 1 where we operate on the
contained scalar.
"""
assert op in [operator.add, operator.sub]
if len(other) == 1 and self.ndim == 1:
# Note: without this special case, we could annotate return type
# as ndarray[object]
# If both 1D then broadcasting is unambiguous
return op(self, other[0])

Expand Down
6 changes: 2 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,8 +793,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj,
if val is NaT or val.tz is None: # type: ignore[comparison-overlap]
val = val.to_datetime64()
dtype = val.dtype
# TODO(2.0): this should be dtype = val.dtype
# to get the correct M8 resolution
# TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
else:
if pandas_dtype:
Expand Down Expand Up @@ -1677,8 +1675,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
arr._validate_setitem_value(element)
return True
except (ValueError, TypeError):
# TODO(2.0): stop catching ValueError for tzaware, see
# _catch_deprecated_value_error
# TODO: re-use _catch_deprecated_value_error to ensure we are
# strict about what exceptions we allow through here.
return False

# This is technically incorrect, but maintains the behavior of
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9436,7 +9436,9 @@ def _append(
row_df = other.to_frame().T
# infer_objects is needed for
# test_append_empty_frame_to_series_with_dateutil_tz
other = row_df.infer_objects().rename_axis(index.names, copy=False)
other = row_df.infer_objects(copy=False).rename_axis(
index.names, copy=False
)
elif isinstance(other, list):
if not other:
pass
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1833,7 +1833,7 @@ def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
result = result.T

# Note: we really only care about inferring numeric dtypes here
return self._reindex_output(result).infer_objects()
return self._reindex_output(result).infer_objects(copy=False)

def _iterate_column_groupbys(self, obj: DataFrame | Series):
for i, colname in enumerate(obj.columns):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3385,7 +3385,7 @@ def _reorder_indexer(
new_order = np.arange(n)[::-1][indexer]
elif isinstance(k, slice) and k.start is None and k.stop is None:
# slice(None) should not determine order GH#31330
new_order = np.ones((n,))[indexer]
new_order = np.ones((n,), dtype=np.intp)[indexer]
else:
# For all other case, use the same order as the level
new_order = np.arange(n)[indexer]
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,13 +231,11 @@ def _should_fallback_to_positional(self) -> bool:

@doc(Index._convert_slice_indexer)
def _convert_slice_indexer(self, key: slice, kind: str):
# TODO(2.0): once #45324 deprecation is enforced we should be able
# TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
# to simplify this.
if is_float_dtype(self.dtype):
assert kind in ["loc", "getitem"]

# TODO: can we write this as a condition based on
# e.g. _should_fallback_to_positional?
# We always treat __getitem__ slicing as label-based
# translate to locations
return self.slice_indexer(key.start, key.stop, key.step)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2166,7 +2166,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
if not has_dtype:
# i.e. if we already had a Series or ndarray, keep that
# dtype. But if we had a list or dict, then do inference
df = df.infer_objects()
df = df.infer_objects(copy=False)
self.obj._mgr = df._mgr
else:
self.obj._mgr = self.obj._append(value)._mgr
Expand Down
1 change: 0 additions & 1 deletion pandas/io/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1117,7 +1117,6 @@ def converter(*date_cols):
parsing.try_parse_dates(
parsing.concat_date_cols(date_cols),
parser=date_parser,
dayfirst=dayfirst,
),
errors="ignore",
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/plotting/_matplotlib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ def _compute_plot_data(self):

# GH16953, infer_objects is needed as fallback, for ``Series``
# with ``dtype == object``
data = data.infer_objects()
data = data.infer_objects(copy=False)
include_type = [np.number, "datetime", "datetimetz", "timedelta"]

# GH23719, allow plotting boolean
Expand Down
2 changes: 1 addition & 1 deletion pandas/plotting/_matplotlib/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def _args_adjust(self) -> None:

def _calculate_bins(self, data: DataFrame) -> np.ndarray:
"""Calculate bins given data"""
nd_values = data.infer_objects()._get_numeric_data()
nd_values = data.infer_objects(copy=False)._get_numeric_data()
values = np.ravel(nd_values)
values = values[~isna(values)]

Expand Down
Loading