Skip to content

CLN: follow-ups #40000

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ repos:
entry: np\.bool[^_8]
language: pygrep
types_or: [python, cython, rst]
- id: np-object
name: Check for use of np.object instead of np.object_
entry: np\.object[^_8]
language: pygrep
types_or: [python, cython, rst]
- id: no-os-remove
name: Check code for instances of os.remove
entry: os\.remove
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/algos/isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def time_isin(self, dtype, exponent, title):

class IsinWithRandomFloat:
params = [
[np.float64, np.object],
[np.float64, np.object_],
[
1_300,
2_000,
Expand Down Expand Up @@ -134,7 +134,7 @@ def time_isin(self, dtype, size, title):

class IsinWithArangeSorted:
params = [
[np.float64, np.int64, np.uint64, np.object],
[np.float64, np.int64, np.uint64, np.object_],
[
1_000,
2_000,
Expand All @@ -155,7 +155,7 @@ def time_isin(self, dtype, size):

class IsinWithArange:
params = [
[np.float64, np.int64, np.uint64, np.object],
[np.float64, np.int64, np.uint64, np.object_],
[
1_000,
2_000,
Expand Down
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1755,8 +1755,8 @@ Missing

- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object_``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object_``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
- :class:`DataFrame` and :class:`Series` now properly handle numpy masked arrays with hardened masks. Previously, constructing a DataFrame or Series from a masked array with a hard mask would create a pandas object containing the underlying value, rather than the expected NaN. (:issue:`24574`)
- Bug in :class:`DataFrame` constructor where ``dtype`` argument was not honored when handling numpy masked record arrays. (:issue:`24874`)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1468,7 +1468,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
if is_decimal_array(values):
return "decimal"

elif is_complex(val):
elif util.is_complex_object(val):
if is_complex_array(values):
return "complex"

Expand Down
29 changes: 19 additions & 10 deletions pandas/core/array_algos/take.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from typing import Optional

import numpy as np
Expand All @@ -6,6 +8,7 @@
algos as libalgos,
lib,
)
from pandas._typing import ArrayLike

from pandas.core.dtypes.cast import maybe_promote
from pandas.core.dtypes.common import (
Expand All @@ -14,20 +17,17 @@
)
from pandas.core.dtypes.missing import na_value_for_dtype

from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
)
from pandas.core.construction import ensure_wrapped_if_datetimelike


def take_nd(
arr,
arr: ArrayLike,
indexer,
axis: int = 0,
out: Optional[np.ndarray] = None,
fill_value=lib.no_default,
allow_fill: bool = True,
):
) -> ArrayLike:

"""
Specialized Cython take which sets NaN values in one pass
Expand All @@ -37,7 +37,7 @@ def take_nd(

Parameters
----------
arr : array-like
arr : np.ndarray or ExtensionArray
Input array.
indexer : ndarray
1-D array of indices to take, subarrays corresponding to -1 value
Expand All @@ -57,20 +57,29 @@ def take_nd(

Returns
-------
subarray : array-like
subarray : np.ndarray or ExtensionArray
May be the same type as the input, or cast to an ndarray.
"""
if fill_value is lib.no_default:
fill_value = na_value_for_dtype(arr.dtype, compat=False)

arr = extract_array(arr, extract_numpy=True)

if not isinstance(arr, np.ndarray):
# i.e. ExtensionArray,
# includes for EA to catch DatetimeArray, TimedeltaArray
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

arr = np.asarray(arr)
return _take_nd_ndarray(arr, indexer, axis, out, fill_value, allow_fill)


def _take_nd_ndarray(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is purely for typing, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, mypy complained without it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't you just use a cast here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Simon explained a couple of days ago in a different thread that this is because ArrayLike is a TypeVar and not a Union.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok then

arr: np.ndarray,
indexer,
axis: int,
out: Optional[np.ndarray],
fill_value,
allow_fill: bool,
) -> np.ndarray:

indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
arr, indexer, axis, out, fill_value, allow_fill
Expand Down
8 changes: 3 additions & 5 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,6 @@ def coerce_to_target_dtype(self, other) -> Block:

return self.astype(new_dtype, copy=False)

@final
def interpolate(
self,
method: str = "pad",
Expand Down Expand Up @@ -1293,11 +1292,10 @@ def _interpolate(

# only deal with floats
if self.dtype.kind != "f":
if self.dtype.kind not in ["i", "u"]:
return [self]
data = data.astype(np.float64)
# because we already checked can_hold_na, we don't have an int dtype here
return [self]

if fill_value is None:
if is_valid_na_for_dtype(fill_value, self.dtype):
fill_value = self.fill_value

if method in ("krogh", "piecewise_polynomial", "pchip"):
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -874,12 +874,16 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
if take_left is None:
lvals = result[name]._values
else:
# TODO: can we pin down take_left's type earlier?
take_left = extract_array(take_left, extract_numpy=True)
lfill = na_value_for_dtype(take_left.dtype)
lvals = algos.take_nd(take_left, left_indexer, fill_value=lfill)

if take_right is None:
rvals = result[name]._values
else:
# TODO: can we pin down take_right's type earlier?
take_right = extract_array(take_right, extract_numpy=True)
rfill = na_value_for_dtype(take_right.dtype)
rvals = algos.take_nd(take_right, right_indexer, fill_value=rfill)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
getattr(empty_series_dti.resample("d"), resample_method)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
# (ex: doing mean with dtype of np.object_)
pass


Expand Down