-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN refactor rest of core #37586
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CLN refactor rest of core #37586
Changes from 4 commits
3f275aa
4a6c153
9db6e6c
8794ceb
803f49b
4511a0c
9c3598b
873d430
31d29af
7a51410
a31e89d
a8c8c9c
5e053d4
5f0a278
142e837
8e18225
f181cb0
9383755
fe0e0ba
0d10a5b
938b2d5
7fda4e7
c36bf8d
2c2868e
c6d3233
1f782fd
b315629
84207d9
3f79955
9888383
0cd394f
2ec839e
5125e5f
d693f58
d4e3bd6
5559352
4f14b81
c03d876
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -284,10 +284,9 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs): | |
return f | ||
|
||
f = getattr(np, arg, None) | ||
if f is not None: | ||
if hasattr(self, "__array__"): | ||
# in particular exclude Window | ||
return f(self, *args, **kwargs) | ||
if f is not None and hasattr(self, "__array__"): | ||
# in particular exclude Window | ||
return f(self, *args, **kwargs) | ||
|
||
raise AttributeError( | ||
f"'{arg}' is not a valid function for '{type(self).__name__}' object" | ||
|
@@ -570,8 +569,8 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs): | |
# TODO(GH-24345): Avoid potential double copy | ||
if copy or na_value is not lib.no_default: | ||
result = result.copy() | ||
if na_value is not lib.no_default: | ||
result[self.isna()] = na_value | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why? It's small, but we potentially avoid a duplicate check. |
||
if na_value is not lib.no_default: | ||
result[self.isna()] = na_value | ||
return result | ||
|
||
@property | ||
|
@@ -974,15 +973,14 @@ def value_counts( | |
NaN 1 | ||
dtype: int64 | ||
""" | ||
result = value_counts( | ||
return value_counts( | ||
self, | ||
sort=sort, | ||
ascending=ascending, | ||
normalize=normalize, | ||
bins=bins, | ||
dropna=dropna, | ||
) | ||
return result | ||
|
||
def unique(self): | ||
values = self._values | ||
|
@@ -1244,8 +1242,7 @@ def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: | |
|
||
def drop_duplicates(self, keep="first"): | ||
duplicated = self.duplicated(keep=keep) | ||
result = self[np.logical_not(duplicated)] | ||
return result | ||
return self[np.logical_not(duplicated)] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd prefer |
||
|
||
def duplicated(self, keep="first"): | ||
return duplicated(self._values, keep=keep) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -112,8 +112,10 @@ def is_bool_indexer(key: Any) -> bool: | |
key = np.asarray(key) | ||
|
||
if not lib.is_bool_array(key): | ||
na_msg = "Cannot mask with non-boolean array containing NA / NaN values" | ||
if isna(key).any(): | ||
na_msg = ( | ||
"Cannot mask with non-boolean array containing NA / NaN values" | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this an improvement? |
||
raise ValueError(na_msg) | ||
return False | ||
return True | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -347,8 +347,7 @@ def array( | |
elif is_timedelta64_ns_dtype(dtype): | ||
return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy) | ||
|
||
result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) | ||
return result | ||
return PandasArray._from_sequence(data, dtype=dtype, copy=copy) | ||
|
||
|
||
def extract_array(obj: AnyArrayLike, extract_numpy: bool = False) -> ArrayLike: | ||
|
@@ -551,9 +550,13 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo | |
Otherwise an object array is returned. | ||
""" | ||
# perf shortcut as this is the most common case | ||
if isinstance(arr, np.ndarray): | ||
if maybe_castable(arr) and not copy and dtype is None: | ||
return arr | ||
if ( | ||
isinstance(arr, np.ndarray) | ||
and maybe_castable(arr) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should change `maybe_castable` to take `arr.dtype` directly. |
||
and not copy | ||
and dtype is None | ||
): | ||
return arr | ||
|
||
if isinstance(dtype, ExtensionDtype) and (dtype.kind != "M" or is_sparse(dtype)): | ||
# create an extension array from its dtype | ||
|
@@ -575,9 +578,11 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo | |
|
||
# Take care in creating object arrays (but iterators are not | ||
# supported): | ||
if is_object_dtype(dtype) and ( | ||
is_list_like(subarr) | ||
and not (is_iterator(subarr) or isinstance(subarr, np.ndarray)) | ||
if ( | ||
is_object_dtype(dtype) | ||
and is_list_like(subarr) | ||
and not is_iterator(subarr) | ||
and not isinstance(subarr, np.ndarray) | ||
): | ||
subarr = construct_1d_object_array_from_listlike(subarr) | ||
elif not is_extension_array_dtype(subarr): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you put parentheses where appropriate to make this more obvious?