Skip to content

Commit b7ac784

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 2996538 + b8d750f commit b7ac784

File tree

21 files changed

+156
-106
lines changed

21 files changed

+156
-106
lines changed

asv_bench/benchmarks/reshape.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def setup(self, dtype):
102102
columns = np.arange(n)
103103
if dtype == "int":
104104
values = np.arange(m * m * n).reshape(m * m, n)
105+
self.df = DataFrame(values, index, columns)
105106
else:
106107
# the category branch is ~20x slower than int. So we
107108
# cut down the size a bit. Now it's only ~3x slower.
@@ -111,7 +112,10 @@ def setup(self, dtype):
111112
values = np.take(list(string.ascii_letters), indices)
112113
values = [pd.Categorical(v) for v in values.T]
113114

114-
self.df = DataFrame(values, index, columns)
115+
self.df = DataFrame(
116+
{i: cat for i, cat in enumerate(values)}, index, columns
117+
)
118+
115119
self.df2 = self.df.iloc[:-1]
116120

117121
def time_full_product(self, dtype):

ci/code_checks.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,8 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
121121
pandas/io/parsers/ \
122122
pandas/io/sas/ \
123123
pandas/io/sql.py \
124-
pandas/tseries/
124+
pandas/tseries/ \
125+
pandas/io/formats/style_render.py
125126
RET=$(($RET + $?)) ; echo $MSG "DONE"
126127

127128
fi

ci/deps/actions-39-slow.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ dependencies:
2323
- matplotlib
2424
- moto>=1.3.14
2525
- flask
26+
- numba
2627
- numexpr
2728
- numpy
2829
- openpyxl

ci/deps/actions-39.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ dependencies:
2222
- matplotlib
2323
- moto>=1.3.14
2424
- flask
25+
- numba
2526
- numexpr
2627
- numpy
2728
- openpyxl

ci/deps/azure-windows-39.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ dependencies:
2323
- matplotlib
2424
- moto>=1.3.14
2525
- flask
26+
- numba
2627
- numexpr
2728
- numpy
2829
- openpyxl

doc/source/whatsnew/v1.3.2.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17-
-
17+
- Performance regression in :meth:`DataFrame.isin` and :meth:`Series.isin` for nullable data types (:issue:`42714`)
18+
- Regression in updating values of a :class:`pandas.Series` created by :meth:`pandas.DataFrame.pop` when using a boolean index (:issue:`42530`)
19+
- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
1820
-
1921

2022
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ Performance improvements
166166
~~~~~~~~~~~~~~~~~~~~~~~~
167167
- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
168168
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
169+
- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`)
169170

170171
.. ---------------------------------------------------------------------------
171172
@@ -225,7 +226,6 @@ Indexing
225226
- Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`)
226227
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
227228
- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
228-
-
229229

230230
Missing
231231
^^^^^^^

pandas/core/arrays/masked.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ def isin(self, values) -> BooleanArray: # type: ignore[override]
417417
# see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
418418
result[self._mask] = values_have_NA
419419

420-
mask = np.zeros_like(self, dtype=bool)
420+
mask = np.zeros(self._data.shape, dtype=bool)
421421
return BooleanArray(result, mask, copy=False)
422422

423423
def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3745,7 +3745,7 @@ def _set_item_mgr(self, key, value: ArrayLike) -> None:
37453745
# try to set first as we want an invalid
37463746
# value exception to occur first
37473747
if len(self):
3748-
self._check_setitem_copy(stacklevel=5)
3748+
self._check_setitem_copy()
37493749

37503750
def _iset_item(self, loc: int, value) -> None:
37513751
arraylike = self._sanitize_column(value)

pandas/core/generic.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
doc,
6868
rewrite_axis_style_signature,
6969
)
70+
from pandas.util._exceptions import find_stack_level
7071
from pandas.util._validators import (
7172
validate_ascending,
7273
validate_bool_kwarg,
@@ -3506,7 +3507,7 @@ def _maybe_update_cacher(
35063507
"""
35073508

35083509
if verify_is_copy:
3509-
self._check_setitem_copy(stacklevel=5, t="referent")
3510+
self._check_setitem_copy(t="referent")
35103511

35113512
if clear:
35123513
self._clear_item_cache()
@@ -3853,26 +3854,21 @@ def _check_is_chained_assignment_possible(self) -> bool_t:
38533854
setting.
38543855
"""
38553856
if self._is_copy:
3856-
self._check_setitem_copy(stacklevel=4, t="referent")
3857+
self._check_setitem_copy(t="referent")
38573858
return False
38583859

38593860
@final
3860-
def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
3861+
def _check_setitem_copy(self, t="setting", force=False):
38613862
"""
38623863
38633864
Parameters
38643865
----------
3865-
stacklevel : int, default 4
3866-
the level to show of the stack when the error is output
38673866
t : str, the type of setting error
38683867
force : bool, default False
38693868
If True, then force showing an error.
38703869
38713870
validate if we are doing a setitem on a chained copy.
38723871
3873-
If you call this function, be sure to set the stacklevel such that the
3874-
user will see the error *at the level of setting*
3875-
38763872
It is technically possible to figure out that we are setting on
38773873
a copy even WITH a multi-dtyped pandas object. In other words, some
38783874
blocks may be views while others are not. Currently _is_view will ALWAYS
@@ -3931,7 +3927,7 @@ def _check_setitem_copy(self, stacklevel=4, t="setting", force=False):
39313927
if value == "raise":
39323928
raise com.SettingWithCopyError(t)
39333929
elif value == "warn":
3934-
warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel)
3930+
warnings.warn(t, com.SettingWithCopyWarning, stacklevel=find_stack_level())
39353931

39363932
def __delitem__(self, key) -> None:
39373933
"""

pandas/core/indexes/multi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2836,7 +2836,7 @@ def _maybe_to_slice(loc):
28362836
try:
28372837
return self._engine.get_loc(key)
28382838
except TypeError:
2839-
# e.g. partial string slicing
2839+
# e.g. partial string slicing, see test_partial_slicing_with_multiindex
28402840
loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
28412841
return loc
28422842

pandas/core/indexing.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -658,16 +658,7 @@ def _get_setitem_indexer(self, key):
658658
if isinstance(key, range):
659659
return list(key)
660660

661-
try:
662-
return self._convert_to_indexer(key, axis=0, is_setter=True)
663-
except TypeError as e:
664-
665-
# invalid indexer type vs 'other' indexing errors
666-
if "cannot do" in str(e):
667-
raise
668-
elif "unhashable type" in str(e):
669-
raise
670-
raise IndexingError(key) from e
661+
return self._convert_to_indexer(key, axis=0, is_setter=True)
671662

672663
def _ensure_listlike_indexer(self, key, axis=None, value=None):
673664
"""
@@ -1209,7 +1200,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
12091200
is_int_index = labels.is_integer()
12101201
is_int_positional = is_integer(key) and not is_int_index
12111202

1212-
if is_scalar(key) or isinstance(labels, MultiIndex):
1203+
if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
12131204
# Otherwise get_loc will raise InvalidIndexError
12141205

12151206
# if we are a label return me
@@ -1224,8 +1215,6 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
12241215
# GH35015, using datetime as column indices raises exception
12251216
if not isinstance(labels, MultiIndex):
12261217
raise
1227-
except TypeError:
1228-
pass
12291218
except ValueError:
12301219
if not is_int_positional:
12311220
raise

pandas/core/internals/blocks.py

+13-9
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from pandas._libs.internals import BlockPlacement
2626
from pandas._typing import (
2727
ArrayLike,
28-
Dtype,
2928
DtypeObj,
3029
F,
3130
Shape,
@@ -52,7 +51,6 @@
5251
is_list_like,
5352
is_sparse,
5453
is_string_dtype,
55-
pandas_dtype,
5654
)
5755
from pandas.core.dtypes.dtypes import (
5856
CategoricalDtype,
@@ -100,6 +98,7 @@
10098
TimedeltaArray,
10199
)
102100
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
101+
from pandas.core.arrays.sparse import SparseDtype
103102
from pandas.core.base import PandasObject
104103
import pandas.core.common as com
105104
import pandas.core.computation.expressions as expressions
@@ -326,6 +325,8 @@ def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:
326325

327326
return type(self)(new_values, new_mgr_locs, self.ndim)
328327

328+
# NB: this cannot be made cache_readonly because in libreduction we pin
329+
# new .values that can have different shape GH#42631
329330
@property
330331
def shape(self) -> Shape:
331332
return self.values.shape
@@ -1255,7 +1256,7 @@ def where(self, other, cond, errors="raise") -> list[Block]:
12551256

12561257
return result_blocks
12571258

1258-
def _unstack(self, unstacker, fill_value, new_placement):
1259+
def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
12591260
"""
12601261
Return a list of unstacked blocks of self
12611262
@@ -1264,6 +1265,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
12641265
unstacker : reshape._Unstacker
12651266
fill_value : int
12661267
Only used in ExtensionBlock._unstack
1268+
allow_fill : bool
12671269
12681270
Returns
12691271
-------
@@ -1638,7 +1640,7 @@ def where(self, other, cond, errors="raise") -> list[Block]:
16381640

16391641
return [self.make_block_same_class(result)]
16401642

1641-
def _unstack(self, unstacker, fill_value, new_placement):
1643+
def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
16421644
# ExtensionArray-safe unstack.
16431645
# We override ObjectBlock._unstack, which unstacks directly on the
16441646
# values of the array. For EA-backed blocks, this would require
@@ -1655,7 +1657,7 @@ def _unstack(self, unstacker, fill_value, new_placement):
16551657
blocks = [
16561658
# TODO: could cast to object depending on fill_value?
16571659
self.make_block_same_class(
1658-
self.values.take(indices, allow_fill=True, fill_value=fill_value),
1660+
self.values.take(indices, allow_fill=allow_fill, fill_value=fill_value),
16591661
BlockPlacement(place),
16601662
)
16611663
for indices, place in zip(new_values.T, new_placement)
@@ -1842,7 +1844,7 @@ class CategoricalBlock(ExtensionBlock):
18421844
# Constructor Helpers
18431845

18441846

1845-
def maybe_coerce_values(values) -> ArrayLike:
1847+
def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
18461848
"""
18471849
Input validation for values passed to __init__. Ensure that
18481850
any datetime64/timedelta64 dtypes are in nanoseconds. Ensure
@@ -1874,7 +1876,7 @@ def maybe_coerce_values(values) -> ArrayLike:
18741876
return values
18751877

18761878

1877-
def get_block_type(values, dtype: Dtype | None = None):
1879+
def get_block_type(values, dtype: DtypeObj | None = None):
18781880
"""
18791881
Find the appropriate Block subclass to use for the given values and dtype.
18801882
@@ -1889,13 +1891,15 @@ def get_block_type(values, dtype: Dtype | None = None):
18891891
"""
18901892
# We use vtype and kind checks because they are much more performant
18911893
# than is_foo_dtype
1892-
dtype = cast(np.dtype, pandas_dtype(dtype) if dtype else values.dtype)
1894+
if dtype is None:
1895+
dtype = values.dtype
1896+
18931897
vtype = dtype.type
18941898
kind = dtype.kind
18951899

18961900
cls: type[Block]
18971901

1898-
if is_sparse(dtype):
1902+
if isinstance(dtype, SparseDtype):
18991903
# Need this first(ish) so that Sparse[datetime] is sparse
19001904
cls = ExtensionBlock
19011905
elif isinstance(dtype, CategoricalDtype):

pandas/core/internals/construction.py

+8
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,14 @@ def to_arrays(
757757
# i.e. numpy structured array
758758
columns = ensure_index(data.dtype.names)
759759
arrays = [data[name] for name in columns]
760+
761+
if len(data) == 0:
762+
# GH#42456 the indexing above results in a list of 2D ndarrays
763+
# TODO: is that an issue with numpy?
764+
for i, arr in enumerate(arrays):
765+
if arr.ndim == 2:
766+
arrays[i] = arr[:, 0]
767+
760768
return arrays, columns
761769
return [], ensure_index([])
762770

0 commit comments

Comments
 (0)