Skip to content

Commit d37419c

Browse files
authored
Merge branch 'main' into issue-60550-fix-v2
2 parents dd005ab + f1b00b8 commit d37419c

File tree

15 files changed

+154
-49
lines changed

15 files changed

+154
-49
lines changed

doc/source/getting_started/overview.rst

+1
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,4 @@ License
174174
-------
175175

176176
.. literalinclude:: ../../../LICENSE
177+
:language: none

doc/source/whatsnew/v3.0.0.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -694,8 +694,10 @@ Interval
694694
Indexing
695695
^^^^^^^^
696696
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
697+
- Bug in :meth:`DataFrame.__getitem__` where slicing a :class:`DataFrame` with many rows raised an ``OverflowError`` (:issue:`59531`)
697698
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
698699
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
700+
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
699701
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
700702
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
701703

@@ -712,7 +714,7 @@ MultiIndex
712714
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
713715
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
714716
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
715-
-
717+
- Bug in :meth:`MultiIndex.from_tuples` producing incorrect output when the input tuples contain ``NaN`` values (:issue:`60695`, :issue:`60988`)
716718

717719
I/O
718720
^^^
@@ -787,6 +789,7 @@ Reshaping
787789
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
788790
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
789791
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
792+
- Bug in :func:`concat` where concatenating a :class:`DataFrame` and a :class:`Series` with ``ignore_index=True`` drops the series name (:issue:`60723`, :issue:`56257`)
790793

791794
Sparse
792795
^^^^^^

pandas/_libs/hashtable.pxd

+12-12
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ cdef class HashTable:
4141

4242
cdef class UInt64HashTable(HashTable):
4343
cdef kh_uint64_t *table
44-
cdef int64_t na_position
44+
cdef Py_ssize_t na_position
4545
cdef bint uses_mask
4646

4747
cpdef get_item(self, uint64_t val)
@@ -51,7 +51,7 @@ cdef class UInt64HashTable(HashTable):
5151

5252
cdef class Int64HashTable(HashTable):
5353
cdef kh_int64_t *table
54-
cdef int64_t na_position
54+
cdef Py_ssize_t na_position
5555
cdef bint uses_mask
5656

5757
cpdef get_item(self, int64_t val)
@@ -61,7 +61,7 @@ cdef class Int64HashTable(HashTable):
6161

6262
cdef class UInt32HashTable(HashTable):
6363
cdef kh_uint32_t *table
64-
cdef int64_t na_position
64+
cdef Py_ssize_t na_position
6565
cdef bint uses_mask
6666

6767
cpdef get_item(self, uint32_t val)
@@ -71,7 +71,7 @@ cdef class UInt32HashTable(HashTable):
7171

7272
cdef class Int32HashTable(HashTable):
7373
cdef kh_int32_t *table
74-
cdef int64_t na_position
74+
cdef Py_ssize_t na_position
7575
cdef bint uses_mask
7676

7777
cpdef get_item(self, int32_t val)
@@ -81,7 +81,7 @@ cdef class Int32HashTable(HashTable):
8181

8282
cdef class UInt16HashTable(HashTable):
8383
cdef kh_uint16_t *table
84-
cdef int64_t na_position
84+
cdef Py_ssize_t na_position
8585
cdef bint uses_mask
8686

8787
cpdef get_item(self, uint16_t val)
@@ -91,7 +91,7 @@ cdef class UInt16HashTable(HashTable):
9191

9292
cdef class Int16HashTable(HashTable):
9393
cdef kh_int16_t *table
94-
cdef int64_t na_position
94+
cdef Py_ssize_t na_position
9595
cdef bint uses_mask
9696

9797
cpdef get_item(self, int16_t val)
@@ -101,7 +101,7 @@ cdef class Int16HashTable(HashTable):
101101

102102
cdef class UInt8HashTable(HashTable):
103103
cdef kh_uint8_t *table
104-
cdef int64_t na_position
104+
cdef Py_ssize_t na_position
105105
cdef bint uses_mask
106106

107107
cpdef get_item(self, uint8_t val)
@@ -111,7 +111,7 @@ cdef class UInt8HashTable(HashTable):
111111

112112
cdef class Int8HashTable(HashTable):
113113
cdef kh_int8_t *table
114-
cdef int64_t na_position
114+
cdef Py_ssize_t na_position
115115
cdef bint uses_mask
116116

117117
cpdef get_item(self, int8_t val)
@@ -121,7 +121,7 @@ cdef class Int8HashTable(HashTable):
121121

122122
cdef class Float64HashTable(HashTable):
123123
cdef kh_float64_t *table
124-
cdef int64_t na_position
124+
cdef Py_ssize_t na_position
125125
cdef bint uses_mask
126126

127127
cpdef get_item(self, float64_t val)
@@ -131,7 +131,7 @@ cdef class Float64HashTable(HashTable):
131131

132132
cdef class Float32HashTable(HashTable):
133133
cdef kh_float32_t *table
134-
cdef int64_t na_position
134+
cdef Py_ssize_t na_position
135135
cdef bint uses_mask
136136

137137
cpdef get_item(self, float32_t val)
@@ -141,7 +141,7 @@ cdef class Float32HashTable(HashTable):
141141

142142
cdef class Complex64HashTable(HashTable):
143143
cdef kh_complex64_t *table
144-
cdef int64_t na_position
144+
cdef Py_ssize_t na_position
145145
cdef bint uses_mask
146146

147147
cpdef get_item(self, complex64_t val)
@@ -151,7 +151,7 @@ cdef class Complex64HashTable(HashTable):
151151

152152
cdef class Complex128HashTable(HashTable):
153153
cdef kh_complex128_t *table
154-
cdef int64_t na_position
154+
cdef Py_ssize_t na_position
155155
cdef bint uses_mask
156156

157157
cpdef get_item(self, complex128_t val)

pandas/_libs/hashtable_class_helper.pxi.in

+3-3
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ cdef class {{name}}HashTable(HashTable):
535535
int ret = 0
536536
{{c_type}} val
537537
khiter_t k
538-
int8_t na_position = self.na_position
538+
Py_ssize_t na_position = self.na_position
539539

540540
if self.uses_mask and mask is None:
541541
raise NotImplementedError # pragma: no cover
@@ -567,7 +567,7 @@ cdef class {{name}}HashTable(HashTable):
567567
Int64Vector self_locs = Int64Vector()
568568
Int64VectorData *l
569569
Int64VectorData *sl
570-
int8_t na_position = self.na_position
570+
Py_ssize_t na_position = self.na_position
571571

572572
l = &locs.data
573573
sl = &self_locs.data
@@ -609,7 +609,7 @@ cdef class {{name}}HashTable(HashTable):
609609
{{c_type}} val
610610
khiter_t k
611611
intp_t[::1] locs = np.empty(n, dtype=np.intp)
612-
int8_t na_position = self.na_position
612+
Py_ssize_t na_position = self.na_position
613613

614614
if self.uses_mask and mask is None:
615615
raise NotImplementedError # pragma: no cover

pandas/_libs/lib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ def has_only_ints_or_nan(const floating[:] arr) -> bool:
502502
return True
503503

504504

505-
def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len):
505+
def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, intp_t max_len):
506506
cdef:
507507
Py_ssize_t i, n = len(indices)
508508
intp_t k, vstart, vlast, v

pandas/_libs/tslibs/timedeltas.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -1740,7 +1740,8 @@ cdef class _Timedelta(timedelta):
17401740
Format the Timedelta as ISO 8601 Duration.
17411741

17421742
``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
1743-
values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.
1743+
values. See Wikipedia:
1744+
`ISO 8601 § Durations <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_.
17441745

17451746
Returns
17461747
-------

pandas/_libs/tslibs/timestamps.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,7 @@ cdef class _Timestamp(ABCTimestamp):
13091309
By default, the fractional part is omitted if self.microsecond == 0
13101310
and self._nanosecond == 0.
13111311

1312-
If self.tzinfo is not None, the UTC offset is also attached, giving
1312+
If self.tzinfo is not None, the UTC offset is also attached,
13131313
giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmmnnn+HH:MM'.
13141314

13151315
Parameters

pandas/core/algorithms.py

+6
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,8 @@ def map_array(
16471647
If the function returns a tuple with more than one element
16481648
a MultiIndex will be returned.
16491649
"""
1650+
from pandas import Index
1651+
16501652
if na_action not in (None, "ignore"):
16511653
msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
16521654
raise ValueError(msg)
@@ -1676,6 +1678,10 @@ def map_array(
16761678

16771679
if len(mapper) == 0:
16781680
mapper = Series(mapper, dtype=np.float64)
1681+
elif isinstance(mapper, dict):
1682+
mapper = Series(
1683+
mapper.values(), index=Index(mapper.keys(), tupleize_cols=False)
1684+
)
16791685
else:
16801686
mapper = Series(mapper)
16811687

pandas/core/frame.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -10286,7 +10286,9 @@ def apply(
1028610286
either the DataFrame's index (``axis=0``) or the DataFrame's columns
1028710287
(``axis=1``). By default (``result_type=None``), the final return type
1028810288
is inferred from the return type of the applied function. Otherwise,
10289-
it depends on the `result_type` argument.
10289+
it depends on the `result_type` argument. The return type of the applied
10290+
function is inferred based on the first computed result obtained after
10291+
applying the function to a Series object.
1029010292
1029110293
Parameters
1029210294
----------

pandas/core/indexes/multi.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Sequence,
1010
)
1111
from functools import wraps
12+
from itertools import zip_longest
1213
from sys import getsizeof
1314
from typing import (
1415
TYPE_CHECKING,
@@ -588,7 +589,7 @@ def from_tuples(
588589
elif isinstance(tuples, list):
589590
arrays = list(lib.to_object_array_tuples(tuples).T)
590591
else:
591-
arrs = zip(*tuples)
592+
arrs = zip_longest(*tuples, fillvalue=np.nan)
592593
arrays = cast(list[Sequence[Hashable]], arrs)
593594

594595
return cls.from_arrays(arrays, sortorder=sortorder, names=names)

pandas/core/reshape/concat.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -477,18 +477,23 @@ def _sanitize_mixed_ndim(
477477

478478
else:
479479
name = getattr(obj, "name", None)
480+
rename_columns = False
480481
if ignore_index or name is None:
481482
if axis == 1:
482483
# doing a row-wise concatenation so need everything
483484
# to line up
484-
name = 0
485+
if name is None:
486+
name = 0
487+
rename_columns = True
485488
else:
486489
# doing a column-wise concatenation so need series
487490
# to have unique names
488-
name = current_column
489-
current_column += 1
491+
if name is None:
492+
rename_columns = True
493+
name = current_column
494+
current_column += 1
490495
obj = sample._constructor(obj, copy=False)
491-
if isinstance(obj, ABCDataFrame):
496+
if isinstance(obj, ABCDataFrame) and rename_columns:
492497
obj.columns = range(name, name + 1, 1)
493498
else:
494499
obj = sample._constructor({name: obj}, copy=False)

pandas/tests/indexes/multi/test_constructors.py

+13
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,19 @@ def test_from_tuples_with_tuple_label():
410410
tm.assert_frame_equal(expected, result)
411411

412412

413+
@pytest.mark.parametrize(
414+
"keys, expected",
415+
[
416+
((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))),
417+
((("l1", "l2"), ("l1",)), (("l1", "l2"), ("l1", np.nan))),
418+
],
419+
)
420+
def test_from_tuples_with_various_tuple_lengths(keys, expected):
421+
# GH 60695
422+
idx = MultiIndex.from_tuples(keys)
423+
assert tuple(idx) == expected
424+
425+
413426
# ----------------------------------------------------------------------------
414427
# from_product
415428
# ----------------------------------------------------------------------------

pandas/tests/libs/test_hashtable.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -149,18 +149,19 @@ def test_map_locations(self, table_type, dtype, writable):
149149
def test_map_locations_mask(self, table_type, dtype, writable):
150150
if table_type == ht.PyObjectHashTable:
151151
pytest.skip("Mask not supported for object")
152-
N = 3
152+
N = 129 # must be > 128 to test GH#58924
153153
table = table_type(uses_mask=True)
154154
keys = (np.arange(N) + N).astype(dtype)
155155
keys.flags.writeable = writable
156-
table.map_locations(keys, np.array([False, False, True]))
156+
mask = np.concatenate([np.repeat(False, N - 1), [True]], axis=0)
157+
table.map_locations(keys, mask)
157158
for i in range(N - 1):
158159
assert table.get_item(keys[i]) == i
159160

160161
with pytest.raises(KeyError, match=re.escape(str(keys[N - 1]))):
161162
table.get_item(keys[N - 1])
162163

163-
assert table.get_na() == 2
164+
assert table.get_na() == N - 1
164165

165166
def test_lookup(self, table_type, dtype, writable):
166167
N = 3

pandas/tests/reshape/concat/test_concat.py

+60-2
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,8 @@ def test_concat_mixed_objs_index(self):
326326
def test_concat_mixed_objs_index_names(self):
327327
# Test row-wise concat for mixed series/frames with distinct names
328328
# GH2385, GH15047
329+
# GH#60723, GH#56257: the ignore_index=True expectation below was updated
330+
# because the one originally asserted for the issues above was incorrect
329331

330332
index = date_range("01-Jan-2013", periods=10, freq="h")
331333
arr = np.arange(10, dtype="int64")
@@ -341,8 +343,11 @@ def test_concat_mixed_objs_index_names(self):
341343
result = concat([s1, df, s2])
342344
tm.assert_frame_equal(result, expected)
343345

344-
# Rename all series to 0 when ignore_index=True
345-
expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
346+
expected = DataFrame(
347+
np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T,
348+
index=np.arange(30, dtype=np.int64),
349+
columns=["foo", 0, "bar"],
350+
)
346351
result = concat([s1, df, s2], ignore_index=True)
347352
tm.assert_frame_equal(result, expected)
348353

@@ -943,3 +948,56 @@ def test_concat_with_moot_ignore_index_and_keys():
943948
msg = f"Cannot set {ignore_index=} and specify keys. Either should be used."
944949
with pytest.raises(ValueError, match=msg):
945950
concat([df1, df2], keys=keys, ignore_index=ignore_index)
951+
952+
953+
@pytest.mark.parametrize(
954+
"inputs, ignore_index, axis, expected",
955+
[
956+
# Concatenating DataFrame and named Series without ignore_index
957+
(
958+
[DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5], name="c")],
959+
False,
960+
0,
961+
DataFrame(
962+
{
963+
"a": [0, 1, None, None],
964+
"b": [2, 3, None, None],
965+
"c": [None, None, 4, 5],
966+
},
967+
index=[0, 1, 0, 1],
968+
),
969+
),
970+
# Concatenating DataFrame and named Series with ignore_index
971+
(
972+
[DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5], name="c")],
973+
True,
974+
0,
975+
DataFrame(
976+
{
977+
"a": [0, 1, None, None],
978+
"b": [2, 3, None, None],
979+
"c": [None, None, 4, 5],
980+
},
981+
index=[0, 1, 2, 3],
982+
),
983+
),
984+
# Concatenating DataFrame and unnamed Series along columns
985+
(
986+
[DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5]), Series([4, 5])],
987+
False,
988+
1,
989+
DataFrame({"a": [0, 1], "b": [2, 3], 0: [4, 5], 1: [4, 5]}, index=[0, 1]),
990+
),
991+
# Concatenating DataFrame and unnamed Series along columns with ignore_index
992+
(
993+
[DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5]), Series([4, 5])],
994+
True,
995+
1,
996+
DataFrame({0: [0, 1], 1: [2, 3], 2: [4, 5], 3: [4, 5]}, index=[0, 1]),
997+
),
998+
],
999+
)
1000+
def test_concat_of_series_and_frame(inputs, ignore_index, axis, expected):
1001+
# GH #60723 and #56257
1002+
result = concat(inputs, ignore_index=ignore_index, axis=axis)
1003+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)