Skip to content

Commit ee5ff56

Browse files
committed
Merge branch 'issue-60550-fix-v2' of https://github.com/Abhibhav2003/pandas-Abhibhav into issue-60550-fix-v2
2 parents d7f4177 + 0007ab4 commit ee5ff56

File tree

16 files changed

+157
-52
lines changed

16 files changed

+157
-52
lines changed

doc/source/getting_started/overview.rst

+1
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,4 @@ License
174174
-------
175175

176176
.. literalinclude:: ../../../LICENSE
177+
:language: none

doc/source/whatsnew/v3.0.0.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -694,8 +694,10 @@ Interval
694694
Indexing
695695
^^^^^^^^
696696
- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
697+
- Bug in :meth:`DataFrame.__getitem__` raising an ``OverflowError`` when slicing a :class:`DataFrame` with many rows (:issue:`59531`)
697698
- Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
698699
- Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
700+
- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
699701
- Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
700702
- Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
701703

@@ -712,7 +714,7 @@ MultiIndex
712714
- :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
713715
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
714716
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
715-
-
717+
- Bug in :meth:`MultiIndex.from_tuples` producing incorrect output when the input tuples contain ``NaN`` values (:issue:`60695`, :issue:`60988`)
716718

717719
I/O
718720
^^^
@@ -787,6 +789,7 @@ Reshaping
787789
- Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
788790
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
789791
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
792+
- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index=True`` drops the series name (:issue:`60723`, :issue:`56257`)
790793

791794
Sparse
792795
^^^^^^

pandas/_libs/hashtable.pxd

+12-12
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ cdef class HashTable:
4141

4242
cdef class UInt64HashTable(HashTable):
4343
cdef kh_uint64_t *table
44-
cdef int64_t na_position
44+
cdef Py_ssize_t na_position
4545
cdef bint uses_mask
4646

4747
cpdef get_item(self, uint64_t val)
@@ -51,7 +51,7 @@ cdef class UInt64HashTable(HashTable):
5151

5252
cdef class Int64HashTable(HashTable):
5353
cdef kh_int64_t *table
54-
cdef int64_t na_position
54+
cdef Py_ssize_t na_position
5555
cdef bint uses_mask
5656

5757
cpdef get_item(self, int64_t val)
@@ -61,7 +61,7 @@ cdef class Int64HashTable(HashTable):
6161

6262
cdef class UInt32HashTable(HashTable):
6363
cdef kh_uint32_t *table
64-
cdef int64_t na_position
64+
cdef Py_ssize_t na_position
6565
cdef bint uses_mask
6666

6767
cpdef get_item(self, uint32_t val)
@@ -71,7 +71,7 @@ cdef class UInt32HashTable(HashTable):
7171

7272
cdef class Int32HashTable(HashTable):
7373
cdef kh_int32_t *table
74-
cdef int64_t na_position
74+
cdef Py_ssize_t na_position
7575
cdef bint uses_mask
7676

7777
cpdef get_item(self, int32_t val)
@@ -81,7 +81,7 @@ cdef class Int32HashTable(HashTable):
8181

8282
cdef class UInt16HashTable(HashTable):
8383
cdef kh_uint16_t *table
84-
cdef int64_t na_position
84+
cdef Py_ssize_t na_position
8585
cdef bint uses_mask
8686

8787
cpdef get_item(self, uint16_t val)
@@ -91,7 +91,7 @@ cdef class UInt16HashTable(HashTable):
9191

9292
cdef class Int16HashTable(HashTable):
9393
cdef kh_int16_t *table
94-
cdef int64_t na_position
94+
cdef Py_ssize_t na_position
9595
cdef bint uses_mask
9696

9797
cpdef get_item(self, int16_t val)
@@ -101,7 +101,7 @@ cdef class Int16HashTable(HashTable):
101101

102102
cdef class UInt8HashTable(HashTable):
103103
cdef kh_uint8_t *table
104-
cdef int64_t na_position
104+
cdef Py_ssize_t na_position
105105
cdef bint uses_mask
106106

107107
cpdef get_item(self, uint8_t val)
@@ -111,7 +111,7 @@ cdef class UInt8HashTable(HashTable):
111111

112112
cdef class Int8HashTable(HashTable):
113113
cdef kh_int8_t *table
114-
cdef int64_t na_position
114+
cdef Py_ssize_t na_position
115115
cdef bint uses_mask
116116

117117
cpdef get_item(self, int8_t val)
@@ -121,7 +121,7 @@ cdef class Int8HashTable(HashTable):
121121

122122
cdef class Float64HashTable(HashTable):
123123
cdef kh_float64_t *table
124-
cdef int64_t na_position
124+
cdef Py_ssize_t na_position
125125
cdef bint uses_mask
126126

127127
cpdef get_item(self, float64_t val)
@@ -131,7 +131,7 @@ cdef class Float64HashTable(HashTable):
131131

132132
cdef class Float32HashTable(HashTable):
133133
cdef kh_float32_t *table
134-
cdef int64_t na_position
134+
cdef Py_ssize_t na_position
135135
cdef bint uses_mask
136136

137137
cpdef get_item(self, float32_t val)
@@ -141,7 +141,7 @@ cdef class Float32HashTable(HashTable):
141141

142142
cdef class Complex64HashTable(HashTable):
143143
cdef kh_complex64_t *table
144-
cdef int64_t na_position
144+
cdef Py_ssize_t na_position
145145
cdef bint uses_mask
146146

147147
cpdef get_item(self, complex64_t val)
@@ -151,7 +151,7 @@ cdef class Complex64HashTable(HashTable):
151151

152152
cdef class Complex128HashTable(HashTable):
153153
cdef kh_complex128_t *table
154-
cdef int64_t na_position
154+
cdef Py_ssize_t na_position
155155
cdef bint uses_mask
156156

157157
cpdef get_item(self, complex128_t val)

pandas/_libs/hashtable_class_helper.pxi.in

+3-3
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ cdef class {{name}}HashTable(HashTable):
535535
int ret = 0
536536
{{c_type}} val
537537
khiter_t k
538-
int8_t na_position = self.na_position
538+
Py_ssize_t na_position = self.na_position
539539

540540
if self.uses_mask and mask is None:
541541
raise NotImplementedError # pragma: no cover
@@ -567,7 +567,7 @@ cdef class {{name}}HashTable(HashTable):
567567
Int64Vector self_locs = Int64Vector()
568568
Int64VectorData *l
569569
Int64VectorData *sl
570-
int8_t na_position = self.na_position
570+
Py_ssize_t na_position = self.na_position
571571

572572
l = &locs.data
573573
sl = &self_locs.data
@@ -609,7 +609,7 @@ cdef class {{name}}HashTable(HashTable):
609609
{{c_type}} val
610610
khiter_t k
611611
intp_t[::1] locs = np.empty(n, dtype=np.intp)
612-
int8_t na_position = self.na_position
612+
Py_ssize_t na_position = self.na_position
613613

614614
if self.uses_mask and mask is None:
615615
raise NotImplementedError # pragma: no cover

pandas/_libs/lib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ def has_only_ints_or_nan(const floating[:] arr) -> bool:
502502
return True
503503

504504

505-
def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len):
505+
def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, intp_t max_len):
506506
cdef:
507507
Py_ssize_t i, n = len(indices)
508508
intp_t k, vstart, vlast, v

pandas/_libs/tslibs/timedeltas.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -1740,7 +1740,8 @@ cdef class _Timedelta(timedelta):
17401740
Format the Timedelta as ISO 8601 Duration.
17411741

17421742
``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
1743-
values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.
1743+
values. See Wikipedia:
1744+
`ISO 8601 § Durations <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_.
17441745

17451746
Returns
17461747
-------

pandas/_libs/tslibs/timestamps.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,7 @@ cdef class _Timestamp(ABCTimestamp):
13091309
By default, the fractional part is omitted if self.microsecond == 0
13101310
and self._nanosecond == 0.
13111311

1312-
If self.tzinfo is not None, the UTC offset is also attached, giving
1312+
If self.tzinfo is not None, the UTC offset is also attached,
13131313
giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmmnnn+HH:MM'.
13141314

13151315
Parameters

pandas/core/algorithms.py

+6
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,8 @@ def map_array(
16471647
If the function returns a tuple with more than one element
16481648
a MultiIndex will be returned.
16491649
"""
1650+
from pandas import Index
1651+
16501652
if na_action not in (None, "ignore"):
16511653
msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
16521654
raise ValueError(msg)
@@ -1676,6 +1678,10 @@ def map_array(
16761678

16771679
if len(mapper) == 0:
16781680
mapper = Series(mapper, dtype=np.float64)
1681+
elif isinstance(mapper, dict):
1682+
mapper = Series(
1683+
mapper.values(), index=Index(mapper.keys(), tupleize_cols=False)
1684+
)
16791685
else:
16801686
mapper = Series(mapper)
16811687

pandas/core/frame.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -10286,7 +10286,9 @@ def apply(
1028610286
either the DataFrame's index (``axis=0``) or the DataFrame's columns
1028710287
(``axis=1``). By default (``result_type=None``), the final return type
1028810288
is inferred from the return type of the applied function. Otherwise,
10289-
it depends on the `result_type` argument.
10289+
it depends on the `result_type` argument. The return type of the applied
10290+
function is inferred based on the first computed result obtained after
10291+
applying the function to a Series object.
1029010292
1029110293
Parameters
1029210294
----------

pandas/core/indexes/multi.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Sequence,
1010
)
1111
from functools import wraps
12+
from itertools import zip_longest
1213
from sys import getsizeof
1314
from typing import (
1415
TYPE_CHECKING,
@@ -588,7 +589,7 @@ def from_tuples(
588589
elif isinstance(tuples, list):
589590
arrays = list(lib.to_object_array_tuples(tuples).T)
590591
else:
591-
arrs = zip(*tuples)
592+
arrs = zip_longest(*tuples, fillvalue=np.nan)
592593
arrays = cast(list[Sequence[Hashable]], arrs)
593594

594595
return cls.from_arrays(arrays, sortorder=sortorder, names=names)

pandas/core/methods/describe.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -345,14 +345,14 @@ def _refine_percentiles(
345345
percentiles : list-like of numbers, optional
346346
The percentiles to include in the output.
347347
"""
348-
# Fix for issue #60550 :
348+
349349
from pandas import Series
350350

351351
if percentiles is None:
352352
return np.array([0.25, 0.5, 0.75])
353353

354-
# Fix for issue #60550 :
355-
if isinstance(percentiles, (list, np.ndarray, Series)) and len(percentiles) == 0:
354+
# Handle an empty list, an empty NumPy array, or an empty Series:
355+
elif isinstance(percentiles, (list, np.ndarray, Series)) and len(percentiles) == 0:
356356
return np.array([])
357357

358358
# explicit conversion of `percentiles` to list

pandas/core/reshape/concat.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -477,18 +477,23 @@ def _sanitize_mixed_ndim(
477477

478478
else:
479479
name = getattr(obj, "name", None)
480+
rename_columns = False
480481
if ignore_index or name is None:
481482
if axis == 1:
482483
# doing a row-wise concatenation so need everything
483484
# to line up
484-
name = 0
485+
if name is None:
486+
name = 0
487+
rename_columns = True
485488
else:
486489
# doing a column-wise concatenation so need series
487490
# to have unique names
488-
name = current_column
489-
current_column += 1
491+
if name is None:
492+
rename_columns = True
493+
name = current_column
494+
current_column += 1
490495
obj = sample._constructor(obj, copy=False)
491-
if isinstance(obj, ABCDataFrame):
496+
if isinstance(obj, ABCDataFrame) and rename_columns:
492497
obj.columns = range(name, name + 1, 1)
493498
else:
494499
obj = sample._constructor({name: obj}, copy=False)

pandas/tests/indexes/multi/test_constructors.py

+13
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,19 @@ def test_from_tuples_with_tuple_label():
410410
tm.assert_frame_equal(expected, result)
411411

412412

413+
@pytest.mark.parametrize(
414+
"keys, expected",
415+
[
416+
((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))),
417+
((("l1", "l2"), ("l1",)), (("l1", "l2"), ("l1", np.nan))),
418+
],
419+
)
420+
def test_from_tuples_with_various_tuple_lengths(keys, expected):
421+
# GH 60695
422+
idx = MultiIndex.from_tuples(keys)
423+
assert tuple(idx) == expected
424+
425+
413426
# ----------------------------------------------------------------------------
414427
# from_product
415428
# ----------------------------------------------------------------------------

pandas/tests/libs/test_hashtable.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -149,18 +149,19 @@ def test_map_locations(self, table_type, dtype, writable):
149149
def test_map_locations_mask(self, table_type, dtype, writable):
150150
if table_type == ht.PyObjectHashTable:
151151
pytest.skip("Mask not supported for object")
152-
N = 3
152+
N = 129 # must be > 128 to test GH#58924
153153
table = table_type(uses_mask=True)
154154
keys = (np.arange(N) + N).astype(dtype)
155155
keys.flags.writeable = writable
156-
table.map_locations(keys, np.array([False, False, True]))
156+
mask = np.concatenate([np.repeat(False, N - 1), [True]], axis=0)
157+
table.map_locations(keys, mask)
157158
for i in range(N - 1):
158159
assert table.get_item(keys[i]) == i
159160

160161
with pytest.raises(KeyError, match=re.escape(str(keys[N - 1]))):
161162
table.get_item(keys[N - 1])
162163

163-
assert table.get_na() == 2
164+
assert table.get_na() == N - 1
164165

165166
def test_lookup(self, table_type, dtype, writable):
166167
N = 3

0 commit comments

Comments
 (0)