Merge branch 'main' into issue-60550-fix-v2

Abhibhav2003 · web-flow · commit d37419c78234 · 2025-03-08T11:48:29.000+05:30
diff --git a/doc/source/getting_started/overview.rst b/doc/source/getting_started/overview.rst
@@ -174,3 +174,4 @@ License
 -------
 
 .. literalinclude:: ../../../LICENSE
+   :language: none
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -694,8 +694,10 @@ Interval
 Indexing
 ^^^^^^^^
 - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
+- Bug in :meth:`DataFrame.__getitem__` raising an ``OverflowError`` when slicing a :class:`DataFrame` with many rows (:issue:`59531`)
 - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`)
 - Bug in :meth:`DataFrame.loc` with inconsistent behavior of loc-set with 2 given indexes to Series (:issue:`59933`)
+- Bug in :meth:`Index.get_indexer` and similar methods when ``NaN`` is located at or after position 128 (:issue:`58924`)
 - Bug in :meth:`MultiIndex.insert` when a new value inserted to a datetime-like level gets cast to ``NaT`` and fails indexing (:issue:`60388`)
 - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`)
 
@@ -712,7 +714,7 @@ MultiIndex
 - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
 - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
 - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
--
+- Bug in :meth:`MultiIndex.from_tuples` producing incorrect output when the input tuples contain ``NaN`` values (:issue:`60695`, :issue:`60988`)
 
 I/O
 ^^^
@@ -787,6 +789,7 @@ Reshaping
 - Bug in :meth:`DataFrame.pivot_table` incorrectly subaggregating results when called without an ``index`` argument (:issue:`58722`)
 - Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`)
+- Bug in :func:`concat` where concatenating a :class:`DataFrame` and a :class:`Series` with ``ignore_index=True`` dropped the :class:`Series` name (:issue:`60723`, :issue:`56257`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd
@@ -41,7 +41,7 @@ cdef class HashTable:
 
 cdef class UInt64HashTable(HashTable):
     cdef kh_uint64_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, uint64_t val)
@@ -51,7 +51,7 @@ cdef class UInt64HashTable(HashTable):
 
 cdef class Int64HashTable(HashTable):
     cdef kh_int64_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, int64_t val)
@@ -61,7 +61,7 @@ cdef class Int64HashTable(HashTable):
 
 cdef class UInt32HashTable(HashTable):
     cdef kh_uint32_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, uint32_t val)
@@ -71,7 +71,7 @@ cdef class UInt32HashTable(HashTable):
 
 cdef class Int32HashTable(HashTable):
     cdef kh_int32_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, int32_t val)
@@ -81,7 +81,7 @@ cdef class Int32HashTable(HashTable):
 
 cdef class UInt16HashTable(HashTable):
     cdef kh_uint16_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, uint16_t val)
@@ -91,7 +91,7 @@ cdef class UInt16HashTable(HashTable):
 
 cdef class Int16HashTable(HashTable):
     cdef kh_int16_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, int16_t val)
@@ -101,7 +101,7 @@ cdef class Int16HashTable(HashTable):
 
 cdef class UInt8HashTable(HashTable):
     cdef kh_uint8_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, uint8_t val)
@@ -111,7 +111,7 @@ cdef class UInt8HashTable(HashTable):
 
 cdef class Int8HashTable(HashTable):
     cdef kh_int8_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, int8_t val)
@@ -121,7 +121,7 @@ cdef class Int8HashTable(HashTable):
 
 cdef class Float64HashTable(HashTable):
     cdef kh_float64_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, float64_t val)
@@ -131,7 +131,7 @@ cdef class Float64HashTable(HashTable):
 
 cdef class Float32HashTable(HashTable):
     cdef kh_float32_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, float32_t val)
@@ -141,7 +141,7 @@ cdef class Float32HashTable(HashTable):
 
 cdef class Complex64HashTable(HashTable):
     cdef kh_complex64_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, complex64_t val)
@@ -151,7 +151,7 @@ cdef class Complex64HashTable(HashTable):
 
 cdef class Complex128HashTable(HashTable):
     cdef kh_complex128_t *table
-    cdef int64_t na_position
+    cdef Py_ssize_t na_position
     cdef bint uses_mask
 
     cpdef get_item(self, complex128_t val)
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -535,7 +535,7 @@ cdef class {{name}}HashTable(HashTable):
             int ret = 0
             {{c_type}} val
             khiter_t k
-            int8_t na_position = self.na_position
+            Py_ssize_t na_position = self.na_position
 
         if self.uses_mask and mask is None:
             raise NotImplementedError  # pragma: no cover
@@ -567,7 +567,7 @@ cdef class {{name}}HashTable(HashTable):
             Int64Vector self_locs = Int64Vector()
             Int64VectorData *l
             Int64VectorData *sl
-            int8_t na_position = self.na_position
+            Py_ssize_t na_position = self.na_position
 
         l = &locs.data
         sl = &self_locs.data
@@ -609,7 +609,7 @@ cdef class {{name}}HashTable(HashTable):
             {{c_type}} val
             khiter_t k
             intp_t[::1] locs = np.empty(n, dtype=np.intp)
-            int8_t na_position = self.na_position
+            Py_ssize_t na_position = self.na_position
 
         if self.uses_mask and mask is None:
             raise NotImplementedError  # pragma: no cover
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -502,7 +502,7 @@ def has_only_ints_or_nan(const floating[:] arr) -> bool:
     return True
 
 
-def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len):
+def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, intp_t max_len):
     cdef:
         Py_ssize_t i, n = len(indices)
         intp_t k, vstart, vlast, v
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -1740,7 +1740,8 @@ cdef class _Timedelta(timedelta):
         Format the Timedelta as ISO 8601 Duration.
 
         ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
-        values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.
+        values. See Wikipedia:
+        `ISO 8601 § Durations <https://en.wikipedia.org/wiki/ISO_8601#Durations>`_.
 
         Returns
         -------
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -1309,7 +1309,7 @@ cdef class _Timestamp(ABCTimestamp):
         By default, the fractional part is omitted if self.microsecond == 0
         and self._nanosecond == 0.
 
-        If self.tzinfo is not None, the UTC offset is also attached, giving
+        If self.tzinfo is not None, the UTC offset is also attached,
         giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmmnnn+HH:MM'.
 
         Parameters
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1647,6 +1647,8 @@ def map_array(
         If the function returns a tuple with more than one element
         a MultiIndex will be returned.
     """
+    from pandas import Index
+
     if na_action not in (None, "ignore"):
         msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
         raise ValueError(msg)
@@ -1676,6 +1678,10 @@ def map_array(
 
             if len(mapper) == 0:
                 mapper = Series(mapper, dtype=np.float64)
+            elif isinstance(mapper, dict):
+                mapper = Series(
+                    mapper.values(), index=Index(mapper.keys(), tupleize_cols=False)
+                )
             else:
                 mapper = Series(mapper)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -10286,7 +10286,9 @@ def apply(
         either the DataFrame's index (``axis=0``) or the DataFrame's columns
         (``axis=1``). By default (``result_type=None``), the final return type
         is inferred from the return type of the applied function. Otherwise,
-        it depends on the `result_type` argument.
+        it depends on the `result_type` argument. The return type of the applied
+        function is inferred based on the first computed result obtained after
+        applying the function to a Series object.
 
         Parameters
         ----------
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -9,6 +9,7 @@
     Sequence,
 )
 from functools import wraps
+from itertools import zip_longest
 from sys import getsizeof
 from typing import (
     TYPE_CHECKING,
@@ -588,7 +589,7 @@ def from_tuples(
         elif isinstance(tuples, list):
             arrays = list(lib.to_object_array_tuples(tuples).T)
         else:
-            arrs = zip(*tuples)
+            arrs = zip_longest(*tuples, fillvalue=np.nan)
             arrays = cast(list[Sequence[Hashable]], arrs)
 
         return cls.from_arrays(arrays, sortorder=sortorder, names=names)
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
@@ -477,18 +477,23 @@ def _sanitize_mixed_ndim(
 
         else:
             name = getattr(obj, "name", None)
+            rename_columns = False
             if ignore_index or name is None:
                 if axis == 1:
                     # doing a row-wise concatenation so need everything
                     # to line up
-                    name = 0
+                    if name is None:
+                        name = 0
+                        rename_columns = True
                 else:
                     # doing a column-wise concatenation so need series
                     # to have unique names
-                    name = current_column
-                    current_column += 1
+                    if name is None:
+                        rename_columns = True
+                        name = current_column
+                        current_column += 1
                 obj = sample._constructor(obj, copy=False)
-                if isinstance(obj, ABCDataFrame):
+                if isinstance(obj, ABCDataFrame) and rename_columns:
                     obj.columns = range(name, name + 1, 1)
             else:
                 obj = sample._constructor({name: obj}, copy=False)
diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py
@@ -410,6 +410,19 @@ def test_from_tuples_with_tuple_label():
     tm.assert_frame_equal(expected, result)
 
 
+@pytest.mark.parametrize(
+    "keys, expected",
+    [
+        ((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))),
+        ((("l1", "l2"), ("l1",)), (("l1", "l2"), ("l1", np.nan))),
+    ],
+)
+def test_from_tuples_with_various_tuple_lengths(keys, expected):
+    # GH 60695
+    idx = MultiIndex.from_tuples(keys)
+    assert tuple(idx) == expected
+
+
 # ----------------------------------------------------------------------------
 # from_product
 # ----------------------------------------------------------------------------
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
@@ -149,18 +149,19 @@ def test_map_locations(self, table_type, dtype, writable):
     def test_map_locations_mask(self, table_type, dtype, writable):
         if table_type == ht.PyObjectHashTable:
             pytest.skip("Mask not supported for object")
-        N = 3
+        N = 129  # must be > 128 to test GH#58924
         table = table_type(uses_mask=True)
         keys = (np.arange(N) + N).astype(dtype)
         keys.flags.writeable = writable
-        table.map_locations(keys, np.array([False, False, True]))
+        mask = np.concatenate([np.repeat(False, N - 1), [True]], axis=0)
+        table.map_locations(keys, mask)
         for i in range(N - 1):
             assert table.get_item(keys[i]) == i
 
         with pytest.raises(KeyError, match=re.escape(str(keys[N - 1]))):
             table.get_item(keys[N - 1])
 
-        assert table.get_na() == 2
+        assert table.get_na() == N - 1
 
     def test_lookup(self, table_type, dtype, writable):
         N = 3
diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py
@@ -326,6 +326,8 @@ def test_concat_mixed_objs_index(self):
     def test_concat_mixed_objs_index_names(self):
         # Test row-wise concat for mixed series/frames with distinct names
         # GH2385, GH15047
+        # GH#60723, GH#56257: the ignore_index=True expectation was corrected;
+        # named Series keep their names instead of all being renamed to 0
 
         index = date_range("01-Jan-2013", periods=10, freq="h")
         arr = np.arange(10, dtype="int64")
@@ -341,8 +343,11 @@ def test_concat_mixed_objs_index_names(self):
         result = concat([s1, df, s2])
         tm.assert_frame_equal(result, expected)
 
-        # Rename all series to 0 when ignore_index=True
-        expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
+        expected = DataFrame(
+            np.kron(np.where(np.identity(3) == 1, 1, np.nan), arr).T,
+            index=np.arange(30, dtype=np.int64),
+            columns=["foo", 0, "bar"],
+        )
         result = concat([s1, df, s2], ignore_index=True)
         tm.assert_frame_equal(result, expected)
 
@@ -943,3 +948,56 @@ def test_concat_with_moot_ignore_index_and_keys():
     msg = f"Cannot set {ignore_index=} and specify keys. Either should be used."
     with pytest.raises(ValueError, match=msg):
         concat([df1, df2], keys=keys, ignore_index=ignore_index)
+
+
+@pytest.mark.parametrize(
+    "inputs, ignore_index, axis, expected",
+    [
+        # Concatenating DataFrame and named Series without ignore_index
+        (
+            [DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5], name="c")],
+            False,
+            0,
+            DataFrame(
+                {
+                    "a": [0, 1, None, None],
+                    "b": [2, 3, None, None],
+                    "c": [None, None, 4, 5],
+                },
+                index=[0, 1, 0, 1],
+            ),
+        ),
+        # Concatenating DataFrame and named Series with ignore_index
+        (
+            [DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5], name="c")],
+            True,
+            0,
+            DataFrame(
+                {
+                    "a": [0, 1, None, None],
+                    "b": [2, 3, None, None],
+                    "c": [None, None, 4, 5],
+                },
+                index=[0, 1, 2, 3],
+            ),
+        ),
+        # Concatenating DataFrame and unnamed Series along columns
+        (
+            [DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5]), Series([4, 5])],
+            False,
+            1,
+            DataFrame({"a": [0, 1], "b": [2, 3], 0: [4, 5], 1: [4, 5]}, index=[0, 1]),
+        ),
+        # Concatenating DataFrame and unnamed Series along columns with ignore_index
+        (
+            [DataFrame({"a": [0, 1], "b": [2, 3]}), Series([4, 5]), Series([4, 5])],
+            True,
+            1,
+            DataFrame({0: [0, 1], 1: [2, 3], 2: [4, 5], 3: [4, 5]}, index=[0, 1]),
+        ),
+    ],
+)
+def test_concat_of_series_and_frame(inputs, ignore_index, axis, expected):
+    # GH #60723 and #56257
+    result = concat(inputs, ignore_index=ignore_index, axis=axis)
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py

Original file line number	Diff line number	Diff line change
`@@ -174,3 +174,4 @@ License`
`174`	`174`	`-------`
`175`	`175`
`176`	`176`	`.. literalinclude:: ../../../LICENSE`
	`177`	`+ :language: none`