fix: #59950 handle duplicate column names in dataframe queries

miguelcsx · miguelcsx · commit a5bd34be0077 · 2024-10-04T22:01:13.000-05:00
- Fixed an issue where `Dataframe.query()` would throw an unexpected
  error

- The error was caused by `self.dtypes[k]`

- Adjusted the behavior to match the behavior prior to pandas version

- Added tests to ensure that `Dataframe.query()` works as expected
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -704,6 +704,7 @@ Other
 - Bug in :meth:`DataFrame.apply` where passing ``engine="numba"`` ignored ``args`` passed to the applied function (:issue:`58712`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. (:issue:`55091`)
+- Bug in :meth:`DataFrame.query` where using duplicate column names led to a ``TypeError``. (:issue:`59950`)
 - Bug in :meth:`DataFrame.query` which raised an exception or produced incorrect results when expressions contained backtick-quoted column names containing the hash character ``#``, backticks, or characters that fall outside the ASCII range (U+0001..U+007F). (:issue:`59285`) (:issue:`49633`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
 - Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -603,9 +603,9 @@ def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
         dtypes = self.dtypes
         return {
             clean_column_name(k): Series(
-                v, copy=False, index=self.index, name=k, dtype=dtypes[k]
+                v, copy=False, index=self.index, name=k, dtype=dtype
             ).__finalize__(self)
-            for k, v in zip(self.columns, self._iter_column_arrays())
+            for k, v, dtype in zip(self.columns, self._iter_column_arrays(), dtypes)
             if not isinstance(k, int)
         }
 
diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
@@ -159,6 +159,25 @@ def test_query_empty_string(self):
         with pytest.raises(ValueError, match=msg):
             df.query("")
 
+    def test_query_duplicate_column_name(self, engine, parser):
+        df = DataFrame(
+            {
+                "A": range(3),
+                "B": range(3),
+                "C": range(3)
+            }
+        ).rename(columns={"B": "A"})
+
+        res = df.query('C == 1', engine=engine, parser=parser)
+
+        expect = DataFrame(
+            [[1, 1, 1]],
+            columns=["A", "A", "C"],
+            index=[1]
+        )
+
+        tm.assert_frame_equal(res, expect)
+
     def test_eval_resolvers_as_list(self):
         # GH 14095
         df = DataFrame(