Deprecate positional access for label based indexes in Series.__getitem__ (#14654)

galipremsagar · web-flow · commit 4539f4f83b42 · 2023-12-18T19:41:53.000-06:00
This PR deprecates positional access in `Series.__getitem__` when a label-based index is present.

xref: pandas-dev/pandas#53201

On `pandas_2.0_feature_branch`:
```
= 260 failed, 101179 passed, 2091 skipped, 954 xfailed, 312 xpassed in 1104.58s (0:18:24) =
```
This PR:
```
= 248 failed, 101190 passed, 2091 skipped, 954 xfailed, 312 xpassed in 1105.78s (0:18:25) =
```
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -371,6 +371,12 @@ def _loc_to_iloc(self, arg):
             arg = arg[0]
         if _is_scalar_or_zero_d_array(arg):
             index_dtype = self._frame.index.dtype
+            warn_msg = (
+                "Series.__getitem__ treating keys as positions is deprecated. "
+                "In a future version, integer keys will always be treated "
+                "as labels (consistent with DataFrame behavior). To access "
+                "a value by position, use `ser.iloc[pos]`"
+            )
             if not _is_non_decimal_numeric_dtype(index_dtype) and not (
                 isinstance(index_dtype, cudf.CategoricalDtype)
                 and is_integer_dtype(index_dtype.categories.dtype)
@@ -379,11 +385,13 @@ def _loc_to_iloc(self, arg):
                 if isinstance(arg, cudf.Scalar) and is_integer_dtype(
                     arg.dtype
                 ):
-                    found_index = arg.value
-                    return found_index
+                    # Do not remove until pandas 3.0 support is added.
+                    warnings.warn(warn_msg, FutureWarning)
+                    return arg.value
                 elif is_integer(arg):
-                    found_index = arg
-                    return found_index
+                    # Do not remove until pandas 3.0 support is added.
+                    warnings.warn(warn_msg, FutureWarning)
+                    return arg
             try:
                 indices = self._frame.index._indices_of(arg)
                 if (n := len(indices)) == 0:
diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
@@ -595,12 +595,12 @@ def test_csv_reader_NaN_values():
         header=None,
         na_values=custom_na_values,
     )
-    assert gdf.dtypes[0] == "int8"
+    assert gdf.dtypes.iloc[0] == "int8"
     assert all(gdf["0"][idx] is cudf.NA for idx in range(len(gdf["0"])))
 
     # data type detection should evaluate the column to object if some nulls
     gdf = read_csv(StringIO(all_cells), header=None)
-    assert gdf.dtypes[0] == np.dtype("object")
+    assert gdf.dtypes.iloc[0] == np.dtype("object")
 
 
 def test_csv_reader_thousands(tmpdir):
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
@@ -9,11 +9,13 @@
 import pytest
 
 import cudf
+from cudf.core._compat import PANDAS_GE_210
 from cudf.testing import _utils as utils
 from cudf.testing._utils import (
     INTEGER_TYPES,
     assert_eq,
     assert_exceptions_equal,
+    expect_warning_if,
 )
 
 index_dtypes = INTEGER_TYPES
@@ -151,8 +153,10 @@ def test_series_get_item_iloc_defer(arg):
     ps = pd.Series([1, 2, 3], index=pd.Index(["a", "b", "c"]))
     gs = cudf.from_pandas(ps)
 
-    expect = ps[arg]
-    got = gs[arg]
+    with expect_warning_if(PANDAS_GE_210 and not isinstance(arg, str)):
+        expect = ps[arg]
+    with expect_warning_if(not isinstance(arg, str)):
+        got = gs[arg]
 
     assert_eq(expect, got)
 
@@ -163,7 +167,7 @@ def test_series_iloc_defer_cudf_scalar():
 
     for t in index_dtypes:
         arg = cudf.Scalar(1, dtype=t)
-        got = gs[arg]
+        got = gs.iloc[arg]
         expect = 2
         assert_eq(expect, got)