
BUG: Read CSV on python engine fails when skiprows and chunk size are specified (#55677, #56323) #56250


Merged · 11 commits · Dec 5, 2023
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
@@ -420,6 +420,7 @@ MultiIndex

I/O
^^^
- Bug in :func:`read_csv` where ``engine="python"`` raised a ``TypeError`` when a callable ``skiprows`` and a ``chunksize`` were specified (:issue:`55677`)
- Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raising a Python warning; this now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
45 changes: 30 additions & 15 deletions pandas/io/parsers/python_parser.py
@@ -1117,18 +1117,33 @@ def _get_lines(self, rows: int | None = None) -> list[list[Scalar]]:
new_rows = []
try:
if rows is not None:
rows_to_skip = 0
if self.skiprows is not None and self.pos is not None:
# Only read additional rows if pos is in skiprows
rows_to_skip = len(
set(self.skiprows) - set(range(self.pos))
)

for _ in range(rows + rows_to_skip):
# assert for mypy, data is Iterator[str] or None, would
# error in next
assert self.data is not None
new_rows.append(next(self.data))
if callable(self.skiprows):
row_index = 0
row_ct = 0
offset = self.pos if self.pos is not None else 0
while row_ct < rows:
# assert for mypy, data is Iterator[str] or None, would
# error in next
assert self.data is not None
new_row = next(self.data)
if not self.skipfunc(offset + row_index):
row_ct += 1
row_index += 1
new_rows.append(new_row)
else:
# Maintain legacy chunking behavior
Member commented:

We aren't planning on removing this branch - it just serves the non-callable case, right? If so, I think this comment is a bit confusing and can just be removed.

@Flytre (Contributor, author) commented on Dec 3, 2023:

There's some weird behavior in this branch, and I'd argue to remove it.

Consider this csv file:

col_a
0
1
2
3
4
5
6
7
8
9

If we read this file in via read_csv:

text_file_reader = pd.read_csv("dummy.csv",
                               engine='$ENGINE',
                               skiprows=[1, 2, 3, 7, 10],
                               chunksize=2)

With the python engine we get the following result:

   col_a
0      3
1      4
   col_a
2      5
3      7
4      8

With c engine we get a different result:

   col_a
0      3
1      4
   col_a
2      5
3      7
   col_a
4      8

With the python engine and skiprows=lambda x: x in [1, 2, 3, 7, 10] (this PR adds this behavior; currently, with these parameters, pandas raises an exception):

   col_a
0      3
1      4
   col_a
2      5
3      7
   col_a
4      8

If we remove the entire else branch and use the logic this PR adds for the 'callable' case, the python engine will match the c engine result in both cases, whether given a list or callable skiprows.

Thoughts? If you'd like to go ahead with the proposed update, should I create a separate issue for it and also link it to this PR?
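The target semantics in the comparison above can be sketched without pandas: the skip predicate sees raw line numbers (header included as line 0), and each chunk groups exactly ``chunksize`` *kept* rows, except possibly the last. A minimal standalone sketch (``chunk_rows`` is a hypothetical helper, not a pandas API):

```python
def chunk_rows(raw_lines, skipfunc, chunksize):
    # C-engine-style chunking: apply the skip predicate to raw line
    # indices (line 0 is the header), then group the surviving data
    # rows into fixed-size chunks; only the last chunk may be short.
    kept = [
        int(line)
        for i, line in enumerate(raw_lines)
        if i > 0 and not skipfunc(i)
    ]
    return [kept[i : i + chunksize] for i in range(0, len(kept), chunksize)]

raw = ["col_a"] + [str(v) for v in range(10)]  # the dummy.csv from above
chunks = chunk_rows(raw, lambda x: x in [1, 2, 3, 7, 10], chunksize=2)
print(chunks)  # [[3, 4], [5, 7], [8]] -- the C-engine grouping
```

This reproduces the c engine result quoted above, which is what the PR makes the python engine match for callable skiprows.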

@WillAyd (Member) commented on Dec 4, 2023:

In that case I would go ahead and do your proposed change now. If it makes the behavior exactly match the c engine, we might as well do the fix all at once.
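Unifying the two branches essentially amounts to normalizing the list form of skiprows into a predicate, which is what pandas' parsers already do when building ``self.skipfunc``. A sketch of that normalization (``make_skipfunc`` is an illustrative name, not the pandas internal):

```python
def make_skipfunc(skiprows):
    # Normalize both accepted forms of skiprows into one predicate so a
    # single chunk-reading loop can serve list and callable inputs alike.
    if callable(skiprows):
        return skiprows
    skipset = set(skiprows or ())
    return lambda i: i in skipset
```

With one predicate, the callable-skiprows loop added in this PR can serve the non-callable case too, removing the legacy over-read branch.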

rows_to_skip = 0
if self.skiprows is not None and self.pos is not None:
# Only read additional rows if pos is in skiprows
rows_to_skip = len(
set(self.skiprows) - set(range(self.pos))
)

for _ in range(rows + rows_to_skip):
# assert for mypy, data is Iterator[str] or None, would
# error in next
assert self.data is not None
new_rows.append(next(self.data))

len_new_rows = len(new_rows)
new_rows = self._remove_skipped_rows(new_rows)
@@ -1137,11 +1152,11 @@ def _get_lines(self, rows: int | None = None) -> list[list[Scalar]]:
rows = 0

while True:
new_row = self._next_iter_line(row_num=self.pos + rows + 1)
next_row = self._next_iter_line(row_num=self.pos + rows + 1)
rows += 1

if new_row is not None:
new_rows.append(new_row)
if next_row is not None:
new_rows.append(next_row)
len_new_rows = len(new_rows)

except StopIteration:
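The loop this hunk adds can be isolated as a small stateful helper. The sketch below filters skipped lines inline for brevity, whereas the real parser appends every raw line and filters afterwards via ``_remove_skipped_rows`` (``read_chunk`` is an illustrative name, not the parser's API):

```python
def read_chunk(data, pos, rows, skipfunc):
    # Mirror of the callable-skiprows loop: only rows the predicate keeps
    # count toward the chunk quota, but every raw line consumed advances
    # the absolute position, so later chunks evaluate skipfunc against
    # the correct line numbers.
    kept, consumed = [], 0
    while len(kept) < rows:
        line = next(data)  # StopIteration propagates, like next(self.data)
        if not skipfunc(pos + consumed):
            kept.append(line)
        consumed += 1
    return kept, pos + consumed

stream = iter(f"row{i}" for i in range(12))
skip = lambda i: i % 3 == 0  # skip absolute lines 0, 3, 6, 9
chunk1, pos = read_chunk(stream, 0, 4, skip)    # keeps row1, row2, row4, row5
chunk2, pos = read_chunk(stream, pos, 4, skip)  # keeps row7, row8, row10, row11
```

Passing the updated ``pos`` back in is the sketch's analogue of the ``offset = self.pos`` bookkeeping above; without it, a second chunk would re-evaluate the predicate from zero and skip the wrong lines.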
28 changes: 28 additions & 0 deletions pandas/tests/io/parser/test_skiprows.py
@@ -301,3 +301,31 @@ def test_skip_rows_and_n_rows(all_parsers):
result = parser.read_csv(StringIO(data), nrows=5, skiprows=[2, 4, 6])
expected = DataFrame({"a": [1, 3, 5, 7, 8], "b": ["a", "c", "e", "g", "h"]})
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
def test_skip_rows_with_chunks(all_parsers):
# GH 55677
data = """col_a
10
20
30
40
50
60
70
80
90
100
"""
parser = all_parsers
reader = parser.read_csv(
        StringIO(data), engine=parser.engine, skiprows=lambda x: x in [1, 4, 5], chunksize=4
)
df1 = next(reader)
df2 = next(reader)

tm.assert_frame_equal(
df1, DataFrame({"col_a": [20, 30, 60, 70]}, index=[0, 1, 2, 3])
    )

Member commented:

nit, but you don't need to specify index here; dropping it will help condense this to one line.
tm.assert_frame_equal(df2, DataFrame({"col_a": [80, 90, 100]}, index=[4, 5, 6]))