From 53a17688a4d36af67bea3e8b2623072250851454 Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Fri, 20 Aug 2021 10:48:03 +0530
Subject: [PATCH 01/13] BUG: Throw a ParserError when header rows have unequal
 column counts (GH43102)

---
 pandas/io/parsers/base_parser.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 5714bbab016c8..25158de1f3943 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -341,6 +341,11 @@ def _extract_multi_indexer_columns(
         # extract the columns
         field_count = len(header[0])
 
+        #check if header lengths are equal
+        for l in range(len(header)):
+            if len(header[l])!=field_count:
+                raise ParserError(f"Header rows must have equal number of columns. Mismatch found at row " + str(l))
+            
         def extract(r):
             return tuple(r[i] for i in range(field_count) if i not in sic)
 

From 128b4e3c01256287945a430bd99a29bab2e20459 Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Fri, 20 Aug 2021 11:19:23 +0530
Subject: [PATCH 02/13] BUG: Throw a ParserError when header rows have unequal
 column counts. Updated to comply with PEP8 (GH43102)

---
 pandas/io/parsers/base_parser.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 25158de1f3943..a228e2e9dc478 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -341,11 +341,12 @@ def _extract_multi_indexer_columns(
         # extract the columns
         field_count = len(header[0])
 
-        #check if header lengths are equal
-        for l in range(len(header)):
-            if len(header[l])!=field_count:
-                raise ParserError(f"Header rows must have equal number of columns. Mismatch found at row " + str(l))
-            
+        # check if header lengths are equal
+        for header_len in range(len(header)):
+            if len(header[header_len]) != field_count:
+                raise ParserError("Header rows must have equal number of columns."
+                                  f" Mismatch found at row {header_len}")
+
         def extract(r):
             return tuple(r[i] for i in range(field_count) if i not in sic)
 

From 95bac98b2ba892e3e90a152446046080a02f5c3f Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Fri, 20 Aug 2021 22:00:27 +0530
Subject: [PATCH 03/13] CLN: Reformat header length check error (GH43102)

---
 pandas/io/parsers/base_parser.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index a228e2e9dc478..df68a180afce3 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -342,10 +342,12 @@ def _extract_multi_indexer_columns(
         field_count = len(header[0])
 
         # check if header lengths are equal
-        for header_len in range(len(header)):
-            if len(header[header_len]) != field_count:
-                raise ParserError("Header rows must have equal number of columns."
-                                  f" Mismatch found at row {header_len}")
+        for header_iter in range(len(header)):
+            if len(header[header_iter]) != field_count:
+                raise ParserError(
+                    "Header rows must have equal number of columns. "
+                    f"Mismatch found at header {header_iter}."
+                )
 
         def extract(r):
             return tuple(r[i] for i in range(field_count) if i not in sic)

From 10422a87d3ecf1a5d1d29422a85f7ab7ee5f5b82 Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Fri, 20 Aug 2021 22:06:40 +0530
Subject: [PATCH 04/13] Added Test. (GH43102)

---
 pandas/tests/io/parser/test_header.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index 3b814360d3aa4..8c93e40d11a8f 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -585,3 +585,20 @@ def test_read_csv_multiindex_columns(all_parsers):
     tm.assert_frame_equal(df1, expected.iloc[:1])
     df2 = parser.read_csv(StringIO(s2), header=[0, 1])
     tm.assert_frame_equal(df2, expected)
+
+
+def test_read_csv_multi_header_length_check(all_parsers):
+    # GH#43102
+    parser = all_parsers
+
+    case = """row11,row12,row13
+row21,row22, row23
+row31,row32
+"""
+
+    with pytest.raises(
+        ValueError,
+        match="Header rows must have equal number of columns. "
+        "Mismatch found at header 1.",
+    ):
+        parser.read_csv(StringIO(case), sep=",", header=[0, 2])

From 658c291bf4458c0b728cd9cbe2b270ac503b8e8a Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Fri, 20 Aug 2021 22:19:56 +0530
Subject: [PATCH 05/13] TST: Expect ParserError in header length test (GH43102)

---
 pandas/tests/io/parser/test_header.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index 8c93e40d11a8f..e902cab485b35 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -597,7 +597,7 @@ def test_read_csv_multi_header_length_check(all_parsers):
 """
 
     with pytest.raises(
-        ValueError,
+        ParserError,
         match="Header rows must have equal number of columns. "
         "Mismatch found at header 1.",
     ):

From a02d476a626889314879502f4626b15087233210 Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Sat, 21 Aug 2021 00:11:48 +0530
Subject: [PATCH 06/13] CLN: Simplify header length check with all() (GH43102)

---
 pandas/io/parsers/base_parser.py      | 8 ++------
 pandas/tests/io/parser/test_header.py | 6 ++----
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index df68a180afce3..34c51632bff2d 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -342,12 +342,8 @@ def _extract_multi_indexer_columns(
         field_count = len(header[0])
 
         # check if header lengths are equal
-        for header_iter in range(len(header)):
-            if len(header[header_iter]) != field_count:
-                raise ParserError(
-                    "Header rows must have equal number of columns. "
-                    f"Mismatch found at header {header_iter}."
-                )
+        if not all(len(header_iter) == field_count for header_iter in header[1:]):
+            raise ParserError("Header rows must have an equal number of columns.")
 
         def extract(r):
             return tuple(r[i] for i in range(field_count) if i not in sic)
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index e902cab485b35..2a8f41a3f775e 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -597,8 +597,6 @@ def test_read_csv_multi_header_length_check(all_parsers):
 """
 
     with pytest.raises(
-        ParserError,
-        match="Header rows must have equal number of columns. "
-        "Mismatch found at header 1.",
+        ParserError, match="Header rows must have an equal number of columns."
     ):
-        parser.read_csv(StringIO(case), sep=",", header=[0, 2])
+        parser.read_csv(StringIO(case), header=[0, 2])

From 5f534ea8f2eeab7b07b9d6972ef0b45c2493c70d Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Mon, 23 Aug 2021 00:12:59 +0530
Subject: [PATCH 07/13] Added whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 205a49e7786a7..6649467e2cb9f 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -319,7 +319,7 @@ I/O
 - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
 - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
 - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
--
+- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raises an ``IndexError`` (:issue:`43102`)
 
 Period
 ^^^^^^

From 5239ece71cdc8abf22b4d0788ebb4129c4c0fd5c Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Mon, 23 Aug 2021 02:16:34 +0530
Subject: [PATCH 08/13] Added whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 6649467e2cb9f..729a3bbb73057 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -319,7 +319,7 @@ I/O
 - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
 - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
 - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
-- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raises an ``IndexError`` (:issue:`43102`)
+- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
 
 Period
 ^^^^^^

From 863e9960dc9bd204389e4daeb575d12079f54473 Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Tue, 24 Aug 2021 00:16:29 +0530
Subject: [PATCH 09/13] Test without whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 729a3bbb73057..205a49e7786a7 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -319,7 +319,7 @@ I/O
 - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
 - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
 - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
-- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
+-
 
 Period
 ^^^^^^

From 532e6cb8e0613cc35c300671572b787861525f02 Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Tue, 24 Aug 2021 01:16:14 +0530
Subject: [PATCH 10/13] Add whatsnew again

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 205a49e7786a7..729a3bbb73057 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -319,7 +319,7 @@ I/O
 - Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
 - Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
 - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
--
+- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
 
 Period
 ^^^^^^

From 1caf42db6d7ea1e4a689c4dd70745a8b0e32779b Mon Sep 17 00:00:00 2001
From: quantumalaviya <45961148+quantumalaviya@users.noreply.github.com>
Date: Sun, 5 Sep 2021 09:50:02 +0530
Subject: [PATCH 11/13] Update v1.4.0.rst

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index a0b1c4e80ab75..e2b42a0ea14f5 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -377,7 +377,7 @@ I/O
 - Bug in :func:`read_fwf`, where difference in lengths of ``colspecs`` and ``names`` was not raising ``ValueError`` (:issue:`40830`)
 - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
 - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
-- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
+-
 
 Period
 ^^^^^^

From 2ca6ccf8a0e6026528090ca5b846126507c438d2 Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Sun, 5 Sep 2021 10:05:51 +0530
Subject: [PATCH 12/13] Merge upstream

---
 doc/source/whatsnew/v1.4.0.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index a44825a78b1a0..52cbaa033e707 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -382,7 +382,8 @@ I/O
 - Bug in :func:`read_fwf`, where difference in lengths of ``colspecs`` and ``names`` was not raising ``ValueError`` (:issue:`40830`)
 - Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
 - Column headers are dropped when constructing a :class:`DataFrame` from a sqlalchemy's ``Row`` object (:issue:`40682`)
--
+- Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
+- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raised an uncontrolled ``IndexError`` (:issue:`43102`)
 
 Period
 ^^^^^^

From 3f1fb3944bec76ede3adffae1a0f98cc0fba8939 Mon Sep 17 00:00:00 2001
From: Sarvagya Malaviya <malaviyasarvagya22@gmail.com>
Date: Sun, 5 Sep 2021 13:56:45 +0530
Subject: [PATCH 13/13] Skipping test on PyArrow

---
 pandas/tests/io/parser/test_header.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index af08c819baddd..d4b87070720d1 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -606,6 +606,7 @@ def test_read_csv_multiindex_columns(all_parsers):
     tm.assert_frame_equal(df2, expected)
 
 
+@skip_pyarrow
 def test_read_csv_multi_header_length_check(all_parsers):
     # GH#43102
     parser = all_parsers