From c2e91f86315a7b59abcee09aacf0fd2f8c2b04fa Mon Sep 17 00:00:00 2001
From: michal <michaltus@gmail.com>
Date: Sat, 18 Sep 2021 20:07:01 +0200
Subject: [PATCH 1/5] Fixes GH43540

---
 pandas/io/common.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index ba1cc82bfea56..628828466e757 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -877,7 +877,8 @@ def read(self, size: int = -1) -> str | bytes:
         if self.decode:
             # memory mapping is applied before compression. Encoding should
             # be applied to the de-compressed data.
-            return content.decode(self.encoding, errors=self.errors)
+            final: bool = len(content) == 0
+            return self.decoder.decode(content, final=final)
         return content
 
     def __next__(self) -> str:

From a900ef9e3a57b4d027775a26a43f63dfff870dad Mon Sep 17 00:00:00 2001
From: michal <michaltus@gmail.com>
Date: Sat, 18 Sep 2021 20:07:01 +0200
Subject: [PATCH 2/5] Revert "Fixes GH43540"

This reverts commit c2e91f86315a7b59abcee09aacf0fd2f8c2b04fa.
---
 pandas/io/common.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/io/common.py b/pandas/io/common.py
index 628828466e757..ba1cc82bfea56 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -877,8 +877,7 @@ def read(self, size: int = -1) -> str | bytes:
         if self.decode:
             # memory mapping is applied before compression. Encoding should
             # be applied to the de-compressed data.
-            final: bool = len(content) == 0
-            return self.decoder.decode(content, final=final)
+            return content.decode(self.encoding, errors=self.errors)
         return content
 
     def __next__(self) -> str:

From 7eaa1263b9d3435c111ed8708cbebc03d43aeec3 Mon Sep 17 00:00:00 2001
From: michal <michaltus@gmail.com>
Date: Tue, 21 Sep 2021 19:19:20 +0200
Subject: [PATCH 3/5] DOC: add whatsnew entry for read_csv memory_map regression (GH43540)

---
 doc/source/whatsnew/v1.3.4.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst
index 87b08fae52c15..e6475c75532e5 100644
--- a/doc/source/whatsnew/v1.3.4.rst
+++ b/doc/source/whatsnew/v1.3.4.rst
@@ -19,6 +19,7 @@ Fixed regressions
 - Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`)
 - Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`)
 - Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`)
+- Fixed regression in :meth:`pandas.read_csv` raising UnicodeDecodeError exception when memory_map=True (:issue:`43540`)
 -
 
 .. ---------------------------------------------------------------------------

From 61f13b8bd84ba2e5b72d66377f529a8474b4d657 Mon Sep 17 00:00:00 2001
From: michal <michaltus@gmail.com>
Date: Tue, 21 Sep 2021 19:48:33 +0200
Subject: [PATCH 4/5] BUG: fix read_csv UnicodeDecodeError with memory_map=True (GH43540)

---
 doc/source/whatsnew/v1.3.4.rst          |  2 +-
 pandas/io/common.py                     |  3 ++-
 pandas/tests/io/parser/test_encoding.py | 26 +++++++++++++++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst
index e6475c75532e5..6212f2c6f3399 100644
--- a/doc/source/whatsnew/v1.3.4.rst
+++ b/doc/source/whatsnew/v1.3.4.rst
@@ -19,7 +19,7 @@ Fixed regressions
 - Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`)
 - Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`)
 - Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`)
-- Fixed regression in :meth:`pandas.read_csv` raising UnicodeDecodeError exception when memory_map=True (:issue:`43540`)
+- Fixed regression in :meth:`pandas.read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/io/common.py b/pandas/io/common.py
index ba1cc82bfea56..6dfddd571b88f 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -877,7 +877,8 @@ def read(self, size: int = -1) -> str | bytes:
         if self.decode:
             # memory mapping is applied before compression. Encoding should
             # be applied to the de-compressed data.
-            return content.decode(self.encoding, errors=self.errors)
+            final = size == -1 or len(content) < size
+            return self.decoder.decode(content, final=final)
         return content
 
     def __next__(self) -> str:
diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py
index 680c437f7087e..b269679bddeca 100644
--- a/pandas/tests/io/parser/test_encoding.py
+++ b/pandas/tests/io/parser/test_encoding.py
@@ -252,6 +252,32 @@ def test_encoding_memory_map(all_parsers, encoding):
     tm.assert_frame_equal(df, expected)
 
 
+@skip_pyarrow
+def test_chunk_splits_multibyte_char(all_parsers):
+    """
+    Chunk splits a multibyte character with memory_map=True
+
+    GH 43540
+    """
+    parser = all_parsers
+    # DEFAULT_CHUNKSIZE = 262144, defined in parsers.pyx
+    df = DataFrame(data=["a" * 127] * 2048)
+
+    # Put the two-byte UTF-8 encoded character "ą" at the end of the chunk
+    # utf-8 encoding of "ą" is b'\xc4\x85'
+    df.iloc[2047] = "a" * 127 + "ą"
+    with tm.ensure_clean("bug-gh43540.csv") as fname:
+        df.to_csv(
+            fname,
+            index=False,
+            header=False,
+            encoding="utf-8",
+            engine="c",
+        )
+        dfr = parser.read_csv(fname, header=None, memory_map=True)
+    tm.assert_frame_equal(dfr, df)
+
+
 def test_not_readable(all_parsers):
     # GH43439
     parser = all_parsers

From 3a262778b3c6759d23e61c47385e3b36c1d66cb8 Mon Sep 17 00:00:00 2001
From: michal <michaltus@gmail.com>
Date: Tue, 21 Sep 2021 20:17:07 +0200
Subject: [PATCH 5/5] TST: pass engine="c" to read_csv instead of to_csv (GH43540)

---
 pandas/tests/io/parser/test_encoding.py | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py
index b269679bddeca..6ca3fdf9a6258 100644
--- a/pandas/tests/io/parser/test_encoding.py
+++ b/pandas/tests/io/parser/test_encoding.py
@@ -267,14 +267,8 @@ def test_chunk_splits_multibyte_char(all_parsers):
     # utf-8 encoding of "ą" is b'\xc4\x85'
     df.iloc[2047] = "a" * 127 + "ą"
     with tm.ensure_clean("bug-gh43540.csv") as fname:
-        df.to_csv(
-            fname,
-            index=False,
-            header=False,
-            encoding="utf-8",
-            engine="c",
-        )
-        dfr = parser.read_csv(fname, header=None, memory_map=True)
+        df.to_csv(fname, index=False, header=False, encoding="utf-8")
+        dfr = parser.read_csv(fname, header=None, memory_map=True, engine="c")
     tm.assert_frame_equal(dfr, df)