From c38e1ce3ec8a4c2da9f7abe7b9c8fab4cdc9cb50 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Wed, 25 Dec 2019 12:42:24 +0200
Subject: [PATCH 1/7] CI: Added unwanted patterns check

---
 ci/code_checks.sh                        |  4 ++
 scripts/validate_string_concatenation.py | 73 ++++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100755 scripts/validate_string_concatenation.py

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 94eaab0a5b4da..462275d9b5bec 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -207,6 +207,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include=*.{py,pyx} 'xrange' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for use of not concatenated strings' ; echo $MSG
+    python ./scripts/validate_string_concatenation.py pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
     INVGREP_APPEND=" <- trailing whitespaces found"
     invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
diff --git a/scripts/validate_string_concatenation.py b/scripts/validate_string_concatenation.py
new file mode 100755
index 0000000000000..a262bbe583380
--- /dev/null
+++ b/scripts/validate_string_concatenation.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+"""
+Check where there is a string that needs to be concatenated.
+"""
+
+import os
+import sys
+import token
+import tokenize
+
+FILE_EXTENTIONS_TO_CHECK = [".py", ".pyx"]
+
+
+def main():
+    path = sys.argv[1]
+
+    if not os.path.exists(path):
+        raise ValueError("Please enter a valid path, to a file/directory.")
+
+    if os.path.isfile(path):
+        # Means that the given path is of a single file.
+        sys.exit(is_concatenated(path))
+
+    status_codes = set()
+    # Means that the given path is of a directory.
+    for subdir, _, files in os.walk(path):
+        for file_name in files:
+            ext = os.path.splitext(os.path.join(subdir, file_name))[1]
+            if ext in FILE_EXTENTIONS_TO_CHECK:
+                status_codes.add(is_concatenated(os.path.join(subdir, file_name)))
+
+    if 1 in status_codes:
+        sys.exit(1)
+
+    sys.exit(0)
+
+
+def is_concatenated(file_path):
+    """
+    Checking if the file containing strings that needs to be concatenated.
+
+    Parameters
+    ----------
+    file_path : str
+        File path pointing to a single file.
+
+    Returns
+    -------
+    int
+        Status code representing if the file needs a fix.
+        0 - All good.
+        1 - Needs to be fixed.
+    """
+    with open(file_path, "r") as file_name:
+        toks = list(tokenize.generate_tokens(file_name.readline))
+        for i in range(len(toks) - 1):
+            tok = toks[i]
+            tok2 = toks[i + 1]
+            if tok[0] == token.STRING and tok[0] == tok2[0]:
+                print(
+                    "{file_path}:{line_number}:\t{start} and {end}".format(
+                        file_path=file_path,
+                        line_number=tok[2][0],
+                        start=tok[1],
+                        end=tok2[1],
+                    )
+                )
+                return 1
+        return 0
+
+
+if __name__ == "__main__":
+    main()

From 207b20d3263974d0e29efb5e057eecdca72abcdb Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Wed, 25 Dec 2019 14:38:13 +0200
Subject: [PATCH 2/7] Removed unconcatenated strings

---
 pandas/core/arrays/datetimelike.py                 |  4 +---
 pandas/core/dtypes/cast.py                         |  8 ++------
 pandas/io/json/_json.py                            |  2 +-
 pandas/io/json/_table_schema.py                    |  8 +++-----
 pandas/tests/arithmetic/test_period.py             |  6 ++----
 pandas/tests/arrays/test_timedeltas.py             |  5 ++---
 pandas/tests/base/test_ops.py                      |  4 +---
 pandas/tests/frame/test_constructors.py            |  4 ++--
 pandas/tests/frame/test_missing.py                 |  2 +-
 pandas/tests/indexes/datetimes/test_tools.py       |  2 +-
 pandas/tests/indexing/test_loc.py                  |  4 +---
 pandas/tests/io/formats/test_to_csv.py             | 10 +++-------
 pandas/tests/io/json/test_pandas.py                |  2 +-
 pandas/tests/io/json/test_readlines.py             |  2 +-
 pandas/tests/io/json/test_ujson.py                 |  6 +++---
 pandas/tests/io/parser/test_common.py              |  5 ++---
 pandas/tests/io/parser/test_textreader.py          |  2 +-
 pandas/tests/io/pytables/test_store.py             |  2 +-
 pandas/tests/io/test_common.py                     |  8 ++------
 pandas/tests/reshape/merge/test_join.py            | 12 +++---------
 pandas/tests/reshape/merge/test_merge.py           |  2 +-
 pandas/tests/series/test_constructors.py           |  4 ++--
 pandas/tests/series/test_dtypes.py                 |  2 +-
 pandas/tests/series/test_missing.py                |  8 ++++----
 pandas/tests/tslibs/test_parse_iso8601.py          |  4 +---
 pandas/tests/util/test_validate_args_and_kwargs.py |  4 +---
 pandas/tests/util/test_validate_kwargs.py          |  4 +---
 pandas/util/_test_decorators.py                    |  2 +-
 pandas/util/_validators.py                         |  6 ++----
 29 files changed, 48 insertions(+), 86 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 045e511e32586..c37bd01d5fe30 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -916,9 +916,7 @@ def _is_unique(self):
 
     def _add_datetimelike_scalar(self, other):
         # Overriden by TimedeltaArray
-        raise TypeError(
-            f"cannot add {type(self).__name__} and " f"{type(other).__name__}"
-        )
+        raise TypeError(f"cannot add {type(self).__name__} and {type(other).__name__}")
 
     _add_datetime_arraylike = _add_datetimelike_scalar
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 1ab21f18f3bdc..946070f8fad98 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -820,9 +820,7 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False):
         if dtype.kind == "M":
             return arr.astype(dtype)
 
-        raise TypeError(
-            f"cannot astype a datetimelike from [{arr.dtype}] " f"to [{dtype}]"
-        )
+        raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")
 
     elif is_timedelta64_dtype(arr):
         if is_object_dtype(dtype):
@@ -842,9 +840,7 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False):
         elif dtype == _TD_DTYPE:
             return arr.astype(_TD_DTYPE, copy=copy)
 
-        raise TypeError(
-            f"cannot astype a timedelta from [{arr.dtype}] " f"to [{dtype}]"
-        )
+        raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")
 
     elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer):
 
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index c22089b4e1eae..4b0c0389f1439 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -53,7 +53,7 @@ def to_json(
 
     if not index and orient not in ["split", "table"]:
         raise ValueError(
-            "'index=False' is only valid when 'orient' is " "'split' or 'table'"
+            "'index=False' is only valid when 'orient' is 'split' or 'table'"
         )
 
     path_or_buf = stringify_path(path_or_buf)
diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py
index bc5a9783391a4..87bfd6030ec31 100644
--- a/pandas/io/json/_table_schema.py
+++ b/pandas/io/json/_table_schema.py
@@ -81,9 +81,7 @@ def set_default_names(data):
         if len(nms) == 1 and data.index.name == "index":
             warnings.warn("Index name of 'index' is not round-trippable")
         elif len(nms) > 1 and any(x.startswith("level_") for x in nms):
-            warnings.warn(
-                "Index names beginning with 'level_' are not " "round-trippable"
-            )
+            warnings.warn("Index names beginning with 'level_' are not round-trippable")
         return data
 
     data = data.copy()
@@ -317,12 +315,12 @@ def parse_table_schema(json, precise_float):
 
     # Cannot directly use as_type with timezone data on object; raise for now
     if any(str(x).startswith("datetime64[ns, ") for x in dtypes.values()):
-        raise NotImplementedError('table="orient" can not yet read timezone ' "data")
+        raise NotImplementedError('table="orient" can not yet read timezone data')
 
     # No ISO constructor for Timedelta as of yet, so need to raise
     if "timedelta64" in dtypes.values():
         raise NotImplementedError(
-            'table="orient" can not yet read ' "ISO-formatted Timedelta data"
+            'table="orient" can not yet read ISO-formatted Timedelta data'
         )
 
     df = df.astype(dtypes)
diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py
index ed693d873efb8..5917c8deee8a9 100644
--- a/pandas/tests/arithmetic/test_period.py
+++ b/pandas/tests/arithmetic/test_period.py
@@ -168,9 +168,7 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array):
 
         # TODO: Could parametrize over boxes for idx?
         idx = PeriodIndex(["2011", "2012", "2013", "2014"], freq="A")
-        rev_msg = (
-            r"Input has different freq=(M|2M|3M) from " r"PeriodArray\(freq=A-DEC\)"
-        )
+        rev_msg = r"Input has different freq=(M|2M|3M) from PeriodArray\(freq=A-DEC\)"
         idx_msg = rev_msg if box_with_array is tm.to_array else msg
         with pytest.raises(IncompatibleFrequency, match=idx_msg):
             base <= idx
@@ -184,7 +182,7 @@ def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array):
             Period("2011", freq="4M") >= base
 
         idx = PeriodIndex(["2011", "2012", "2013", "2014"], freq="4M")
-        rev_msg = r"Input has different freq=(M|2M|3M) from " r"PeriodArray\(freq=4M\)"
+        rev_msg = r"Input has different freq=(M|2M|3M) from PeriodArray\(freq=4M\)"
         idx_msg = rev_msg if box_with_array is tm.to_array else msg
         with pytest.raises(IncompatibleFrequency, match=idx_msg):
             base <= idx
diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py
index 42e7bee97e671..bb6ef09bad17e 100644
--- a/pandas/tests/arrays/test_timedeltas.py
+++ b/pandas/tests/arrays/test_timedeltas.py
@@ -41,13 +41,12 @@ def test_other_type_raises(self):
     def test_incorrect_dtype_raises(self):
         # TODO: why TypeError for 'category' but ValueError for i8?
         with pytest.raises(
-            ValueError, match=r"category cannot be converted " r"to timedelta64\[ns\]"
+            ValueError, match=r"category cannot be converted to timedelta64\[ns\]"
         ):
             TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category")
 
         with pytest.raises(
-            ValueError,
-            match=r"dtype int64 cannot be converted " r"to timedelta64\[ns\]",
+            ValueError, match=r"dtype int64 cannot be converted to timedelta64\[ns\]",
         ):
             TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64"))
 
diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py
index 04277ce929bca..4231aa844f282 100644
--- a/pandas/tests/base/test_ops.py
+++ b/pandas/tests/base/test_ops.py
@@ -698,9 +698,7 @@ def test_duplicated_drop_duplicates_index(self):
 
                 with pytest.raises(
                     TypeError,
-                    match=(
-                        r"drop_duplicates\(\) got an " r"unexpected keyword argument"
-                    ),
+                    match=r"drop_duplicates\(\) got an unexpected keyword argument",
                 ):
                     idx.drop_duplicates(inplace=True)
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 3e5027ee54cb3..f3cc11cb7027d 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -479,11 +479,11 @@ def test_constructor_error_msgs(self):
             DataFrame(np.zeros((3, 3, 3)), columns=["A", "B", "C"], index=[1])
 
         # wrong size axis labels
-        msg = "Shape of passed values " r"is \(2, 3\), indices " r"imply \(1, 3\)"
+        msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
         with pytest.raises(ValueError, match=msg):
             DataFrame(np.random.rand(2, 3), columns=["A", "B", "C"], index=[1])
 
-        msg = "Shape of passed values " r"is \(2, 3\), indices " r"imply \(2, 2\)"
+        msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)"
         with pytest.raises(ValueError, match=msg):
             DataFrame(np.random.rand(2, 3), columns=["A", "B"], index=[1, 2])
 
diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py
index ea7e9b4ac490d..f9a2061aa1ff4 100644
--- a/pandas/tests/frame/test_missing.py
+++ b/pandas/tests/frame/test_missing.py
@@ -662,7 +662,7 @@ def test_fillna_invalid_method(self, float_frame):
 
     def test_fillna_invalid_value(self, float_frame):
         # list
-        msg = '"value" parameter must be a scalar or dict, but you passed' ' a "{}"'
+        msg = '"value" parameter must be a scalar or dict, but you passed a "{}"'
         with pytest.raises(TypeError, match=msg.format("list")):
             float_frame.fillna([1, 2])
         # tuple
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index f1c23d7b245c6..1aaacfc0949c3 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -1298,7 +1298,7 @@ def test_dataframe(self, cache):
         tm.assert_series_equal(result, expected)
 
         # extra columns
-        msg = "extra keys have been passed to the datetime assemblage: " r"\[foo\]"
+        msg = r"extra keys have been passed to the datetime assemblage: \[foo\]"
         with pytest.raises(ValueError, match=msg):
             df2 = df.copy()
             df2["foo"] = 1
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index 6f20ec649b200..8b3620e8cd843 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -242,9 +242,7 @@ def test_loc_to_fail(self):
         with pytest.raises(KeyError, match=msg):
             s.loc[[-1, -2]]
 
-        msg = (
-            r"\"None of \[Index\(\['4'\], dtype='object'\)\] are" r" in the \[index\]\""
-        )
+        msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\""
         with pytest.raises(KeyError, match=msg):
             s.loc[["4"]]
 
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 80edbd828194d..24233a0ec84b1 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -376,16 +376,14 @@ def test_to_csv_string_with_lf(self):
                 assert f.read() == expected_noarg
         with tm.ensure_clean("lf_test.csv") as path:
             # case 2: LF as line terminator
-            expected_lf = b"int,str_lf\n" b"1,abc\n" b'2,"d\nef"\n' b'3,"g\nh\n\ni"\n'
+            expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n'
             df.to_csv(path, line_terminator="\n", index=False)
             with open(path, "rb") as f:
                 assert f.read() == expected_lf
         with tm.ensure_clean("lf_test.csv") as path:
             # case 3: CRLF as line terminator
             # 'line_terminator' should not change inner element
-            expected_crlf = (
-                b"int,str_lf\r\n" b"1,abc\r\n" b'2,"d\nef"\r\n' b'3,"g\nh\n\ni"\r\n'
-            )
+            expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n'
             df.to_csv(path, line_terminator="\r\n", index=False)
             with open(path, "rb") as f:
                 assert f.read() == expected_crlf
@@ -412,9 +410,7 @@ def test_to_csv_string_with_crlf(self):
                 assert f.read() == expected_noarg
         with tm.ensure_clean("crlf_test.csv") as path:
             # case 2: LF as line terminator
-            expected_lf = (
-                b"int,str_crlf\n" b"1,abc\n" b'2,"d\r\nef"\n' b'3,"g\r\nh\r\n\r\ni"\n'
-            )
+            expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n'
             df.to_csv(path, line_terminator="\n", index=False)
             with open(path, "rb") as f:
                 assert f.read() == expected_lf
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 6489fedad03e3..6e27b79458faf 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1244,7 +1244,7 @@ def test_to_jsonl(self):
         # GH15096: escaped characters in columns and data
         df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
         result = df.to_json(orient="records", lines=True)
-        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n' '{"a\\\\":"foo\\"","b":"bar"}'
+        expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}'
         assert result == expected
         tm.assert_frame_equal(pd.read_json(result, lines=True), df)
 
diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py
index b85032904c5ec..90da175855c34 100644
--- a/pandas/tests/io/json/test_readlines.py
+++ b/pandas/tests/io/json/test_readlines.py
@@ -56,7 +56,7 @@ def test_to_jsonl():
     # GH15096: escaped characters in columns and data
     df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"])
     result = df.to_json(orient="records", lines=True)
-    expected = '{"a\\\\":"foo\\\\","b":"bar"}\n' '{"a\\\\":"foo\\"","b":"bar"}'
+    expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}'
     assert result == expected
     tm.assert_frame_equal(read_json(result, lines=True), df)
 
diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py
index 6008f6b651c2a..dab2882499634 100644
--- a/pandas/tests/io/json/test_ujson.py
+++ b/pandas/tests/io/json/test_ujson.py
@@ -111,9 +111,9 @@ def test_encode_decimal(self):
     @pytest.mark.parametrize("ensure_ascii", [True, False])
     def test_encode_string_conversion(self, ensure_ascii):
         string_input = "A string \\ / \b \f \n \r \t </script> &"
-        not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n ' '\\r \\t <\\/script> &"'
+        not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"'
         html_encoded = (
-            '"A string \\\\ \\/ \\b \\f \\n \\r \\t ' '\\u003c\\/script\\u003e \\u0026"'
+            '"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"'
         )
 
         def helper(expected_output, **encode_kwargs):
@@ -816,7 +816,7 @@ def test_array_numpy_labelled(self):
 
         # see gh-10837: write out the dump explicitly
         # so there is no dependency on iteration order
-        input_dumps = '[{"a": 42, "b":31}, {"a": 24, "c": 99}, ' '{"a": 2.4, "b": 78}]'
+        input_dumps = '[{"a": 42, "b":31}, {"a": 24, "c": 99}, {"a": 2.4, "b": 78}]'
         output = ujson.loads(input_dumps, numpy=True, labelled=True)
         expected_vals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
         assert (expected_vals == output[0]).all()
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py
index fe360f1346c7c..42a4a55988b0f 100644
--- a/pandas/tests/io/parser/test_common.py
+++ b/pandas/tests/io/parser/test_common.py
@@ -1144,9 +1144,8 @@ def test_escapechar(all_parsers):
         StringIO(data), escapechar="\\", quotechar='"', encoding="utf-8"
     )
 
-    assert result["SEARCH_TERM"][2] == (
-        'SLAGBORD, "Bergslagen", ' "IKEA:s 1700-tals serie"
-    )
+    assert result["SEARCH_TERM"][2] == 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals serie'
+
     tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))
 
 
diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py
index 75a5b7cd53ddb..e34f1010d690e 100644
--- a/pandas/tests/io/parser/test_textreader.py
+++ b/pandas/tests/io/parser/test_textreader.py
@@ -179,7 +179,7 @@ def test_header_not_enough_lines(self):
         assert_array_dicts_equal(recs, expected)
 
     def test_escapechar(self):
-        data = '\\"hello world"\n' '\\"hello world"\n' '\\"hello world"'
+        data = '\\"hello world"\n\\"hello world"\n\\"hello world"'
 
         reader = TextReader(StringIO(data), delimiter=",", header=None, escapechar="\\")
         result = reader.read()
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
index 3cd9d9cdd67d2..18d265438dee2 100644
--- a/pandas/tests/io/pytables/test_store.py
+++ b/pandas/tests/io/pytables/test_store.py
@@ -3214,7 +3214,7 @@ def test_frame_select_complex(self, setup_path):
             tm.assert_frame_equal(result, expected)
 
             result = store.select(
-                "df", "(index>df.index[3] & " 'index<=df.index[6]) | string="bar"'
+                "df", '(index>df.index[3] & index<=df.index[6]) | string="bar"'
             )
             expected = df.loc[
                 ((df.index > df.index[3]) & (df.index <= df.index[6]))
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index f4efbbeda6311..cfcd2c9f2df95 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -142,9 +142,7 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
 
         path = os.path.join(HERE, "data", "does_not_exist." + fn_ext)
         msg1 = r"File (b')?.+does_not_exist\.{}'? does not exist".format(fn_ext)
-        msg2 = (
-            r"\[Errno 2\] No such file or directory: '.+does_not_exist" r"\.{}'"
-        ).format(fn_ext)
+        msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'"
         msg3 = "Expected object or value"
         msg4 = "path_or_buf needs to be a string file path or file-like"
         msg5 = (
@@ -180,9 +178,7 @@ def test_read_expands_user_home_dir(
         monkeypatch.setattr(icom, "_expand_user", lambda x: os.path.join("foo", x))
 
         msg1 = r"File (b')?.+does_not_exist\.{}'? does not exist".format(fn_ext)
-        msg2 = (
-            r"\[Errno 2\] No such file or directory:" r" '.+does_not_exist\.{}'"
-        ).format(fn_ext)
+        msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'"
         msg3 = "Unexpected character found when decoding 'false'"
         msg4 = "path_or_buf needs to be a string file path or file-like"
         msg5 = (
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index e477b7608ab93..94a21c06162a6 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -226,9 +226,7 @@ def test_join_on_fails_with_different_right_index(self):
             {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)},
             index=tm.makeCustomIndex(10, 2),
         )
-        msg = (
-            r"len\(left_on\) must equal the number of levels in the index" ' of "right"'
-        )
+        msg = r'len\(left_on\) must equal the number of levels in the index of "right"'
         with pytest.raises(ValueError, match=msg):
             merge(df, df2, left_on="a", right_index=True)
 
@@ -240,9 +238,7 @@ def test_join_on_fails_with_different_left_index(self):
         df2 = DataFrame(
             {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}
         )
-        msg = (
-            r"len\(right_on\) must equal the number of levels in the index" ' of "left"'
-        )
+        msg = r'len\(right_on\) must equal the number of levels in the index of "left"'
         with pytest.raises(ValueError, match=msg):
             merge(df, df2, right_on="b", left_index=True)
 
@@ -737,9 +733,7 @@ def test_join_multi_to_multi(self, join_type):
         )
         tm.assert_frame_equal(expected, result)
 
-        msg = (
-            r"len\(left_on\) must equal the number of levels in the index" ' of "right"'
-        )
+        msg = r'len\(left_on\) must equal the number of levels in the index of "right"'
         with pytest.raises(ValueError, match=msg):
             left.join(right, on="xy", how=join_type)
 
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 5f4e8323c7127..e191bf67c51ca 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -744,7 +744,7 @@ def test_overlapping_columns_error_message(self):
 
         # #2649, #10639
         df2.columns = ["key1", "foo", "foo"]
-        msg = r"Data columns not unique: Index\(\['foo', 'foo'\]," r" dtype='object'\)"
+        msg = r"Data columns not unique: Index\(\['foo', 'foo'\], dtype='object'\)"
         with pytest.raises(MergeError, match=msg):
             merge(df, df2)
 
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index c772038619db0..fffb9c577bf3d 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -773,7 +773,7 @@ def test_constructor_dtype_datetime64(self):
         dts.astype("int64")
 
         # invalid casting
-        msg = r"cannot astype a datetimelike from \[datetime64\[ns\]\] to" r" \[int32\]"
+        msg = r"cannot astype a datetimelike from \[datetime64\[ns\]\] to \[int32\]"
         with pytest.raises(TypeError, match=msg):
             dts.astype("int32")
 
@@ -1198,7 +1198,7 @@ def test_constructor_dtype_timedelta64(self):
         td.astype("int64")
 
         # invalid casting
-        msg = r"cannot astype a timedelta from \[timedelta64\[ns\]\] to" r" \[int32\]"
+        msg = r"cannot astype a timedelta from \[timedelta64\[ns\]\] to \[int32\]"
         with pytest.raises(TypeError, match=msg):
             td.astype("int32")
 
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index ff4842791b4fd..69e34a4d97006 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -273,7 +273,7 @@ def test_astype_categorical_to_other(self):
         expected = s
         tm.assert_series_equal(s.astype("category"), expected)
         tm.assert_series_equal(s.astype(CategoricalDtype()), expected)
-        msg = r"could not convert string to float|" r"invalid literal for float\(\)"
+        msg = r"could not convert string to float|invalid literal for float\(\)"
         with pytest.raises(ValueError, match=msg):
             s.astype("float64")
 
diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py
index 45159cc28c5b7..196749a965885 100644
--- a/pandas/tests/series/test_missing.py
+++ b/pandas/tests/series/test_missing.py
@@ -502,11 +502,11 @@ def test_fillna_int(self):
 
     def test_fillna_raise(self):
         s = Series(np.random.randint(-100, 100, 50))
-        msg = '"value" parameter must be a scalar or dict, but you passed a' ' "list"'
+        msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
         with pytest.raises(TypeError, match=msg):
             s.fillna([1, 2])
 
-        msg = '"value" parameter must be a scalar or dict, but you passed a' ' "tuple"'
+        msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
         with pytest.raises(TypeError, match=msg):
             s.fillna((1, 2))
 
@@ -593,11 +593,11 @@ def test_fillna_categorical_raise(self):
         with pytest.raises(ValueError, match="fill value must be in categories"):
             s.fillna({1: "d", 3: "a"})
 
-        msg = '"value" parameter must be a scalar or ' 'dict, but you passed a "list"'
+        msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
         with pytest.raises(TypeError, match=msg):
             s.fillna(["a", "b"])
 
-        msg = '"value" parameter must be a scalar or ' 'dict, but you passed a "tuple"'
+        msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
         with pytest.raises(TypeError, match=msg):
             s.fillna(("a", "b"))
 
diff --git a/pandas/tests/tslibs/test_parse_iso8601.py b/pandas/tests/tslibs/test_parse_iso8601.py
index a6e7aee46b485..a58f227c20c7f 100644
--- a/pandas/tests/tslibs/test_parse_iso8601.py
+++ b/pandas/tests/tslibs/test_parse_iso8601.py
@@ -59,9 +59,7 @@ def test_parsers_iso8601_invalid(date_str):
 
 def test_parsers_iso8601_invalid_offset_invalid():
     date_str = "2001-01-01 12-34-56"
-    msg = "Timezone hours offset out of range " 'in datetime string "{s}"'.format(
-        s=date_str
-    )
+    msg = f'Timezone hours offset out of range in datetime string "{date_str}"'
 
     with pytest.raises(ValueError, match=msg):
         tslib._test_parse_iso8601(date_str)
diff --git a/pandas/tests/util/test_validate_args_and_kwargs.py b/pandas/tests/util/test_validate_args_and_kwargs.py
index 396056466bb81..6aa2088c07b5d 100644
--- a/pandas/tests/util/test_validate_args_and_kwargs.py
+++ b/pandas/tests/util/test_validate_args_and_kwargs.py
@@ -76,9 +76,7 @@ def test_duplicate_argument():
     kwargs = {"foo": None, "bar": None}
     args = (None,)  # duplicate value for "foo"
 
-    msg = r"{fname}\(\) got multiple values for keyword " r"argument '{arg}'".format(
-        fname=_fname, arg="foo"
-    )
+    msg = fr"{_fname}\(\) got multiple values for keyword argument 'foo'"
 
     with pytest.raises(TypeError, match=msg):
         validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args)
diff --git a/pandas/tests/util/test_validate_kwargs.py b/pandas/tests/util/test_validate_kwargs.py
index b6241def4e5d6..54b5c6ed034a2 100644
--- a/pandas/tests/util/test_validate_kwargs.py
+++ b/pandas/tests/util/test_validate_kwargs.py
@@ -16,9 +16,7 @@ def test_bad_kwarg():
     compat_args[bad_arg + "o"] = "bar"
     kwargs = {good_arg: "foo", bad_arg: "bar"}
 
-    msg = r"{fname}\(\) got an unexpected " r"keyword argument '{arg}'".format(
-        fname=_fname, arg=bad_arg
-    )
+    msg = fr"{_fname}\(\) got an unexpected keyword argument '{bad_arg}'"
 
     with pytest.raises(TypeError, match=msg):
         validate_kwargs(_fname, kwargs, compat_args)
diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py
index 7e14ed27d5bd4..a280da6e239b2 100644
--- a/pandas/util/_test_decorators.py
+++ b/pandas/util/_test_decorators.py
@@ -191,7 +191,7 @@ def skip_if_no(package: str, min_version: Optional[str] = None) -> Callable:
 )
 skip_if_no_ne = pytest.mark.skipif(
     not _USE_NUMEXPR,
-    reason=f"numexpr enabled->{_USE_NUMEXPR}, " f"installed->{_NUMEXPR_INSTALLED}",
+    reason=f"numexpr enabled->{_USE_NUMEXPR}, installed->{_NUMEXPR_INSTALLED}",
 )
 
 
diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py
index 547fe748ae941..6cc14c7804b4a 100644
--- a/pandas/util/_validators.py
+++ b/pandas/util/_validators.py
@@ -120,9 +120,7 @@ def _check_for_invalid_keys(fname, kwargs, compat_args):
 
     if diff:
         bad_arg = list(diff)[0]
-        raise TypeError(
-            (f"{fname}() got an unexpected " f"keyword argument '{bad_arg}'")
-        )
+        raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
 
 
 def validate_kwargs(fname, kwargs, compat_args):
@@ -202,7 +200,7 @@ def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_ar
     for key in args_dict:
         if key in kwargs:
             raise TypeError(
-                f"{fname}() got multiple values for keyword " f"argument '{key}'"
+                f"{fname}() got multiple values for keyword argument '{key}'"
             )
 
     kwargs.update(args_dict)

From d920bfd1f146e80d1ca8c957db57659cce920e65 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Wed, 25 Dec 2019 15:34:35 +0200
Subject: [PATCH 3/7] Make the script not stop at the first occurrence in
 each file.

---
 scripts/validate_string_concatenation.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/validate_string_concatenation.py b/scripts/validate_string_concatenation.py
index a262bbe583380..29ae18234ebe9 100755
--- a/scripts/validate_string_concatenation.py
+++ b/scripts/validate_string_concatenation.py
@@ -51,12 +51,14 @@ def is_concatenated(file_path):
         0 - All good.
         1 - Needs to be fixed.
     """
+    need_fix = False
     with open(file_path, "r") as file_name:
         toks = list(tokenize.generate_tokens(file_name.readline))
         for i in range(len(toks) - 1):
             tok = toks[i]
             tok2 = toks[i + 1]
             if tok[0] == token.STRING and tok[0] == tok2[0]:
+                need_fix = True
                 print(
                     "{file_path}:{line_number}:\t{start} and {end}".format(
                         file_path=file_path,
@@ -65,8 +67,8 @@ def is_concatenated(file_path):
                         end=tok2[1],
                     )
                 )
-                return 1
-        return 0
+
+    return int(need_fix)
 
 
 if __name__ == "__main__":

From e88f604ad049c61a6c1c5980cc2d1d0f8a4eebc9 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Wed, 25 Dec 2019 16:26:44 +0200
Subject: [PATCH 4/7] Added support for pxi.ini files

---
 scripts/validate_string_concatenation.py | 42 ++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/scripts/validate_string_concatenation.py b/scripts/validate_string_concatenation.py
index 29ae18234ebe9..904ec9ea03fbd 100755
--- a/scripts/validate_string_concatenation.py
+++ b/scripts/validate_string_concatenation.py
@@ -8,7 +8,7 @@
 import token
 import tokenize
 
-FILE_EXTENTIONS_TO_CHECK = [".py", ".pyx"]
+FILE_EXTENTIONS_TO_CHECK = [".pxd", ".py", ".pyx", ".pyx.ini"]
 
 
 def main():
@@ -25,7 +25,7 @@ def main():
     # Means that the given path is of a directory.
     for subdir, _, files in os.walk(path):
         for file_name in files:
-            ext = os.path.splitext(os.path.join(subdir, file_name))[1]
+            ext = full_ext(os.path.join(subdir, file_name))
             if ext in FILE_EXTENTIONS_TO_CHECK:
                 status_codes.add(is_concatenated(os.path.join(subdir, file_name)))
 
@@ -35,6 +35,44 @@ def main():
     sys.exit(0)
 
 
+def full_ext(path):
+    """
+    Get the full file extention name.
+
+    Parameters
+    ----------
+    path : str
+        File path.
+
+    Returns
+    -------
+    str
+        Full extention of a file.
+
+    Notes
+    -----
+    This function is needed only because of file extentions like
+    ` .pxi.ini` for example.
+
+    Examples
+    -------
+
+    With one suffix:
+
+    >>> ext = full_ext('/full/path/to/file.py')
+    >>> ext
+        .py
+
+    Wuth two suffixes:
+
+    >>> ext = full_ext('/full/path/to/file.pxi.ini')
+    >>> ext
+        .pxi.ini
+    """
+    ext_list = [".{suffix}".format(suffix=suffix) for suffix in path.split(".")[1:]]
+    return "".join(ext_list)
+
+
 def is_concatenated(file_path):
     """
     Checking if the file containing strings that needs to be concatenated.

From 545ba18fc6355a4ca62df7efb0f5f989ef2e4784 Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Wed, 25 Dec 2019 16:30:32 +0200
Subject: [PATCH 5/7] alimcmaster1 review fixes

---
 ci/code_checks.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 462275d9b5bec..2b467a03966cd 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -208,7 +208,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for use of not concatenated strings' ; echo $MSG
-    python ./scripts/validate_string_concatenation.py pandas
+    python $BASE_DIR/scripts/validate_string_concatenation.py pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG

From edb2da1d348705149f0105169dd56aa6459698fd Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Wed, 25 Dec 2019 19:35:11 +0200
Subject: [PATCH 6/7] Fixes for jbrockmendel's review

---
 scripts/validate_string_concatenation.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/scripts/validate_string_concatenation.py b/scripts/validate_string_concatenation.py
index 904ec9ea03fbd..a274cbb63dd04 100755
--- a/scripts/validate_string_concatenation.py
+++ b/scripts/validate_string_concatenation.py
@@ -1,6 +1,24 @@
 #!/usr/bin/env python
 """
+GH #30454
+
 Check where there is a string that needs to be concatenated.
+
+This is necessary after black formating,
+where for example black transforms this:
+
+>>> foo = (
+...         "bar "
+...         "baz"
+...     )
+
+into this:
+
+>>> foo = ("bar " "baz")
+
+Black is not considering this as an
+issue (see https://github.com/psf/black/issues/1051), so we are checking
+it here.
 """
 
 import os
@@ -8,7 +26,7 @@
 import token
 import tokenize
 
-FILE_EXTENTIONS_TO_CHECK = [".pxd", ".py", ".pyx", ".pyx.ini"]
+FILE_EXTENSIONS_TO_CHECK = [".pxd", ".py", ".pyx", ".pyx.ini"]
 
 
 def main():
@@ -26,7 +44,7 @@ def main():
     for subdir, _, files in os.walk(path):
         for file_name in files:
             ext = full_ext(os.path.join(subdir, file_name))
-            if ext in FILE_EXTENTIONS_TO_CHECK:
+            if ext in FILE_EXTENSIONS_TO_CHECK:
                 status_codes.add(is_concatenated(os.path.join(subdir, file_name)))
 
     if 1 in status_codes:

From cd99f2fb903cdced48bad2eab1ed93589c7cf4dd Mon Sep 17 00:00:00 2001
From: MomIsBestFriend <>
Date: Wed, 25 Dec 2019 21:20:33 +0200
Subject: [PATCH 7/7] Separating the PR

---
 ci/code_checks.sh                        |   4 -
 scripts/validate_string_concatenation.py | 131 -----------------------
 2 files changed, 135 deletions(-)
 delete mode 100755 scripts/validate_string_concatenation.py

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 2b467a03966cd..94eaab0a5b4da 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -207,10 +207,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include=*.{py,pyx} 'xrange' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Check for use of not concatenated strings' ; echo $MSG
-    python $BASE_DIR/scripts/validate_string_concatenation.py pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
     INVGREP_APPEND=" <- trailing whitespaces found"
     invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
diff --git a/scripts/validate_string_concatenation.py b/scripts/validate_string_concatenation.py
deleted file mode 100755
index a274cbb63dd04..0000000000000
--- a/scripts/validate_string_concatenation.py
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/usr/bin/env python
-"""
-GH #30454
-
-Check where there is a string that needs to be concatenated.
-
-This is necessary after black formating,
-where for example black transforms this:
-
->>> foo = (
-...         "bar "
-...         "baz"
-...     )
-
-into this:
-
->>> foo = ("bar " "baz")
-
-Black is not considering this as an
-issue (see https://github.com/psf/black/issues/1051), so we are checking
-it here.
-"""
-
-import os
-import sys
-import token
-import tokenize
-
-FILE_EXTENSIONS_TO_CHECK = [".pxd", ".py", ".pyx", ".pyx.ini"]
-
-
-def main():
-    path = sys.argv[1]
-
-    if not os.path.exists(path):
-        raise ValueError("Please enter a valid path, to a file/directory.")
-
-    if os.path.isfile(path):
-        # Means that the given path is of a single file.
-        sys.exit(is_concatenated(path))
-
-    status_codes = set()
-    # Means that the given path is of a directory.
-    for subdir, _, files in os.walk(path):
-        for file_name in files:
-            ext = full_ext(os.path.join(subdir, file_name))
-            if ext in FILE_EXTENSIONS_TO_CHECK:
-                status_codes.add(is_concatenated(os.path.join(subdir, file_name)))
-
-    if 1 in status_codes:
-        sys.exit(1)
-
-    sys.exit(0)
-
-
-def full_ext(path):
-    """
-    Get the full file extention name.
-
-    Parameters
-    ----------
-    path : str
-        File path.
-
-    Returns
-    -------
-    str
-        Full extention of a file.
-
-    Notes
-    -----
-    This function is needed only because of file extentions like
-    ` .pxi.ini` for example.
-
-    Examples
-    -------
-
-    With one suffix:
-
-    >>> ext = full_ext('/full/path/to/file.py')
-    >>> ext
-        .py
-
-    Wuth two suffixes:
-
-    >>> ext = full_ext('/full/path/to/file.pxi.ini')
-    >>> ext
-        .pxi.ini
-    """
-    ext_list = [".{suffix}".format(suffix=suffix) for suffix in path.split(".")[1:]]
-    return "".join(ext_list)
-
-
-def is_concatenated(file_path):
-    """
-    Checking if the file containing strings that needs to be concatenated.
-
-    Parameters
-    ----------
-    file_path : str
-        File path pointing to a single file.
-
-    Returns
-    -------
-    int
-        Status code representing if the file needs a fix.
-        0 - All good.
-        1 - Needs to be fixed.
-    """
-    need_fix = False
-    with open(file_path, "r") as file_name:
-        toks = list(tokenize.generate_tokens(file_name.readline))
-        for i in range(len(toks) - 1):
-            tok = toks[i]
-            tok2 = toks[i + 1]
-            if tok[0] == token.STRING and tok[0] == tok2[0]:
-                need_fix = True
-                print(
-                    "{file_path}:{line_number}:\t{start} and {end}".format(
-                        file_path=file_path,
-                        line_number=tok[2][0],
-                        start=tok[1],
-                        end=tok2[1],
-                    )
-                )
-
-    return int(need_fix)
-
-
-if __name__ == "__main__":
-    main()