From 2bfa90a35c18ca6a6a71f78a920863c50f78477e Mon Sep 17 00:00:00 2001
From: Ronald Barnes <ron@ronaldbarnes.ca>
Date: Mon, 21 Nov 2022 21:30:07 -0800
Subject: [PATCH 1/5] Updated documentation indicating default behaviour is to
 strip whitespace, and how to override. Enhances GH-issue-16950
 https://github.com/pandas-dev/pandas/pull/16950

---
 doc/source/user_guide/io.rst | 9 ++++++---
 pandas/io/parsers/readers.py | 6 +++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index b4bf3ef024d4c..f9637d13da0be 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1366,8 +1366,10 @@ a different usage of the ``delimiter`` parameter:
 * ``widths``: A list of field widths which can be used instead of 'colspecs'
   if the intervals are contiguous.
 * ``delimiter``: Characters to consider as filler characters in the fixed-width file.
-  Can be used to specify the filler character of the fields
-  if it is not spaces (e.g., '~').
+  Default is "`` \t``"  (space and tab).
+  Used to specify the character(s) to strip from start and end of every field.
+  To preserve whitespace, set to a character that does not exist in the data,
+  i.e. "\0".
 
 Consider a typical fixed-width data file:
 
@@ -1404,8 +1406,9 @@ column widths for contiguous columns:
    df = pd.read_fwf("bar.csv", widths=widths, header=None)
    df
 
-The parser will take care of extra white spaces around the columns
+The parser will take care of extra whitespace around the columns,
 so it's ok to have extra separation between the columns in the file.
+To preserve whitespace around the columns, pass a ``delimiter`` that does not occur in the data.
 
 By default, ``read_fwf`` will try to infer the file's ``colspecs`` by using the
 first 100 rows of the file. It can do it only in cases when the columns are
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 700a2b6ba964c..ac909ebc21960 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -1231,6 +1231,7 @@ def read_fwf(
     *,
     colspecs: Sequence[tuple[int, int]] | str | None = "infer",
     widths: Sequence[int] | None = None,
+    delimiter: str | None = " \t",
     infer_nrows: int = 100,
     **kwds,
 ) -> DataFrame | TextFileReader:
@@ -1251,7 +1252,7 @@ def read_fwf(
         Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
         expected. A local file could be:
         ``file://localhost/path/to/table.csv``.
-    colspecs : list of tuple (int, int) or 'infer'. optional
+    colspecs : list of tuple (int, int) or 'infer', optional
         A list of tuples giving the extents of the fixed-width
         fields of each line as half-open intervals (i.e.,  [from, to[ ).
         String value 'infer' can be used to instruct the parser to try
@@ -1260,6 +1261,9 @@ def read_fwf(
     widths : list of int, optional
         A list of field widths which can be used instead of 'colspecs' if
         the intervals are contiguous.
+    delimiter : str, default " \t" (space and tab), optional
+        Character(s) to strip from start and end of each field. To
+        preserve whitespace, must be non-default value (i.e. delimiter="\0").
     infer_nrows : int, default 100
         The number of rows to consider when letting the parser determine the
         `colspecs`.

From f297d9904565231f59753883c2ecd267a1f133e5 Mon Sep 17 00:00:00 2001
From: Ronald Barnes <ron@ronaldbarnes.ca>
Date: Mon, 21 Nov 2022 21:52:33 -0800
Subject: [PATCH 2/5] Fix failed Sphinx lint issue.

---
 doc/source/user_guide/io.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index f9637d13da0be..a8b58bebbc4b5 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -1366,7 +1366,7 @@ a different usage of the ``delimiter`` parameter:
 * ``widths``: A list of field widths which can be used instead of 'colspecs'
   if the intervals are contiguous.
 * ``delimiter``: Characters to consider as filler characters in the fixed-width file.
-  Default is "`` \t``"  (space and tab).
+  The defaults are the space and tab characters.
   Used to specify the character(s) to strip from start and end of every field.
   To preserve whitespace, set to a character that does not exist in the data,
   i.e. "\0".

From a0304a7c0504c4d4b1cdcfefa3bbd528e6fefc5c Mon Sep 17 00:00:00 2001
From: Ronald Barnes <ron@ronaldbarnes.ca>
Date: Mon, 21 Nov 2022 22:09:48 -0800
Subject: [PATCH 3/5] Added delimiter to _fwf_defaults.

---
 pandas/io/parsers/readers.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index ac909ebc21960..0a6175cc65c1e 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -435,7 +435,12 @@
     "float_precision": None,
 }
 
-_fwf_defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None}
+_fwf_defaults = {
+    "colspecs": "infer",
+    "infer_nrows": 100,
+    "widths": None,
+    "delimiter": " 	",
+}
 
 _c_unsupported = {"skipfooter"}
 _python_unsupported = {"low_memory", "float_precision"}

From 7adb89d405ed7803952ff0a2452a99c37115c0ea Mon Sep 17 00:00:00 2001
From: Ronald Barnes <ron@ronaldbarnes.ca>
Date: Mon, 21 Nov 2022 23:22:11 -0800
Subject: [PATCH 4/5] Changed comment from ## to # per flake8.

---
 pandas/io/parsers/readers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 0a6175cc65c1e..b9e2faf6e57b7 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -439,7 +439,7 @@
     "colspecs": "infer",
     "infer_nrows": 100,
     "widths": None,
-    "delimiter": " 	",
+    "delimiter": " 	",  # space & [TAB]
 }
 
 _c_unsupported = {"skipfooter"}

From ab111c7549948efeba7f9a3042dac1cfcf6284be Mon Sep 17 00:00:00 2001
From: Ronald Barnes <ron@ronaldbarnes.ca>
Date: Wed, 23 Nov 2022 00:28:05 -0800
Subject: [PATCH 5/5] Delimiters used by colspecs='infer'

---
 pandas/io/parsers/readers.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
index 6a2f9c9baf24b..defe396813d5a 100644
--- a/pandas/io/parsers/readers.py
+++ b/pandas/io/parsers/readers.py
@@ -1274,6 +1274,7 @@ def read_fwf(
     delimiter : str, default " \t" (space and tab), optional
         Character(s) to strip from start and end of each field. To
         preserve whitespace, must be non-default value (i.e. delimiter="\0").
+        Also used when ``colspecs="infer"`` to determine column boundaries.
     infer_nrows : int, default 100
         The number of rows to consider when letting the parser determine the
         `colspecs`.