@@ -1181,10 +1181,13 @@ def __init__(
1181
1181
comment : str | None ,
1182
1182
skiprows : set [int ] | None = None ,
1183
1183
infer_nrows : int = 100 ,
1184
+ keep_whitespace : bool | None = True ,
1185
+ whitespace_chars : str | None = " \t " ,
1184
1186
) -> None :
1185
1187
self .f = f
1186
1188
self .buffer : Iterator | None = None
1187
- self .delimiter = "\r \n " + delimiter if delimiter else "\n \r \t "
1189
+ self .keep_whitespace = keep_whitespace
1190
+ self .whitespace_chars = whitespace_chars
1188
1191
self .comment = comment
1189
1192
if colspecs == "infer" :
1190
1193
self .colspecs = self .detect_colspecs (
@@ -1211,6 +1214,36 @@ def __init__(
1211
1214
"2 element tuple or list of integers"
1212
1215
)
1213
1216
1217
+ if not isinstance (self .keep_whitespace , bool ):
1218
+ raise TypeError (
1219
+ "keep_whitespace must be type bool (True or False), "
1220
+ f"input was type { type (self .keep_whitespace ).__name__ } : "
1221
+ f'"{ self .keep_whitespace } "'
1222
+ )
1223
+ if delimiter :
1224
+ ## Delimiters in fixed-width files removed:
1225
+ ## use colspecs, widths, or read_table()
1226
+ import warnings
1227
+
1228
+ ## See link regarding fixing anti-patterns & unexpected default behaviour:
1229
+ ## https://github.com/pandas-dev/pandas/pull/49832#discussion_r1030615937
1230
+ ##
1231
+ ## Deprecation warnings ignored by default, show them:
1232
+ warnings .simplefilter ("always" )
1233
+ warnings .formatwarning = (
1234
+ lambda msg , cat , file , line , args1 : f"NOTICE:\n { msg } \n \n "
1235
+ f'{ cat } \n File "{ file } ", line { line } '
1236
+ "in FixedWidthReader.__init__\n "
1237
+ )
1238
+ warnings .warn (
1239
+ (
1240
+ "Delimiters are deprecated in fixed-width files "
1241
+ + "- use colspecs or widths\n "
1242
+ + "See keep_whitespace in read_fwf(), also see read_table()."
1243
+ ),
1244
+ DeprecationWarning ,
1245
+ )
1246
+
1214
1247
def get_rows (self , infer_nrows : int , skiprows : set [int ] | None = None ) -> list [str ]:
1215
1248
"""
1216
1249
Read rows from self.f, skipping as specified.
@@ -1283,7 +1316,14 @@ def __next__(self) -> list[str]:
1283
1316
else :
1284
1317
line = next (self .f ) # type: ignore[arg-type]
1285
1318
# Note: 'colspecs' is a sequence of half-open intervals.
1286
- return [line [from_ :to ].strip (self .delimiter ) for (from_ , to ) in self .colspecs ]
1319
+ line = line .rstrip ("\r \n " )
1320
+ if self .keep_whitespace :
1321
+ return [line [from_ :to ] for (from_ , to ) in self .colspecs ]
1322
+ else :
1323
+ return [
1324
+ line [from_ :to ].strip (self .whitespace_chars )
1325
+ for (from_ , to ) in self .colspecs
1326
+ ]
1287
1327
1288
1328
1289
1329
class FixedWidthFieldParser (PythonParser ):
@@ -1296,6 +1336,8 @@ def __init__(self, f: ReadCsvBuffer[str], **kwds) -> None:
1296
1336
# Support iterators, convert to a list.
1297
1337
self .colspecs = kwds .pop ("colspecs" )
1298
1338
self .infer_nrows = kwds .pop ("infer_nrows" )
1339
+ self .keep_whitespace = kwds .pop ("keep_whitespace" , True )
1340
+ self .whitespace_chars = kwds .pop ("whitespace_chars" , " \t " )
1299
1341
PythonParser .__init__ (self , f , ** kwds )
1300
1342
1301
1343
def _make_reader (self , f : IO [str ] | ReadCsvBuffer [str ]) -> None :
@@ -1306,6 +1348,8 @@ def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None:
1306
1348
self .comment ,
1307
1349
self .skiprows ,
1308
1350
self .infer_nrows ,
1351
+ self .keep_whitespace ,
1352
+ self .whitespace_chars ,
1309
1353
)
1310
1354
1311
1355
def _remove_empty_lines (self , lines : list [list [Scalar ]]) -> list [list [Scalar ]]:
0 commit comments