@@ -104,6 +104,9 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds) -> None:
104
104
self .decimal = kwds ["decimal" ]
105
105
106
106
self .comment = kwds ["comment" ]
107
+ ## GH51569
108
+ self .keep_whitespace = kwds .get ("keep_whitespace" )
109
+ self .whitespace_chars = kwds .get ("whitespace_chars" )
107
110
108
111
# Set self.data to something that can read lines.
109
112
if isinstance (f , list ):
@@ -1180,11 +1183,20 @@ def __init__(
1180
1183
comment : str | None ,
1181
1184
skiprows : set [int ] | None = None ,
1182
1185
infer_nrows : int = 100 ,
1186
+ ## GH51569
1187
+ keep_whitespace : bool | tuple [bool , bool ] = (False , False ),
1188
+ whitespace_chars : str = " \t " ,
1183
1189
) -> None :
1184
1190
self .f = f
1185
1191
self .buffer : Iterator | None = None
1186
1192
self .delimiter = "\r \n " + delimiter if delimiter else "\n \r \t "
1187
1193
self .comment = comment
1194
+ self .keep_whitespace = keep_whitespace
1195
+ ## For backwards compatibility, any 'delimiter' characters are also stripped from fields, alongside 'whitespace_chars':
1196
+ if delimiter :
1197
+ whitespace_chars = whitespace_chars + delimiter
1198
+ self .whitespace_chars = whitespace_chars
1199
+
1188
1200
if colspecs == "infer" :
1189
1201
self .colspecs = self .detect_colspecs (
1190
1202
infer_nrows = infer_nrows , skiprows = skiprows
@@ -1210,6 +1222,33 @@ def __init__(
1210
1222
"2 element tuple or list of integers"
1211
1223
)
1212
1224
1225
+ ## GH51569
1226
+ ## Accept a single bool, but normalize it to a (keep_left, keep_right) tuple covering both sides of each field:
1227
+ if isinstance (self .keep_whitespace , bool ):
1228
+ self .keep_whitespace = (keep_whitespace , keep_whitespace )
1229
+ ## Ensure tuple is (bool,bool):
1230
+ if (
1231
+ isinstance (self .keep_whitespace , tuple )
1232
+ and len (self .keep_whitespace ) == 2
1233
+ and isinstance (self .keep_whitespace [0 ], bool )
1234
+ and isinstance (self .keep_whitespace [1 ], bool )
1235
+ ):
1236
+ # Define custom lstrip & rstrip *once*, at __init__:
1237
+ if self .keep_whitespace [0 ] is True :
1238
+ self .ltrim = lambda x : x
1239
+ else :
1240
+ self .ltrim = lambda x : x .lstrip (self .whitespace_chars )
1241
+ if self .keep_whitespace [1 ] is True :
1242
+ self .rtrim = lambda x : x
1243
+ else :
1244
+ self .rtrim = lambda x : x .rstrip (self .whitespace_chars )
1245
+ else :
1246
+ raise ValueError (
1247
+ "'keep_whitespace' must be a bool or tuple(bool,bool)."
1248
+ f"\n Received '{ type (self .keep_whitespace ).__name__ } ': "
1249
+ f"'{ self .keep_whitespace } '."
1250
+ )
1251
+
1213
1252
def get_rows (self , infer_nrows : int , skiprows : set [int ] | None = None ) -> list [str ]:
1214
1253
"""
1215
1254
Read rows from self.f, skipping as specified.
@@ -1281,8 +1320,14 @@ def __next__(self) -> list[str]:
1281
1320
line = next (self .f ) # type: ignore[arg-type]
1282
1321
else :
1283
1322
line = next (self .f ) # type: ignore[arg-type]
1323
+
1324
+ line = line .rstrip ("\r \n " )
1325
+
1284
1326
# Note: 'colspecs' is a sequence of half-open intervals.
1285
- return [line [from_ :to ].strip (self .delimiter ) for (from_ , to ) in self .colspecs ]
1327
+ return [self .ltrim (self .rtrim (line [from_ :to ])) for (from_ , to ) in self .colspecs ]
1328
+
1329
+
1330
+ # return [line[from_:to].strip(self.delimiter) for (from_, to) in self.colspecs]
1286
1331
1287
1332
1288
1333
class FixedWidthFieldParser (PythonParser ):
@@ -1305,6 +1350,9 @@ def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None:
1305
1350
self .comment ,
1306
1351
self .skiprows ,
1307
1352
self .infer_nrows ,
1353
+ ## GH51569
1354
+ self .keep_whitespace ,
1355
+ self .whitespace_chars ,
1308
1356
)
1309
1357
1310
1358
def _remove_empty_lines (self , lines : list [list [Scalar ]]) -> list [list [Scalar ]]:
0 commit comments