Skip to content

Commit 16a8902

Browse files
author
Tom Augspurger
committed
Merge pull request #7081 from TomAugspurger/fwf-slice
BUG: read_fwf colspec should treat None like slice
2 parents fa1f585 + b7b6b1f commit 16a8902

File tree

4 files changed

+35
-2
lines changed

4 files changed

+35
-2
lines changed

doc/source/io.rst

+2
Original file line numberDiff line numberDiff line change
@@ -809,6 +809,8 @@ two extra parameters:
809809
String value 'infer' can be used to instruct the parser to try detecting
810810
the column specifications from the first 100 rows of the data. Default
811811
behaviour, if not specified, is to infer.
812+
As with regular Python slices, you can slice to the end of the line
813+
with ``None``, e.g. ``colspecs = [(0, 1), (1, None)]``.
812814
- ``widths``: A list of field widths which can be used instead of 'colspecs'
813815
if the intervals are contiguous.
814816

doc/source/release.rst

+3
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,9 @@ Bug Fixes
479479
claim that they contained all the things (:issue:`7066`).
480480
- Bug in ``DataFrame.boxplot`` where it failed to use the axis passed as the ``ax`` argument (:issue:`3578`)
481481
- Bug in the ``XlsxWriter`` and ``XlwtWriter`` implementations that resulted in datetime columns being formatted without the time (:issue:`7075`)
482+
were being passed to plotting method
483+
- :func:`read_fwf` treats ``None`` in ``colspecs`` like regular Python slices. It now reads from the beginning
484+
or until the end of the line when ``colspecs`` contains a ``None`` (previously raised a ``TypeError``).
482485

483486
pandas 0.13.1
484487
-------------

pandas/io/parsers.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -2232,10 +2232,11 @@ def __init__(self, f, colspecs, delimiter, comment):
22322232
"input was a %r" % type(colspecs).__name__)
22332233

22342234
for colspec in self.colspecs:
2235+
22352236
if not (isinstance(colspec, (tuple, list)) and
22362237
len(colspec) == 2 and
2237-
isinstance(colspec[0], (int, np.integer)) and
2238-
isinstance(colspec[1], (int, np.integer))):
2238+
isinstance(colspec[0], (int, np.integer, type(None))) and
2239+
isinstance(colspec[1], (int, np.integer, type(None)))):
22392240
raise TypeError('Each column specification must be '
22402241
'2 element tuple or list of integers')
22412242

pandas/io/tests/test_parsers.py

+27
Original file line numberDiff line numberDiff line change
@@ -2326,6 +2326,33 @@ def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(self):
23262326
'Each column specification must be.+'):
23272327
read_fwf(StringIO(self.data1), [('a', 1)])
23282328

2329+
def test_fwf_colspecs_None(self):
2330+
# GH 7079
2331+
data = """\
2332+
123456
2333+
456789
2334+
"""
2335+
colspecs = [(0, 3), (3, None)]
2336+
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
2337+
expected = DataFrame([[123, 456], [456, 789]])
2338+
tm.assert_frame_equal(result, expected)
2339+
2340+
colspecs = [(None, 3), (3, 6)]
2341+
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
2342+
expected = DataFrame([[123, 456], [456, 789]])
2343+
tm.assert_frame_equal(result, expected)
2344+
2345+
colspecs = [(0, None), (3, None)]
2346+
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
2347+
expected = DataFrame([[123456, 456], [456789, 789]])
2348+
tm.assert_frame_equal(result, expected)
2349+
2350+
colspecs = [(None, None), (3, 6)]
2351+
result = read_fwf(StringIO(data), colspecs=colspecs, header=None)
2352+
expected = DataFrame([[123456, 456], [456789, 789]])
2353+
tm.assert_frame_equal(result, expected)
2354+
2355+
23292356
def test_fwf_regression(self):
23302357
# GH 3594
23312358
#### turns out 'T060' is parsable as a datetime slice!

0 commit comments

Comments
 (0)