Skip to content

Commit 590874d

Browse files
committed
BUG: Parse trailing NaN values for the Python parser
1 parent f3d7c18 commit 590874d

File tree

6 files changed

+51
-14
lines changed

6 files changed

+51
-14
lines changed

doc/source/whatsnew/v0.18.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ Bug Fixes
349349

350350

351351
- Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`)
352+
- Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`)
352353

353354

354355

pandas/io/parsers.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -2226,14 +2226,16 @@ def _get_index_name(self, columns):
22262226
return index_name, orig_names, columns
22272227

22282228
def _rows_to_cols(self, content):
2229-
zipped_content = list(lib.to_object_array(content).T)
2230-
22312229
col_len = self.num_original_columns
2232-
zip_len = len(zipped_content)
22332230

22342231
if self._implicit_index:
22352232
col_len += len(self.index_col)
22362233

2234+
# see gh-13320
2235+
zipped_content = list(lib.to_object_array(
2236+
content, min_width=col_len).T)
2237+
zip_len = len(zipped_content)
2238+
22372239
if self.skip_footer < 0:
22382240
raise ValueError('skip footer cannot be negative')
22392241

pandas/io/tests/parser/c_parser_only.py

-9
Original file line numberDiff line numberDiff line change
@@ -360,15 +360,6 @@ def test_raise_on_passed_int_dtype_with_nas(self):
360360
sep=",", skipinitialspace=True,
361361
dtype={'DOY': np.int64})
362362

363-
def test_na_trailing_columns(self):
364-
data = """Date,Currenncy,Symbol,Type,Units,UnitPrice,Cost,Tax
365-
2012-03-14,USD,AAPL,BUY,1000
366-
2012-05-12,USD,SBUX,SELL,500"""
367-
368-
result = self.read_csv(StringIO(data))
369-
self.assertEqual(result['Date'][1], '2012-05-12')
370-
self.assertTrue(result['UnitPrice'].isnull().all())
371-
372363
def test_parse_ragged_csv(self):
373364
data = """1,2,3
374365
1,2,3,4

pandas/io/tests/parser/na_values.py

+9
Original file line numberDiff line numberDiff line change
@@ -241,3 +241,12 @@ def test_na_values_na_filter_override(self):
241241
columns=['A', 'B'])
242242
out = self.read_csv(StringIO(data), na_values=['B'], na_filter=False)
243243
tm.assert_frame_equal(out, expected)
244+
245+
def test_na_trailing_columns(self):
246+
data = """Date,Currenncy,Symbol,Type,Units,UnitPrice,Cost,Tax
247+
2012-03-14,USD,AAPL,BUY,1000
248+
2012-05-12,USD,SBUX,SELL,500"""
249+
250+
result = self.read_csv(StringIO(data))
251+
self.assertEqual(result['Date'][1], '2012-05-12')
252+
self.assertTrue(result['UnitPrice'].isnull().all())

pandas/src/inference.pyx

+19-2
Original file line numberDiff line numberDiff line change
@@ -1132,15 +1132,32 @@ def map_infer(ndarray arr, object f, bint convert=1):
11321132
return result
11331133

11341134

1135-
def to_object_array(list rows):
1135+
def to_object_array(list rows, int min_width=0):
1136+
"""
1137+
Convert a list of lists into an object array.
1138+
1139+
Parameters
1140+
----------
1141+
rows : list of lists
1142+
A list of lists to be converted into an array
1143+
min_width : int
1144+
The minimum width of the object array. If a list
1145+
in `rows` contains fewer than `min_width` elements,
1146+
the remaining elements in the corresponding row
1147+
will all be `NaN`.
1148+
1149+
Returns
1150+
-------
1151+
obj_array : numpy array of the object dtype
1152+
"""
11361153
cdef:
11371154
Py_ssize_t i, j, n, k, tmp
11381155
ndarray[object, ndim=2] result
11391156
list row
11401157

11411158
n = len(rows)
11421159

1143-
k = 0
1160+
k = min_width
11441161
for i from 0 <= i < n:
11451162
tmp = len(rows[i])
11461163
if tmp > k:

pandas/tests/test_infer_and_convert.py

+17
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,23 @@ def test_to_object_array_tuples(self):
201201
except ImportError:
202202
pass
203203

204+
def test_to_object_array_width(self):
205+
# see gh-13320
206+
rows = [[1, 2, 3], [4, 5, 6]]
207+
208+
expected = np.array(rows, dtype=object)
209+
out = lib.to_object_array(rows)
210+
tm.assert_numpy_array_equal(out, expected)
211+
212+
expected = np.array(rows, dtype=object)
213+
out = lib.to_object_array(rows, min_width=1)
214+
tm.assert_numpy_array_equal(out, expected)
215+
216+
expected = np.array([[1, 2, 3, None, None],
217+
[4, 5, 6, None, None]], dtype=object)
218+
out = lib.to_object_array(rows, min_width=5)
219+
tm.assert_numpy_array_equal(out, expected)
220+
204221
def test_object(self):
205222

206223
# GH 7431

0 commit comments

Comments
 (0)