Skip to content

Commit bc1402e

Browse files
committed
TST: Parser tests refactoring
1) Moved the no-columns test from the C-parser-only tests to common.py. 2) Moved erroneously placed skiprows tests into their proper place.
1 parent 70be8a9 commit bc1402e

File tree

4 files changed

+123
-123
lines changed

4 files changed

+123
-123
lines changed

pandas/io/tests/parser/c_parser_only.py

-9
Original file line numberDiff line numberDiff line change
@@ -419,15 +419,6 @@ def test_tokenize_CR_with_quoting(self):
419419
expected = self.read_csv(StringIO(data.replace('\r', '\n')))
420420
tm.assert_frame_equal(result, expected)
421421

422-
def test_raise_on_no_columns(self):
423-
# single newline
424-
data = "\n"
425-
self.assertRaises(ValueError, self.read_csv, StringIO(data))
426-
427-
# test with more than a single newline
428-
data = "\n\n\n"
429-
self.assertRaises(ValueError, self.read_csv, StringIO(data))
430-
431422
def test_grow_boundary_at_cap(self):
432423
# See gh-12494
433424
#

pandas/io/tests/parser/common.py

+9
Original file line numberDiff line numberDiff line change
@@ -1323,3 +1323,12 @@ def test_inf_parsing(self):
13231323
# TODO: remove condition when 'na_filter' is supported for Python
13241324
df = self.read_csv(StringIO(data), index_col=0, na_filter=False)
13251325
tm.assert_almost_equal(df['A'].values, expected.values)
1326+
1327+
def test_raise_on_no_columns(self):
1328+
# single newline
1329+
data = "\n"
1330+
self.assertRaises(EmptyDataError, self.read_csv, StringIO(data))
1331+
1332+
# test with more than a single newline
1333+
data = "\n\n\n"
1334+
self.assertRaises(EmptyDataError, self.read_csv, StringIO(data))

pandas/io/tests/parser/na_values.py

-114
Original file line numberDiff line numberDiff line change
@@ -250,117 +250,3 @@ def test_na_values_keep_default(self):
250250
'Three': ['None', 'two', 'None', 'nan', 'five', '',
251251
'seven']})
252252
tm.assert_frame_equal(xp.reindex(columns=df.columns), df)
253-
254-
def test_skiprow_with_newline(self):
255-
# see gh-12775 and gh-10911
256-
data = """id,text,num_lines
257-
1,"line 11
258-
line 12",2
259-
2,"line 21
260-
line 22",2
261-
3,"line 31",1"""
262-
expected = [[2, 'line 21\nline 22', 2],
263-
[3, 'line 31', 1]]
264-
expected = DataFrame(expected, columns=[
265-
'id', 'text', 'num_lines'])
266-
df = self.read_csv(StringIO(data), skiprows=[1])
267-
tm.assert_frame_equal(df, expected)
268-
269-
data = ('a,b,c\n~a\n b~,~e\n d~,'
270-
'~f\n f~\n1,2,~12\n 13\n 14~')
271-
expected = [['a\n b', 'e\n d', 'f\n f']]
272-
expected = DataFrame(expected, columns=[
273-
'a', 'b', 'c'])
274-
df = self.read_csv(StringIO(data),
275-
quotechar="~",
276-
skiprows=[2])
277-
tm.assert_frame_equal(df, expected)
278-
279-
data = ('Text,url\n~example\n '
280-
'sentence\n one~,url1\n~'
281-
'example\n sentence\n two~,url2\n~'
282-
'example\n sentence\n three~,url3')
283-
expected = [['example\n sentence\n two', 'url2']]
284-
expected = DataFrame(expected, columns=[
285-
'Text', 'url'])
286-
df = self.read_csv(StringIO(data),
287-
quotechar="~",
288-
skiprows=[1, 3])
289-
tm.assert_frame_equal(df, expected)
290-
291-
def test_skiprow_with_quote(self):
292-
# see gh-12775 and gh-10911
293-
data = """id,text,num_lines
294-
1,"line '11' line 12",2
295-
2,"line '21' line 22",2
296-
3,"line '31' line 32",1"""
297-
expected = [[2, "line '21' line 22", 2],
298-
[3, "line '31' line 32", 1]]
299-
expected = DataFrame(expected, columns=[
300-
'id', 'text', 'num_lines'])
301-
df = self.read_csv(StringIO(data), skiprows=[1])
302-
tm.assert_frame_equal(df, expected)
303-
304-
def test_skiprow_with_newline_and_quote(self):
305-
# see gh-12775 and gh-10911
306-
data = """id,text,num_lines
307-
1,"line \n'11' line 12",2
308-
2,"line \n'21' line 22",2
309-
3,"line \n'31' line 32",1"""
310-
expected = [[2, "line \n'21' line 22", 2],
311-
[3, "line \n'31' line 32", 1]]
312-
expected = DataFrame(expected, columns=[
313-
'id', 'text', 'num_lines'])
314-
df = self.read_csv(StringIO(data), skiprows=[1])
315-
tm.assert_frame_equal(df, expected)
316-
317-
data = """id,text,num_lines
318-
1,"line '11\n' line 12",2
319-
2,"line '21\n' line 22",2
320-
3,"line '31\n' line 32",1"""
321-
expected = [[2, "line '21\n' line 22", 2],
322-
[3, "line '31\n' line 32", 1]]
323-
expected = DataFrame(expected, columns=[
324-
'id', 'text', 'num_lines'])
325-
df = self.read_csv(StringIO(data), skiprows=[1])
326-
tm.assert_frame_equal(df, expected)
327-
328-
data = """id,text,num_lines
329-
1,"line '11\n' \r\tline 12",2
330-
2,"line '21\n' \r\tline 22",2
331-
3,"line '31\n' \r\tline 32",1"""
332-
expected = [[2, "line '21\n' \r\tline 22", 2],
333-
[3, "line '31\n' \r\tline 32", 1]]
334-
expected = DataFrame(expected, columns=[
335-
'id', 'text', 'num_lines'])
336-
df = self.read_csv(StringIO(data), skiprows=[1])
337-
tm.assert_frame_equal(df, expected)
338-
339-
def test_skiprows_lineterminator(self):
340-
# see gh-9079
341-
data = '\n'.join(['SMOSMANIA ThetaProbe-ML2X ',
342-
'2007/01/01 01:00 0.2140 U M ',
343-
'2007/01/01 02:00 0.2141 M O ',
344-
'2007/01/01 04:00 0.2142 D M '])
345-
expected = DataFrame([['2007/01/01', '01:00', 0.2140, 'U', 'M'],
346-
['2007/01/01', '02:00', 0.2141, 'M', 'O'],
347-
['2007/01/01', '04:00', 0.2142, 'D', 'M']],
348-
columns=['date', 'time', 'var', 'flag',
349-
'oflag'])
350-
351-
# test with default line terminators "LF" and "CRLF"
352-
df = self.read_csv(StringIO(data), skiprows=1, delim_whitespace=True,
353-
names=['date', 'time', 'var', 'flag', 'oflag'])
354-
tm.assert_frame_equal(df, expected)
355-
356-
df = self.read_csv(StringIO(data.replace('\n', '\r\n')),
357-
skiprows=1, delim_whitespace=True,
358-
names=['date', 'time', 'var', 'flag', 'oflag'])
359-
tm.assert_frame_equal(df, expected)
360-
361-
# "CR" is not respected with the Python parser yet
362-
if self.engine == 'c':
363-
df = self.read_csv(StringIO(data.replace('\n', '\r')),
364-
skiprows=1, delim_whitespace=True,
365-
names=['date', 'time', 'var', 'flag', 'oflag'])
366-
tm.assert_frame_equal(df, expected)

pandas/io/tests/parser/skiprows.py

+114
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,117 @@ def test_skiprows_blank(self):
7676
datetime(2000, 1, 3)])
7777
expected.index.name = 0
7878
tm.assert_frame_equal(data, expected)
79+
80+
def test_skiprow_with_newline(self):
81+
# see gh-12775 and gh-10911
82+
data = """id,text,num_lines
83+
1,"line 11
84+
line 12",2
85+
2,"line 21
86+
line 22",2
87+
3,"line 31",1"""
88+
expected = [[2, 'line 21\nline 22', 2],
89+
[3, 'line 31', 1]]
90+
expected = DataFrame(expected, columns=[
91+
'id', 'text', 'num_lines'])
92+
df = self.read_csv(StringIO(data), skiprows=[1])
93+
tm.assert_frame_equal(df, expected)
94+
95+
data = ('a,b,c\n~a\n b~,~e\n d~,'
96+
'~f\n f~\n1,2,~12\n 13\n 14~')
97+
expected = [['a\n b', 'e\n d', 'f\n f']]
98+
expected = DataFrame(expected, columns=[
99+
'a', 'b', 'c'])
100+
df = self.read_csv(StringIO(data),
101+
quotechar="~",
102+
skiprows=[2])
103+
tm.assert_frame_equal(df, expected)
104+
105+
data = ('Text,url\n~example\n '
106+
'sentence\n one~,url1\n~'
107+
'example\n sentence\n two~,url2\n~'
108+
'example\n sentence\n three~,url3')
109+
expected = [['example\n sentence\n two', 'url2']]
110+
expected = DataFrame(expected, columns=[
111+
'Text', 'url'])
112+
df = self.read_csv(StringIO(data),
113+
quotechar="~",
114+
skiprows=[1, 3])
115+
tm.assert_frame_equal(df, expected)
116+
117+
def test_skiprow_with_quote(self):
118+
# see gh-12775 and gh-10911
119+
data = """id,text,num_lines
120+
1,"line '11' line 12",2
121+
2,"line '21' line 22",2
122+
3,"line '31' line 32",1"""
123+
expected = [[2, "line '21' line 22", 2],
124+
[3, "line '31' line 32", 1]]
125+
expected = DataFrame(expected, columns=[
126+
'id', 'text', 'num_lines'])
127+
df = self.read_csv(StringIO(data), skiprows=[1])
128+
tm.assert_frame_equal(df, expected)
129+
130+
def test_skiprow_with_newline_and_quote(self):
131+
# see gh-12775 and gh-10911
132+
data = """id,text,num_lines
133+
1,"line \n'11' line 12",2
134+
2,"line \n'21' line 22",2
135+
3,"line \n'31' line 32",1"""
136+
expected = [[2, "line \n'21' line 22", 2],
137+
[3, "line \n'31' line 32", 1]]
138+
expected = DataFrame(expected, columns=[
139+
'id', 'text', 'num_lines'])
140+
df = self.read_csv(StringIO(data), skiprows=[1])
141+
tm.assert_frame_equal(df, expected)
142+
143+
data = """id,text,num_lines
144+
1,"line '11\n' line 12",2
145+
2,"line '21\n' line 22",2
146+
3,"line '31\n' line 32",1"""
147+
expected = [[2, "line '21\n' line 22", 2],
148+
[3, "line '31\n' line 32", 1]]
149+
expected = DataFrame(expected, columns=[
150+
'id', 'text', 'num_lines'])
151+
df = self.read_csv(StringIO(data), skiprows=[1])
152+
tm.assert_frame_equal(df, expected)
153+
154+
data = """id,text,num_lines
155+
1,"line '11\n' \r\tline 12",2
156+
2,"line '21\n' \r\tline 22",2
157+
3,"line '31\n' \r\tline 32",1"""
158+
expected = [[2, "line '21\n' \r\tline 22", 2],
159+
[3, "line '31\n' \r\tline 32", 1]]
160+
expected = DataFrame(expected, columns=[
161+
'id', 'text', 'num_lines'])
162+
df = self.read_csv(StringIO(data), skiprows=[1])
163+
tm.assert_frame_equal(df, expected)
164+
165+
def test_skiprows_lineterminator(self):
166+
# see gh-9079
167+
data = '\n'.join(['SMOSMANIA ThetaProbe-ML2X ',
168+
'2007/01/01 01:00 0.2140 U M ',
169+
'2007/01/01 02:00 0.2141 M O ',
170+
'2007/01/01 04:00 0.2142 D M '])
171+
expected = DataFrame([['2007/01/01', '01:00', 0.2140, 'U', 'M'],
172+
['2007/01/01', '02:00', 0.2141, 'M', 'O'],
173+
['2007/01/01', '04:00', 0.2142, 'D', 'M']],
174+
columns=['date', 'time', 'var', 'flag',
175+
'oflag'])
176+
177+
# test with default line terminators "LF" and "CRLF"
178+
df = self.read_csv(StringIO(data), skiprows=1, delim_whitespace=True,
179+
names=['date', 'time', 'var', 'flag', 'oflag'])
180+
tm.assert_frame_equal(df, expected)
181+
182+
df = self.read_csv(StringIO(data.replace('\n', '\r\n')),
183+
skiprows=1, delim_whitespace=True,
184+
names=['date', 'time', 'var', 'flag', 'oflag'])
185+
tm.assert_frame_equal(df, expected)
186+
187+
# "CR" is not respected with the Python parser yet
188+
if self.engine == 'c':
189+
df = self.read_csv(StringIO(data.replace('\n', '\r')),
190+
skiprows=1, delim_whitespace=True,
191+
names=['date', 'time', 'var', 'flag', 'oflag'])
192+
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)