Skip to content

Commit ed4cd3a

Browse files
gfyoung authored and jreback committed
TST: Parser tests refactoring
1) Moved the no-columns test from CParser-only (`c_parser_only.py`) to `common.py`.
2) Moved erroneously placed skiprows tests into their proper place (from `na_values.py` to `skiprows.py`).

Author: gfyoung <[email protected]>

Closes pandas-dev#13319 from gfyoung/test-parsers-refactor and squashes the following commits:

bc1402e [gfyoung] TST: Parser tests refactoring
1 parent 721be62 commit ed4cd3a

File tree

4 files changed

+123
-123
lines changed

4 files changed

+123
-123
lines changed

pandas/io/tests/parser/c_parser_only.py

-9
Original file line numberDiff line numberDiff line change
@@ -419,15 +419,6 @@ def test_tokenize_CR_with_quoting(self):
419419
expected = self.read_csv(StringIO(data.replace('\r', '\n')))
420420
tm.assert_frame_equal(result, expected)
421421

422-
def test_raise_on_no_columns(self):
423-
# single newline
424-
data = "\n"
425-
self.assertRaises(ValueError, self.read_csv, StringIO(data))
426-
427-
# test with more than a single newline
428-
data = "\n\n\n"
429-
self.assertRaises(ValueError, self.read_csv, StringIO(data))
430-
431422
def test_grow_boundary_at_cap(self):
432423
# See gh-12494
433424
#

pandas/io/tests/parser/common.py

+9
Original file line numberDiff line numberDiff line change
@@ -1323,3 +1323,12 @@ def test_inf_parsing(self):
13231323
# TODO: remove condition when 'na_filter' is supported for Python
13241324
df = self.read_csv(StringIO(data), index_col=0, na_filter=False)
13251325
tm.assert_almost_equal(df['A'].values, expected.values)
1326+
1327+
def test_raise_on_no_columns(self):
1328+
# single newline
1329+
data = "\n"
1330+
self.assertRaises(EmptyDataError, self.read_csv, StringIO(data))
1331+
1332+
# test with more than a single newline
1333+
data = "\n\n\n"
1334+
self.assertRaises(EmptyDataError, self.read_csv, StringIO(data))

pandas/io/tests/parser/na_values.py

-114
Original file line numberDiff line numberDiff line change
@@ -223,117 +223,3 @@ def test_na_values_keep_default(self):
223223
'Three': ['None', 'two', 'None', 'nan', 'five', '',
224224
'seven']})
225225
tm.assert_frame_equal(xp.reindex(columns=df.columns), df)
226-
227-
def test_skiprow_with_newline(self):
228-
# see gh-12775 and gh-10911
229-
data = """id,text,num_lines
230-
1,"line 11
231-
line 12",2
232-
2,"line 21
233-
line 22",2
234-
3,"line 31",1"""
235-
expected = [[2, 'line 21\nline 22', 2],
236-
[3, 'line 31', 1]]
237-
expected = DataFrame(expected, columns=[
238-
'id', 'text', 'num_lines'])
239-
df = self.read_csv(StringIO(data), skiprows=[1])
240-
tm.assert_frame_equal(df, expected)
241-
242-
data = ('a,b,c\n~a\n b~,~e\n d~,'
243-
'~f\n f~\n1,2,~12\n 13\n 14~')
244-
expected = [['a\n b', 'e\n d', 'f\n f']]
245-
expected = DataFrame(expected, columns=[
246-
'a', 'b', 'c'])
247-
df = self.read_csv(StringIO(data),
248-
quotechar="~",
249-
skiprows=[2])
250-
tm.assert_frame_equal(df, expected)
251-
252-
data = ('Text,url\n~example\n '
253-
'sentence\n one~,url1\n~'
254-
'example\n sentence\n two~,url2\n~'
255-
'example\n sentence\n three~,url3')
256-
expected = [['example\n sentence\n two', 'url2']]
257-
expected = DataFrame(expected, columns=[
258-
'Text', 'url'])
259-
df = self.read_csv(StringIO(data),
260-
quotechar="~",
261-
skiprows=[1, 3])
262-
tm.assert_frame_equal(df, expected)
263-
264-
def test_skiprow_with_quote(self):
265-
# see gh-12775 and gh-10911
266-
data = """id,text,num_lines
267-
1,"line '11' line 12",2
268-
2,"line '21' line 22",2
269-
3,"line '31' line 32",1"""
270-
expected = [[2, "line '21' line 22", 2],
271-
[3, "line '31' line 32", 1]]
272-
expected = DataFrame(expected, columns=[
273-
'id', 'text', 'num_lines'])
274-
df = self.read_csv(StringIO(data), skiprows=[1])
275-
tm.assert_frame_equal(df, expected)
276-
277-
def test_skiprow_with_newline_and_quote(self):
278-
# see gh-12775 and gh-10911
279-
data = """id,text,num_lines
280-
1,"line \n'11' line 12",2
281-
2,"line \n'21' line 22",2
282-
3,"line \n'31' line 32",1"""
283-
expected = [[2, "line \n'21' line 22", 2],
284-
[3, "line \n'31' line 32", 1]]
285-
expected = DataFrame(expected, columns=[
286-
'id', 'text', 'num_lines'])
287-
df = self.read_csv(StringIO(data), skiprows=[1])
288-
tm.assert_frame_equal(df, expected)
289-
290-
data = """id,text,num_lines
291-
1,"line '11\n' line 12",2
292-
2,"line '21\n' line 22",2
293-
3,"line '31\n' line 32",1"""
294-
expected = [[2, "line '21\n' line 22", 2],
295-
[3, "line '31\n' line 32", 1]]
296-
expected = DataFrame(expected, columns=[
297-
'id', 'text', 'num_lines'])
298-
df = self.read_csv(StringIO(data), skiprows=[1])
299-
tm.assert_frame_equal(df, expected)
300-
301-
data = """id,text,num_lines
302-
1,"line '11\n' \r\tline 12",2
303-
2,"line '21\n' \r\tline 22",2
304-
3,"line '31\n' \r\tline 32",1"""
305-
expected = [[2, "line '21\n' \r\tline 22", 2],
306-
[3, "line '31\n' \r\tline 32", 1]]
307-
expected = DataFrame(expected, columns=[
308-
'id', 'text', 'num_lines'])
309-
df = self.read_csv(StringIO(data), skiprows=[1])
310-
tm.assert_frame_equal(df, expected)
311-
312-
def test_skiprows_lineterminator(self):
313-
# see gh-9079
314-
data = '\n'.join(['SMOSMANIA ThetaProbe-ML2X ',
315-
'2007/01/01 01:00 0.2140 U M ',
316-
'2007/01/01 02:00 0.2141 M O ',
317-
'2007/01/01 04:00 0.2142 D M '])
318-
expected = DataFrame([['2007/01/01', '01:00', 0.2140, 'U', 'M'],
319-
['2007/01/01', '02:00', 0.2141, 'M', 'O'],
320-
['2007/01/01', '04:00', 0.2142, 'D', 'M']],
321-
columns=['date', 'time', 'var', 'flag',
322-
'oflag'])
323-
324-
# test with default line terminators "LF" and "CRLF"
325-
df = self.read_csv(StringIO(data), skiprows=1, delim_whitespace=True,
326-
names=['date', 'time', 'var', 'flag', 'oflag'])
327-
tm.assert_frame_equal(df, expected)
328-
329-
df = self.read_csv(StringIO(data.replace('\n', '\r\n')),
330-
skiprows=1, delim_whitespace=True,
331-
names=['date', 'time', 'var', 'flag', 'oflag'])
332-
tm.assert_frame_equal(df, expected)
333-
334-
# "CR" is not respected with the Python parser yet
335-
if self.engine == 'c':
336-
df = self.read_csv(StringIO(data.replace('\n', '\r')),
337-
skiprows=1, delim_whitespace=True,
338-
names=['date', 'time', 'var', 'flag', 'oflag'])
339-
tm.assert_frame_equal(df, expected)

pandas/io/tests/parser/skiprows.py

+114
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,117 @@ def test_skiprows_blank(self):
7676
datetime(2000, 1, 3)])
7777
expected.index.name = 0
7878
tm.assert_frame_equal(data, expected)
79+
80+
def test_skiprow_with_newline(self):
81+
# see gh-12775 and gh-10911
82+
data = """id,text,num_lines
83+
1,"line 11
84+
line 12",2
85+
2,"line 21
86+
line 22",2
87+
3,"line 31",1"""
88+
expected = [[2, 'line 21\nline 22', 2],
89+
[3, 'line 31', 1]]
90+
expected = DataFrame(expected, columns=[
91+
'id', 'text', 'num_lines'])
92+
df = self.read_csv(StringIO(data), skiprows=[1])
93+
tm.assert_frame_equal(df, expected)
94+
95+
data = ('a,b,c\n~a\n b~,~e\n d~,'
96+
'~f\n f~\n1,2,~12\n 13\n 14~')
97+
expected = [['a\n b', 'e\n d', 'f\n f']]
98+
expected = DataFrame(expected, columns=[
99+
'a', 'b', 'c'])
100+
df = self.read_csv(StringIO(data),
101+
quotechar="~",
102+
skiprows=[2])
103+
tm.assert_frame_equal(df, expected)
104+
105+
data = ('Text,url\n~example\n '
106+
'sentence\n one~,url1\n~'
107+
'example\n sentence\n two~,url2\n~'
108+
'example\n sentence\n three~,url3')
109+
expected = [['example\n sentence\n two', 'url2']]
110+
expected = DataFrame(expected, columns=[
111+
'Text', 'url'])
112+
df = self.read_csv(StringIO(data),
113+
quotechar="~",
114+
skiprows=[1, 3])
115+
tm.assert_frame_equal(df, expected)
116+
117+
def test_skiprow_with_quote(self):
118+
# see gh-12775 and gh-10911
119+
data = """id,text,num_lines
120+
1,"line '11' line 12",2
121+
2,"line '21' line 22",2
122+
3,"line '31' line 32",1"""
123+
expected = [[2, "line '21' line 22", 2],
124+
[3, "line '31' line 32", 1]]
125+
expected = DataFrame(expected, columns=[
126+
'id', 'text', 'num_lines'])
127+
df = self.read_csv(StringIO(data), skiprows=[1])
128+
tm.assert_frame_equal(df, expected)
129+
130+
def test_skiprow_with_newline_and_quote(self):
131+
# see gh-12775 and gh-10911
132+
data = """id,text,num_lines
133+
1,"line \n'11' line 12",2
134+
2,"line \n'21' line 22",2
135+
3,"line \n'31' line 32",1"""
136+
expected = [[2, "line \n'21' line 22", 2],
137+
[3, "line \n'31' line 32", 1]]
138+
expected = DataFrame(expected, columns=[
139+
'id', 'text', 'num_lines'])
140+
df = self.read_csv(StringIO(data), skiprows=[1])
141+
tm.assert_frame_equal(df, expected)
142+
143+
data = """id,text,num_lines
144+
1,"line '11\n' line 12",2
145+
2,"line '21\n' line 22",2
146+
3,"line '31\n' line 32",1"""
147+
expected = [[2, "line '21\n' line 22", 2],
148+
[3, "line '31\n' line 32", 1]]
149+
expected = DataFrame(expected, columns=[
150+
'id', 'text', 'num_lines'])
151+
df = self.read_csv(StringIO(data), skiprows=[1])
152+
tm.assert_frame_equal(df, expected)
153+
154+
data = """id,text,num_lines
155+
1,"line '11\n' \r\tline 12",2
156+
2,"line '21\n' \r\tline 22",2
157+
3,"line '31\n' \r\tline 32",1"""
158+
expected = [[2, "line '21\n' \r\tline 22", 2],
159+
[3, "line '31\n' \r\tline 32", 1]]
160+
expected = DataFrame(expected, columns=[
161+
'id', 'text', 'num_lines'])
162+
df = self.read_csv(StringIO(data), skiprows=[1])
163+
tm.assert_frame_equal(df, expected)
164+
165+
def test_skiprows_lineterminator(self):
166+
# see gh-9079
167+
data = '\n'.join(['SMOSMANIA ThetaProbe-ML2X ',
168+
'2007/01/01 01:00 0.2140 U M ',
169+
'2007/01/01 02:00 0.2141 M O ',
170+
'2007/01/01 04:00 0.2142 D M '])
171+
expected = DataFrame([['2007/01/01', '01:00', 0.2140, 'U', 'M'],
172+
['2007/01/01', '02:00', 0.2141, 'M', 'O'],
173+
['2007/01/01', '04:00', 0.2142, 'D', 'M']],
174+
columns=['date', 'time', 'var', 'flag',
175+
'oflag'])
176+
177+
# test with default line terminators "LF" and "CRLF"
178+
df = self.read_csv(StringIO(data), skiprows=1, delim_whitespace=True,
179+
names=['date', 'time', 'var', 'flag', 'oflag'])
180+
tm.assert_frame_equal(df, expected)
181+
182+
df = self.read_csv(StringIO(data.replace('\n', '\r\n')),
183+
skiprows=1, delim_whitespace=True,
184+
names=['date', 'time', 'var', 'flag', 'oflag'])
185+
tm.assert_frame_equal(df, expected)
186+
187+
# "CR" is not respected with the Python parser yet
188+
if self.engine == 'c':
189+
df = self.read_csv(StringIO(data.replace('\n', '\r')),
190+
skiprows=1, delim_whitespace=True,
191+
names=['date', 'time', 'var', 'flag', 'oflag'])
192+
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)