Skip to content

Commit 8568aed

Browse files
committed
TST: Read bz2 files from S3 in PY2
Addresses #14874
1 parent 09dcbff commit 8568aed

File tree

1 file changed

+24
-43
lines changed

1 file changed

+24
-43
lines changed

pandas/io/tests/parser/test_network.py

+24 -43
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212

1313
import pandas.util.testing as tm
1414
from pandas import DataFrame
15-
from pandas import compat
1615
from pandas.io.parsers import read_csv, read_table
1716

1817

@@ -64,18 +63,12 @@ def setUp(self):
6463
@tm.network
6564
def test_parse_public_s3_bucket(self):
6665
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
67-
if comp == 'bz2' and compat.PY2:
68-
# The Python 2 C parser can't read bz2 from S3.
69-
self.assertRaises(ValueError, read_csv,
70-
's3://pandas-test/tips.csv' + ext,
71-
compression=comp)
72-
else:
73-
df = read_csv('s3://pandas-test/tips.csv' +
74-
ext, compression=comp)
75-
self.assertTrue(isinstance(df, DataFrame))
76-
self.assertFalse(df.empty)
77-
tm.assert_frame_equal(read_csv(
78-
tm.get_data_path('tips.csv')), df)
66+
df = read_csv('s3://pandas-test/tips.csv' +
67+
ext, compression=comp)
68+
self.assertTrue(isinstance(df, DataFrame))
69+
self.assertFalse(df.empty)
70+
tm.assert_frame_equal(read_csv(
71+
tm.get_data_path('tips.csv')), df)
7972

8073
# Read public file from bucket with not-public contents
8174
df = read_csv('s3://cant_get_it/tips.csv')
@@ -104,43 +97,31 @@ def test_parse_public_s3a_bucket(self):
10497
@tm.network
10598
def test_parse_public_s3_bucket_nrows(self):
10699
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
107-
if comp == 'bz2' and compat.PY2:
108-
# The Python 2 C parser can't read bz2 from S3.
109-
self.assertRaises(ValueError, read_csv,
110-
's3://pandas-test/tips.csv' + ext,
111-
compression=comp)
112-
else:
113-
df = read_csv('s3://pandas-test/tips.csv' +
114-
ext, nrows=10, compression=comp)
115-
self.assertTrue(isinstance(df, DataFrame))
116-
self.assertFalse(df.empty)
117-
tm.assert_frame_equal(read_csv(
118-
tm.get_data_path('tips.csv')).iloc[:10], df)
100+
df = read_csv('s3://pandas-test/tips.csv' +
101+
ext, nrows=10, compression=comp)
102+
self.assertTrue(isinstance(df, DataFrame))
103+
self.assertFalse(df.empty)
104+
tm.assert_frame_equal(read_csv(
105+
tm.get_data_path('tips.csv')).iloc[:10], df)
119106

120107
@tm.network
121108
def test_parse_public_s3_bucket_chunked(self):
122109
# Read with a chunksize
123110
chunksize = 5
124111
local_tips = read_csv(tm.get_data_path('tips.csv'))
125112
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
126-
if comp == 'bz2' and compat.PY2:
127-
# The Python 2 C parser can't read bz2 from S3.
128-
self.assertRaises(ValueError, read_csv,
129-
's3://pandas-test/tips.csv' + ext,
130-
compression=comp)
131-
else:
132-
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
133-
chunksize=chunksize, compression=comp)
134-
self.assertEqual(df_reader.chunksize, chunksize)
135-
for i_chunk in [0, 1, 2]:
136-
# Read a couple of chunks and make sure we see them
137-
# properly.
138-
df = df_reader.get_chunk()
139-
self.assertTrue(isinstance(df, DataFrame))
140-
self.assertFalse(df.empty)
141-
true_df = local_tips.iloc[
142-
chunksize * i_chunk: chunksize * (i_chunk + 1)]
143-
tm.assert_frame_equal(true_df, df)
113+
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
114+
chunksize=chunksize, compression=comp)
115+
self.assertEqual(df_reader.chunksize, chunksize)
116+
for i_chunk in [0, 1, 2]:
117+
# Read a couple of chunks and make sure we see them
118+
# properly.
119+
df = df_reader.get_chunk()
120+
self.assertTrue(isinstance(df, DataFrame))
121+
self.assertFalse(df.empty)
122+
true_df = local_tips.iloc[
123+
chunksize * i_chunk: chunksize * (i_chunk + 1)]
124+
tm.assert_frame_equal(true_df, df)
144125

145126
@tm.network
146127
def test_parse_public_s3_bucket_chunked_python(self):

0 commit comments

Comments
 (0)