Skip to content

Commit 6c632db

Browse files
committed
TST: Read bz2 files from S3 in PY2
Addresses #14874
1 parent: 09dcbff; commit: 6c632db

File tree

1 file changed

+24
-42
lines changed

1 file changed

+24
-42
lines changed

pandas/io/tests/parser/test_network.py

+24 -42
Original file line number | Diff line number | Diff line change
@@ -64,18 +64,12 @@ def setUp(self):
6464
@tm.network
6565
def test_parse_public_s3_bucket(self):
6666
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
67-
if comp == 'bz2' and compat.PY2:
68-
# The Python 2 C parser can't read bz2 from S3.
69-
self.assertRaises(ValueError, read_csv,
70-
's3://pandas-test/tips.csv' + ext,
71-
compression=comp)
72-
else:
73-
df = read_csv('s3://pandas-test/tips.csv' +
74-
ext, compression=comp)
75-
self.assertTrue(isinstance(df, DataFrame))
76-
self.assertFalse(df.empty)
77-
tm.assert_frame_equal(read_csv(
78-
tm.get_data_path('tips.csv')), df)
67+
df = read_csv('s3://pandas-test/tips.csv' +
68+
ext, compression=comp)
69+
self.assertTrue(isinstance(df, DataFrame))
70+
self.assertFalse(df.empty)
71+
tm.assert_frame_equal(read_csv(
72+
tm.get_data_path('tips.csv')), df)
7973

8074
# Read public file from bucket with not-public contents
8175
df = read_csv('s3://cant_get_it/tips.csv')
@@ -104,43 +98,31 @@ def test_parse_public_s3a_bucket(self):
10498
@tm.network
10599
def test_parse_public_s3_bucket_nrows(self):
106100
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
107-
if comp == 'bz2' and compat.PY2:
108-
# The Python 2 C parser can't read bz2 from S3.
109-
self.assertRaises(ValueError, read_csv,
110-
's3://pandas-test/tips.csv' + ext,
111-
compression=comp)
112-
else:
113-
df = read_csv('s3://pandas-test/tips.csv' +
114-
ext, nrows=10, compression=comp)
115-
self.assertTrue(isinstance(df, DataFrame))
116-
self.assertFalse(df.empty)
117-
tm.assert_frame_equal(read_csv(
118-
tm.get_data_path('tips.csv')).iloc[:10], df)
101+
df = read_csv('s3://pandas-test/tips.csv' +
102+
ext, nrows=10, compression=comp)
103+
self.assertTrue(isinstance(df, DataFrame))
104+
self.assertFalse(df.empty)
105+
tm.assert_frame_equal(read_csv(
106+
tm.get_data_path('tips.csv')).iloc[:10], df)
119107

120108
@tm.network
121109
def test_parse_public_s3_bucket_chunked(self):
122110
# Read with a chunksize
123111
chunksize = 5
124112
local_tips = read_csv(tm.get_data_path('tips.csv'))
125113
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
126-
if comp == 'bz2' and compat.PY2:
127-
# The Python 2 C parser can't read bz2 from S3.
128-
self.assertRaises(ValueError, read_csv,
129-
's3://pandas-test/tips.csv' + ext,
130-
compression=comp)
131-
else:
132-
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
133-
chunksize=chunksize, compression=comp)
134-
self.assertEqual(df_reader.chunksize, chunksize)
135-
for i_chunk in [0, 1, 2]:
136-
# Read a couple of chunks and make sure we see them
137-
# properly.
138-
df = df_reader.get_chunk()
139-
self.assertTrue(isinstance(df, DataFrame))
140-
self.assertFalse(df.empty)
141-
true_df = local_tips.iloc[
142-
chunksize * i_chunk: chunksize * (i_chunk + 1)]
143-
tm.assert_frame_equal(true_df, df)
114+
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
115+
chunksize=chunksize, compression=comp)
116+
self.assertEqual(df_reader.chunksize, chunksize)
117+
for i_chunk in [0, 1, 2]:
118+
# Read a couple of chunks and make sure we see them
119+
# properly.
120+
df = df_reader.get_chunk()
121+
self.assertTrue(isinstance(df, DataFrame))
122+
self.assertFalse(df.empty)
123+
true_df = local_tips.iloc[
124+
chunksize * i_chunk: chunksize * (i_chunk + 1)]
125+
tm.assert_frame_equal(true_df, df)
144126

145127
@tm.network
146128
def test_parse_public_s3_bucket_chunked_python(self):

0 commit comments

Comments
 (0)