1
1
from io import BytesIO
2
2
import os
3
+ import zipfile
3
4
4
5
import numpy as np
5
6
import pytest
@@ -88,16 +89,23 @@ def test_to_read_gcs(gcs_buffer, format):
88
89
tm .assert_frame_equal (df1 , df2 )
89
90
90
91
91
def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str):
    """
    Assert that two compressed payloads are equivalent.

    For zip compression, only compare the CRC-32 checksum of the file contents
    to avoid checking the time-dependent last-modified timestamp which
    in some CI builds is off-by-one. Both archives must also contain the same
    number of members. For any other compression, the raw bytes must be
    identical.

    See https://en.wikipedia.org/wiki/ZIP_(file_format)#File_headers

    Parameters
    ----------
    result : bytes
        The compressed payload produced by the code under test.
    expected : bytes
        The reference compressed payload.
    compression : str
        Name of the compression method; only ``"zip"`` is special-cased.

    Raises
    ------
    AssertionError
        If the payloads are not considered equal.
    """
    if compression == "zip":
        with zipfile.ZipFile(BytesIO(result)) as res, zipfile.ZipFile(
            BytesIO(expected)
        ) as exp:
            res_infos = res.infolist()
            exp_infos = exp.infolist()
            # zip() stops at the shorter sequence, so a missing or extra
            # member would otherwise go unnoticed.
            assert len(res_infos) == len(exp_infos)
            # Only compare the CRC checksum of the file contents
            for res_info, exp_info in zip(res_infos, exp_infos):
                assert res_info.CRC == exp_info.CRC
    else:
        assert result == expected
101
109
102
110
103
111
@td .skip_if_no ("gcsfs" )
@@ -126,7 +134,7 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)
126
134
df .to_csv (path_gcs , compression = compression , encoding = encoding )
127
135
res = gcs_buffer .getvalue ()
128
136
expected = buffer .getvalue ()
129
- assert_equal_zip_safe (res , expected )
137
+ assert_equal_zip_safe (res , expected , compression_only )
130
138
131
139
read_df = read_csv (
132
140
path_gcs , index_col = 0 , compression = compression_only , encoding = encoding
@@ -142,7 +150,7 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)
142
150
143
151
res = gcs_buffer .getvalue ()
144
152
expected = buffer .getvalue ()
145
- assert_equal_zip_safe (res , expected )
153
+ assert_equal_zip_safe (res , expected , compression_only )
146
154
147
155
read_df = read_csv (path_gcs , index_col = 0 , compression = "infer" , encoding = encoding )
148
156
tm .assert_frame_equal (df , read_df )
0 commit comments