|
15 | 15 | import numpy as np
|
16 | 16 | import tempfile
|
17 | 17 | import pytest
|
18 |
| -import itertools |
19 |
| -from scipy.sparse import coo_matrix |
20 | 18 | from sagemaker.amazon.common import (
|
21 | 19 | record_deserializer,
|
22 | 20 | write_numpy_to_dense_tensor,
|
@@ -152,195 +150,6 @@ def test_invalid_label():
|
152 | 150 | write_numpy_to_dense_tensor(f, array, label_data)
|
153 | 151 |
|
154 | 152 |
|
155 |
| -def test_dense_float_write_spmatrix_to_sparse_tensor(): |
156 |
| - array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]] |
157 |
| - keys_data = [[0, 1, 2], [0, 1, 2]] |
158 |
| - array = coo_matrix(np.array(array_data)) |
159 |
| - with tempfile.TemporaryFile() as f: |
160 |
| - write_spmatrix_to_sparse_tensor(f, array) |
161 |
| - f.seek(0) |
162 |
| - for record_data, expected_data, expected_keys in zip( |
163 |
| - read_recordio(f), array_data, keys_data |
164 |
| - ): |
165 |
| - record = Record() |
166 |
| - record.ParseFromString(record_data) |
167 |
| - assert record.features["values"].float64_tensor.values == expected_data |
168 |
| - assert record.features["values"].float64_tensor.keys == expected_keys |
169 |
| - assert record.features["values"].float64_tensor.shape == [len(expected_data)] |
170 |
| - |
171 |
| - |
172 |
| -def test_dense_float32_write_spmatrix_to_sparse_tensor(): |
173 |
| - array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]] |
174 |
| - keys_data = [[0, 1, 2], [0, 1, 2]] |
175 |
| - array = coo_matrix(np.array(array_data).astype(np.dtype("float32"))) |
176 |
| - with tempfile.TemporaryFile() as f: |
177 |
| - write_spmatrix_to_sparse_tensor(f, array) |
178 |
| - f.seek(0) |
179 |
| - for record_data, expected_data, expected_keys in zip( |
180 |
| - read_recordio(f), array_data, keys_data |
181 |
| - ): |
182 |
| - record = Record() |
183 |
| - record.ParseFromString(record_data) |
184 |
| - assert record.features["values"].float32_tensor.values == expected_data |
185 |
| - assert record.features["values"].float32_tensor.keys == expected_keys |
186 |
| - assert record.features["values"].float32_tensor.shape == [len(expected_data)] |
187 |
| - |
188 |
| - |
189 |
| -def test_dense_int_write_spmatrix_to_sparse_tensor(): |
190 |
| - array_data = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]] |
191 |
| - keys_data = [[0, 1, 2], [0, 1, 2]] |
192 |
| - array = coo_matrix(np.array(array_data).astype(np.dtype("int"))) |
193 |
| - with tempfile.TemporaryFile() as f: |
194 |
| - write_spmatrix_to_sparse_tensor(f, array) |
195 |
| - f.seek(0) |
196 |
| - for record_data, expected_data, expected_keys in zip( |
197 |
| - read_recordio(f), array_data, keys_data |
198 |
| - ): |
199 |
| - record = Record() |
200 |
| - record.ParseFromString(record_data) |
201 |
| - assert record.features["values"].int32_tensor.values == expected_data |
202 |
| - assert record.features["values"].int32_tensor.keys == expected_keys |
203 |
| - assert record.features["values"].int32_tensor.shape == [len(expected_data)] |
204 |
| - |
205 |
| - |
206 |
| -def test_dense_int_spmatrix_to_sparse_label(): |
207 |
| - array_data = [[1, 2, 3], [10, 20, 3]] |
208 |
| - keys_data = [[0, 1, 2], [0, 1, 2]] |
209 |
| - array = coo_matrix(np.array(array_data)) |
210 |
| - label_data = np.array([99, 98, 97]) |
211 |
| - with tempfile.TemporaryFile() as f: |
212 |
| - write_spmatrix_to_sparse_tensor(f, array, label_data) |
213 |
| - f.seek(0) |
214 |
| - for record_data, expected_data, expected_keys, label in zip( |
215 |
| - read_recordio(f), array_data, keys_data, label_data |
216 |
| - ): |
217 |
| - record = Record() |
218 |
| - record.ParseFromString(record_data) |
219 |
| - assert record.features["values"].int32_tensor.values == expected_data |
220 |
| - assert record.features["values"].int32_tensor.keys == expected_keys |
221 |
| - assert record.label["values"].int32_tensor.values == [label] |
222 |
| - assert record.features["values"].int32_tensor.shape == [len(expected_data)] |
223 |
| - |
224 |
| - |
225 |
| -def test_dense_float32_spmatrix_to_sparse_label(): |
226 |
| - array_data = [[1, 2, 3], [10, 20, 3]] |
227 |
| - keys_data = [[0, 1, 2], [0, 1, 2]] |
228 |
| - array = coo_matrix(np.array(array_data).astype("float32")) |
229 |
| - label_data = np.array([99, 98, 97]) |
230 |
| - with tempfile.TemporaryFile() as f: |
231 |
| - write_spmatrix_to_sparse_tensor(f, array, label_data) |
232 |
| - f.seek(0) |
233 |
| - for record_data, expected_data, expected_keys, label in zip( |
234 |
| - read_recordio(f), array_data, keys_data, label_data |
235 |
| - ): |
236 |
| - record = Record() |
237 |
| - record.ParseFromString(record_data) |
238 |
| - assert record.features["values"].float32_tensor.values == expected_data |
239 |
| - assert record.features["values"].float32_tensor.keys == expected_keys |
240 |
| - assert record.label["values"].int32_tensor.values == [label] |
241 |
| - assert record.features["values"].float32_tensor.shape == [len(expected_data)] |
242 |
| - |
243 |
| - |
244 |
| -def test_dense_float64_spmatrix_to_sparse_label(): |
245 |
| - array_data = [[1, 2, 3], [10, 20, 3]] |
246 |
| - keys_data = [[0, 1, 2], [0, 1, 2]] |
247 |
| - array = coo_matrix(np.array(array_data).astype("float64")) |
248 |
| - label_data = np.array([99, 98, 97]) |
249 |
| - with tempfile.TemporaryFile() as f: |
250 |
| - write_spmatrix_to_sparse_tensor(f, array, label_data) |
251 |
| - f.seek(0) |
252 |
| - for record_data, expected_data, expected_keys, label in zip( |
253 |
| - read_recordio(f), array_data, keys_data, label_data |
254 |
| - ): |
255 |
| - record = Record() |
256 |
| - record.ParseFromString(record_data) |
257 |
| - assert record.features["values"].float64_tensor.values == expected_data |
258 |
| - assert record.features["values"].float64_tensor.keys == expected_keys |
259 |
| - assert record.label["values"].int32_tensor.values == [label] |
260 |
| - assert record.features["values"].float64_tensor.shape == [len(expected_data)] |
261 |
| - |
262 |
| - |
263 |
| -def test_invalid_sparse_label(): |
264 |
| - array_data = [[1, 2, 3], [10, 20, 3]] |
265 |
| - array = coo_matrix(np.array(array_data)) |
266 |
| - label_data = np.array([99, 98, 97, 1000]).astype(np.dtype("float64")) |
267 |
| - with tempfile.TemporaryFile() as f: |
268 |
| - with pytest.raises(ValueError): |
269 |
| - write_spmatrix_to_sparse_tensor(f, array, label_data) |
270 |
| - |
271 |
| - |
272 |
| -def test_sparse_float_write_spmatrix_to_sparse_tensor(): |
273 |
| - n = 4 |
274 |
| - array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]] |
275 |
| - keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]] |
276 |
| - |
277 |
| - flatten_data = list(itertools.chain.from_iterable(array_data)) |
278 |
| - y_indices = list(itertools.chain.from_iterable(keys_data)) |
279 |
| - x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))] |
280 |
| - x_indices = list(itertools.chain.from_iterable(x_indices)) |
281 |
| - |
282 |
| - array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="float64") |
283 |
| - with tempfile.TemporaryFile() as f: |
284 |
| - write_spmatrix_to_sparse_tensor(f, array) |
285 |
| - f.seek(0) |
286 |
| - for record_data, expected_data, expected_keys in zip( |
287 |
| - read_recordio(f), array_data, keys_data |
288 |
| - ): |
289 |
| - record = Record() |
290 |
| - record.ParseFromString(record_data) |
291 |
| - assert record.features["values"].float64_tensor.values == expected_data |
292 |
| - assert record.features["values"].float64_tensor.keys == expected_keys |
293 |
| - assert record.features["values"].float64_tensor.shape == [n] |
294 |
| - |
295 |
| - |
296 |
| -def test_sparse_float32_write_spmatrix_to_sparse_tensor(): |
297 |
| - n = 4 |
298 |
| - array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]] |
299 |
| - keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]] |
300 |
| - |
301 |
| - flatten_data = list(itertools.chain.from_iterable(array_data)) |
302 |
| - y_indices = list(itertools.chain.from_iterable(keys_data)) |
303 |
| - x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))] |
304 |
| - x_indices = list(itertools.chain.from_iterable(x_indices)) |
305 |
| - |
306 |
| - array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="float32") |
307 |
| - with tempfile.TemporaryFile() as f: |
308 |
| - write_spmatrix_to_sparse_tensor(f, array) |
309 |
| - f.seek(0) |
310 |
| - for record_data, expected_data, expected_keys in zip( |
311 |
| - read_recordio(f), array_data, keys_data |
312 |
| - ): |
313 |
| - record = Record() |
314 |
| - record.ParseFromString(record_data) |
315 |
| - assert record.features["values"].float32_tensor.values == expected_data |
316 |
| - assert record.features["values"].float32_tensor.keys == expected_keys |
317 |
| - assert record.features["values"].float32_tensor.shape == [n] |
318 |
| - |
319 |
| - |
320 |
| -def test_sparse_int_write_spmatrix_to_sparse_tensor(): |
321 |
| - n = 4 |
322 |
| - array_data = [[1.0, 2.0], [10.0, 30.0], [100.0, 200.0, 300.0, 400.0], [1000.0, 2000.0, 3000.0]] |
323 |
| - keys_data = [[0, 1], [1, 2], [0, 1, 2, 3], [0, 2, 3]] |
324 |
| - |
325 |
| - flatten_data = list(itertools.chain.from_iterable(array_data)) |
326 |
| - y_indices = list(itertools.chain.from_iterable(keys_data)) |
327 |
| - x_indices = [[i] * len(keys_data[i]) for i in range(len(keys_data))] |
328 |
| - x_indices = list(itertools.chain.from_iterable(x_indices)) |
329 |
| - |
330 |
| - array = coo_matrix((flatten_data, (x_indices, y_indices)), dtype="int") |
331 |
| - with tempfile.TemporaryFile() as f: |
332 |
| - write_spmatrix_to_sparse_tensor(f, array) |
333 |
| - f.seek(0) |
334 |
| - for record_data, expected_data, expected_keys in zip( |
335 |
| - read_recordio(f), array_data, keys_data |
336 |
| - ): |
337 |
| - record = Record() |
338 |
| - record.ParseFromString(record_data) |
339 |
| - assert record.features["values"].int32_tensor.values == expected_data |
340 |
| - assert record.features["values"].int32_tensor.keys == expected_keys |
341 |
| - assert record.features["values"].int32_tensor.shape == [n] |
342 |
| - |
343 |
| - |
344 | 153 | def test_dense_to_sparse():
|
345 | 154 | array_data = [[1, 2, 3], [10, 20, 3]]
|
346 | 155 | array = np.array(array_data)
|
|
0 commit comments