Allow TensorFlow json serializer to accept dicts with ndarray values #404

Merged · 3 commits · Oct 4, 2018

5 changes: 5 additions & 0 deletions CHANGELOG.rst
@@ -2,6 +2,11 @@
 CHANGELOG
 =========
 
+1.10.2
+======
+
+* bug-fix: default TensorFlow json serializer accepts dict of numpy arrays
+
 1.10.1
 ======
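
The behavior described by the new changelog entry can be exercised roughly as follows; a minimal sketch, assuming json_serializer from src/sagemaker/predictor.py (shown next) is the serializer the TensorFlow predictor uses by default:

    # Minimal sketch of the fixed behavior; assumes json_serializer from
    # src/sagemaker/predictor.py is the default TensorFlow JSON serializer.
    import numpy as np

    from sagemaker.predictor import json_serializer

    # dict values that are numpy arrays are converted to lists before json.dumps
    payload = {'inputs': np.array([[1.0, 2.0], [3.0, 4.0]])}
    print(json_serializer(payload))       # {"inputs": [[1.0, 2.0], [3.0, 4.0]]}

    # empty inputs no longer raise ValueError
    print(json_serializer(np.array([])))  # []
    print(json_serializer({}))            # {}
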
31 changes: 6 additions & 25 deletions src/sagemaker/predictor.py
@@ -16,6 +16,7 @@
 import csv
 import json
 import numpy as np
+import six
 from six import StringIO, BytesIO
 
 from sagemaker.content_types import CONTENT_TYPE_JSON, CONTENT_TYPE_CSV, CONTENT_TYPE_NPY
@@ -237,48 +238,28 @@ def __call__(self, data):
         Returns:
             object: Serialized data used for the request.
         """
-        if isinstance(data, np.ndarray):
-            if not data.size > 0:
-                raise ValueError("empty array can't be serialized")
-            return _json_serialize_numpy_array(data)
-
-        if isinstance(data, list):
-            if not len(data) > 0:
-                raise ValueError("empty array can't be serialized")
-            return _json_serialize_python_object(data)
-
         if isinstance(data, dict):
-            if not len(data.keys()) > 0:
-                raise ValueError("empty dictionary can't be serialized")
-            return _json_serialize_python_object(data)
+            # convert each value in dict from a numpy array to a list if necessary, so they can be json serialized
+            return json.dumps({k: _ndarray_to_list(v) for k, v in six.iteritems(data)})
 
         # files and buffers
         if hasattr(data, 'read'):
             return _json_serialize_from_buffer(data)
 
-        raise ValueError("Unable to handle input format: {}".format(type(data)))
+        return json.dumps(_ndarray_to_list(data))
 
 
 json_serializer = _JsonSerializer()
 
 
-def _json_serialize_numpy_array(data):
-    # numpy arrays can't be serialized but we know they have uniform type
-    return _json_serialize_python_object(data.tolist())
-
-
-def _json_serialize_python_object(data):
-    return _json_serialize_object(data)
+def _ndarray_to_list(data):
+    return data.tolist() if isinstance(data, np.ndarray) else data
 
 
 def _json_serialize_from_buffer(buff):
     return buff.read()
 
 
-def _json_serialize_object(data):
-    return json.dumps(data)
-
-
 class _JsonDeserializer(object):
     def __init__(self):
         self.accept = CONTENT_TYPE_JSON
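
The rewritten __call__ now has three branches: dict inputs, file-like objects, and everything else. An illustrative sketch of each branch (illustrative only; it assumes the module layout above and uses six.StringIO as a stand-in for a file-like payload):

    # Exercising each branch of the rewritten _JsonSerializer.__call__.
    import numpy as np
    from six import StringIO

    from sagemaker.predictor import json_serializer

    # dict branch: ndarray values become lists, other values pass through unchanged
    print(json_serializer({'a': np.array([1, 2]), 'b': [3, 4]}))  # {"a": [1, 2], "b": [3, 4]}

    # file/buffer branch: the buffer contents are returned as-is
    print(json_serializer(StringIO('{"already": "json"}')))       # {"already": "json"}

    # fallback branch: ndarrays become lists, plain Python objects go straight to json.dumps
    print(json_serializer(np.array([[1, 2], [3, 4]])))            # [[1, 2], [3, 4]]
    print(json_serializer([5, 6, 7]))                             # [5, 6, 7]
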
15 changes: 4 additions & 11 deletions tests/unit/test_predictor.py
@@ -40,11 +40,8 @@ def test_json_serializer_numpy_valid_2dimensional():
     assert result == '[[1, 2, 3], [3, 4, 5]]'
 
 
-def test_json_serializer_numpy_invalid_empty():
-    with pytest.raises(ValueError) as invalid_input:
-        json_serializer(np.array([]))
-
-    assert "empty array" in str(invalid_input)
+def test_json_serializer_empty():
+    assert json_serializer(np.array([])) == '[]'
 
 
 def test_json_serializer_python_array():
@@ -62,15 +59,11 @@ def test_json_serializer_python_dictionary():
 
 
 def test_json_serializer_python_invalid_empty():
-    with pytest.raises(ValueError) as error:
-        json_serializer([])
-    assert "empty array" in str(error)
+    assert json_serializer([]) == '[]'
 
 
 def test_json_serializer_python_dictionary_invalid_empty():
-    with pytest.raises(ValueError) as error:
-        json_serializer({})
-    assert "empty dictionary" in str(error)
+    assert json_serializer({}) == '{}'
 
 
 def test_json_serializer_csv_buffer():
21 changes: 21 additions & 0 deletions tests/unit/test_tf_predictor.py
@@ -336,3 +336,24 @@ def mock_response(expected_response, sagemaker_session, content_type):
     sagemaker_session.sagemaker_runtime_client.invoke_endpoint.return_value = {
         'ContentType': content_type,
         'Body': io.BytesIO(expected_response)}
+
+
+def test_json_serialize_dict():
+    data = {'tensor1': [1, 2, 3], 'tensor2': [4, 5, 6]}
+    serialized = tf_json_serializer(data)
+    # deserialize again for assertion, since dict order is not guaranteed
+    deserialized = json.loads(serialized)
+    assert deserialized == data
+
+
+def test_json_serialize_dict_with_numpy():
+    data = {'tensor1': np.asarray([1, 2, 3]), 'tensor2': np.asarray([4, 5, 6])}
+    serialized = tf_json_serializer(data)
+    # deserialize again for assertion, since dict order is not guaranteed
+    deserialized = json.loads(serialized)
+    assert deserialized == {'tensor1': [1, 2, 3], 'tensor2': [4, 5, 6]}
+
+
+def test_json_serialize_numpy():
+    data = np.asarray([[1, 2, 3], [4, 5, 6]])
+    assert tf_json_serializer(data) == '[[1, 2, 3], [4, 5, 6]]'
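
As a usage-level illustration (not part of this PR): a hedged sketch of attaching a JSON serializer and deserializer to a predictor, assuming the 1.x-style RealTimePredictor constructor and the json_serializer/json_deserializer objects from sagemaker/predictor.py; the endpoint name is a placeholder:

    # Hedged sketch: the RealTimePredictor signature and the endpoint name are
    # assumptions for illustration; only the serializer behavior comes from this PR.
    import numpy as np

    from sagemaker.predictor import RealTimePredictor, json_deserializer, json_serializer

    predictor = RealTimePredictor(
        endpoint='my-tensorflow-endpoint',  # placeholder endpoint name
        serializer=json_serializer,         # dicts of numpy arrays are now accepted
        deserializer=json_deserializer,
    )

    # With this fix, a dict of numpy arrays can be passed directly to predict().
    result = predictor.predict({'inputs': np.array([[1.0, 2.0], [3.0, 4.0]])})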