From 90b7c014a7a079ec62d68e141bf3f835eadff650 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Thu, 9 Jul 2020 19:34:08 -0500 Subject: [PATCH] Add JSON serializer --- .../tensorflow/upgrade_from_legacy.rst | 6 +- src/sagemaker/mxnet/model.py | 5 +- src/sagemaker/predictor.py | 50 ------------- src/sagemaker/serializers.py | 34 +++++++++ src/sagemaker/tensorflow/model.py | 5 +- tests/integ/test_inference_pipeline.py | 5 +- tests/integ/test_multidatamodel.py | 4 +- tests/unit/sagemaker/test_serializers.py | 74 +++++++++++++++++++ tests/unit/test_predictor.py | 51 +------------ 9 files changed, 124 insertions(+), 110 deletions(-) create mode 100644 tests/unit/sagemaker/test_serializers.py diff --git a/doc/frameworks/tensorflow/upgrade_from_legacy.rst b/doc/frameworks/tensorflow/upgrade_from_legacy.rst index f47968d868..84f77c01a2 100644 --- a/doc/frameworks/tensorflow/upgrade_from_legacy.rst +++ b/doc/frameworks/tensorflow/upgrade_from_legacy.rst @@ -245,10 +245,10 @@ For example, if you want to use JSON serialization and deserialization: .. 
code:: python - from sagemaker.predictor import json_deserializer, json_serializer + from sagemaker.predictor import json_deserializer + from sagemaker.serializers import JSONSerializer - predictor.content_type = "application/json" - predictor.serializer = json_serializer + predictor.serializer = JSONSerializer() predictor.accept = "application/json" predictor.deserializer = json_deserializer diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py index 202ea6227e..cc1f9396f2 100644 --- a/src/sagemaker/mxnet/model.py +++ b/src/sagemaker/mxnet/model.py @@ -26,7 +26,8 @@ ) from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME from sagemaker.mxnet import defaults -from sagemaker.predictor import Predictor, json_serializer, json_deserializer +from sagemaker.predictor import Predictor, json_deserializer +from sagemaker.serializers import JSONSerializer logger = logging.getLogger("sagemaker") @@ -50,7 +51,7 @@ def __init__(self, endpoint_name, sagemaker_session=None): using the default AWS configuration chain. """ super(MXNetPredictor, self).__init__( - endpoint_name, sagemaker_session, json_serializer, json_deserializer + endpoint_name, sagemaker_session, JSONSerializer(), json_deserializer ) diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py index 04ccc36a69..5a65559301 100644 --- a/src/sagemaker/predictor.py +++ b/src/sagemaker/predictor.py @@ -16,7 +16,6 @@ import codecs import csv import json -import six from six import StringIO, BytesIO import numpy as np @@ -623,55 +622,6 @@ def __call__(self, stream, content_type): csv_deserializer = _CsvDeserializer() -class _JsonSerializer(object): - """Placeholder docstring""" - - def __init__(self): - """Placeholder docstring""" - self.content_type = CONTENT_TYPE_JSON - - def __call__(self, data): - """Take data of various formats and serialize them into the expected - request body. This uses information about supported input formats for - the deployed model. 
- - Args: - data (object): Data to be serialized. - - Returns: - object: Serialized data used for the request. - """ - if isinstance(data, dict): - # convert each value in dict from a numpy array to a list if necessary, so they can be - # json serialized - return json.dumps({k: _ndarray_to_list(v) for k, v in six.iteritems(data)}) - - # files and buffers - if hasattr(data, "read"): - return _json_serialize_from_buffer(data) - - return json.dumps(_ndarray_to_list(data)) - - -json_serializer = _JsonSerializer() - - -def _ndarray_to_list(data): - """ - Args: - data: - """ - return data.tolist() if isinstance(data, np.ndarray) else data - - -def _json_serialize_from_buffer(buff): - """ - Args: - buff: - """ - return buff.read() - - class _JsonDeserializer(object): """Placeholder docstring""" diff --git a/src/sagemaker/serializers.py b/src/sagemaker/serializers.py index 15328c28f2..cf7034ad98 100644 --- a/src/sagemaker/serializers.py +++ b/src/sagemaker/serializers.py @@ -14,6 +14,9 @@ from __future__ import absolute_import import abc +import json + +import numpy as np class BaseSerializer(abc.ABC): @@ -38,3 +41,34 @@ def serialize(self, data): @abc.abstractmethod def CONTENT_TYPE(self): """The MIME type of the data sent to the inference endpoint.""" + + +class JSONSerializer(BaseSerializer): + """Serialize data to a JSON formatted string.""" + + CONTENT_TYPE = "application/json" + + def serialize(self, data): + """Serialize data of various formats to a JSON formatted string. + + Args: + data (object): Data to be serialized. + + Returns: + str: The data serialized as a JSON string. 
+ """ + if isinstance(data, dict): + return json.dumps( + { + key: value.tolist() if isinstance(value, np.ndarray) else value + for key, value in data.items() + } + ) + + if hasattr(data, "read"): + return data.read() + + if isinstance(data, np.ndarray): + return json.dumps(data.tolist()) + + return json.dumps(data) diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py index fe8e6cfafd..5215f971cc 100644 --- a/src/sagemaker/tensorflow/model.py +++ b/src/sagemaker/tensorflow/model.py @@ -18,7 +18,8 @@ import sagemaker from sagemaker.content_types import CONTENT_TYPE_JSON from sagemaker.fw_utils import create_image_uri -from sagemaker.predictor import json_serializer, json_deserializer, Predictor +from sagemaker.predictor import json_deserializer, Predictor +from sagemaker.serializers import JSONSerializer class TensorFlowPredictor(Predictor): @@ -30,7 +31,7 @@ def __init__( self, endpoint_name, sagemaker_session=None, - serializer=json_serializer, + serializer=JSONSerializer(), deserializer=json_deserializer, content_type=None, model_name=None, diff --git a/tests/integ/test_inference_pipeline.py b/tests/integ/test_inference_pipeline.py index ec056d9c92..1cef895b51 100644 --- a/tests/integ/test_inference_pipeline.py +++ b/tests/integ/test_inference_pipeline.py @@ -26,7 +26,8 @@ from sagemaker.content_types import CONTENT_TYPE_CSV from sagemaker.model import Model from sagemaker.pipeline import PipelineModel -from sagemaker.predictor import Predictor, json_serializer +from sagemaker.predictor import Predictor +from sagemaker.serializers import JSONSerializer from sagemaker.sparkml.model import SparkMLModel from sagemaker.utils import sagemaker_timestamp @@ -128,7 +129,7 @@ def test_inference_pipeline_model_deploy(sagemaker_session, cpu_instance_type): predictor = Predictor( endpoint_name=endpoint_name, sagemaker_session=sagemaker_session, - serializer=json_serializer, + serializer=JSONSerializer(), content_type=CONTENT_TYPE_CSV, 
accept=CONTENT_TYPE_CSV, ) diff --git a/tests/integ/test_multidatamodel.py b/tests/integ/test_multidatamodel.py index 5e29b4607b..572aba9269 100644 --- a/tests/integ/test_multidatamodel.py +++ b/tests/integ/test_multidatamodel.py @@ -290,7 +290,7 @@ def test_multi_data_model_deploy_trained_model_from_framework_estimator( assert PRETRAINED_MODEL_PATH_2 in endpoint_models # Define a predictor to set `serializer` parameter with npy_serializer - # instead of `json_serializer` in the default predictor returned by `MXNetPredictor` + # instead of `JSONSerializer` in the default predictor returned by `MXNetPredictor` # Since we are using a placeholder container image the prediction results are not accurate. predictor = Predictor( endpoint_name=endpoint_name, @@ -391,7 +391,7 @@ def test_multi_data_model_deploy_train_model_from_amazon_first_party_estimator( assert PRETRAINED_MODEL_PATH_2 in endpoint_models # Define a predictor to set `serializer` parameter with npy_serializer - # instead of `json_serializer` in the default predictor returned by `MXNetPredictor` + # instead of `JSONSerializer` in the default predictor returned by `MXNetPredictor` # Since we are using a placeholder container image the prediction results are not accurate. predictor = Predictor( endpoint_name=endpoint_name, diff --git a/tests/unit/sagemaker/test_serializers.py b/tests/unit/sagemaker/test_serializers.py new file mode 100644 index 0000000000..3e2d14dafb --- /dev/null +++ b/tests/unit/sagemaker/test_serializers.py @@ -0,0 +1,74 @@ +# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. 
See the License for the specific +# language governing permissions and limitations under the License. +from __future__ import absolute_import + +import json +import os + +import numpy as np +import pytest + +from sagemaker.serializers import JSONSerializer +from tests.unit import DATA_DIR + + +@pytest.fixture +def json_serializer(): + return JSONSerializer() + + +def test_json_serializer_numpy_valid(json_serializer): + result = json_serializer.serialize(np.array([1, 2, 3])) + + assert result == "[1, 2, 3]" + + +def test_json_serializer_numpy_valid_2dimensional(json_serializer): + result = json_serializer.serialize(np.array([[1, 2, 3], [3, 4, 5]])) + + assert result == "[[1, 2, 3], [3, 4, 5]]" + + +def test_json_serializer_empty(json_serializer): + assert json_serializer.serialize(np.array([])) == "[]" + + +def test_json_serializer_python_array(json_serializer): + result = json_serializer.serialize([1, 2, 3]) + + assert result == "[1, 2, 3]" + + +def test_json_serializer_python_dictionary(json_serializer): + d = {"gender": "m", "age": 22, "city": "Paris"} + + result = json_serializer.serialize(d) + + assert json.loads(result) == d + + +def test_json_serializer_python_invalid_empty(json_serializer): + assert json_serializer.serialize([]) == "[]" + + +def test_json_serializer_python_dictionary_invalid_empty(json_serializer): + assert json_serializer.serialize({}) == "{}" + + +def test_json_serializer_csv_buffer(json_serializer): + csv_file_path = os.path.join(DATA_DIR, "with_integers.csv") + with open(csv_file_path) as csv_file: + validation_value = csv_file.read() + csv_file.seek(0) + result = json_serializer.serialize(csv_file) + assert result == validation_value diff --git a/tests/unit/test_predictor.py b/tests/unit/test_predictor.py index 0c760a6332..65055d4abc 100644 --- a/tests/unit/test_predictor.py +++ b/tests/unit/test_predictor.py @@ -22,64 +22,17 @@ from sagemaker.predictor import Predictor from sagemaker.predictor import ( - json_serializer, 
json_deserializer, csv_serializer, csv_deserializer, npy_serializer, ) +from sagemaker.serializers import JSONSerializer from tests.unit import DATA_DIR # testing serialization functions -def test_json_serializer_numpy_valid(): - result = json_serializer(np.array([1, 2, 3])) - - assert result == "[1, 2, 3]" - - -def test_json_serializer_numpy_valid_2dimensional(): - result = json_serializer(np.array([[1, 2, 3], [3, 4, 5]])) - - assert result == "[[1, 2, 3], [3, 4, 5]]" - - -def test_json_serializer_empty(): - assert json_serializer(np.array([])) == "[]" - - -def test_json_serializer_python_array(): - result = json_serializer([1, 2, 3]) - - assert result == "[1, 2, 3]" - - -def test_json_serializer_python_dictionary(): - d = {"gender": "m", "age": 22, "city": "Paris"} - - result = json_serializer(d) - - assert json.loads(result) == d - - -def test_json_serializer_python_invalid_empty(): - assert json_serializer([]) == "[]" - - -def test_json_serializer_python_dictionary_invalid_empty(): - assert json_serializer({}) == "{}" - - -def test_json_serializer_csv_buffer(): - csv_file_path = os.path.join(DATA_DIR, "with_integers.csv") - with open(csv_file_path) as csv_file: - validation_value = csv_file.read() - csv_file.seek(0) - result = json_serializer(csv_file) - assert result == validation_value - - def test_csv_serializer_str(): original = "1,2,3" result = csv_serializer("1,2,3") @@ -404,7 +357,7 @@ def test_predict_call_with_headers_and_json(): sagemaker_session, content_type="not/json", accept="also/not-json", - serializer=json_serializer, + serializer=JSONSerializer(), ) data = [1, 2]