From 90b7c014a7a079ec62d68e141bf3f835eadff650 Mon Sep 17 00:00:00 2001
From: Balaji Veeramani <bvveeram@amazon.com>
Date: Thu, 9 Jul 2020 19:34:08 -0500
Subject: [PATCH] Add JSON serializer

---
 .../tensorflow/upgrade_from_legacy.rst        |  6 +-
 src/sagemaker/mxnet/model.py                  |  5 +-
 src/sagemaker/predictor.py                    | 50 -------------
 src/sagemaker/serializers.py                  | 34 +++++++++
 src/sagemaker/tensorflow/model.py             |  5 +-
 tests/integ/test_inference_pipeline.py        |  5 +-
 tests/integ/test_multidatamodel.py            |  4 +-
 tests/unit/sagemaker/test_serializers.py      | 74 +++++++++++++++++++
 tests/unit/test_predictor.py                  | 51 +------------
 9 files changed, 124 insertions(+), 110 deletions(-)
 create mode 100644 tests/unit/sagemaker/test_serializers.py

diff --git a/doc/frameworks/tensorflow/upgrade_from_legacy.rst b/doc/frameworks/tensorflow/upgrade_from_legacy.rst
index f47968d868..84f77c01a2 100644
--- a/doc/frameworks/tensorflow/upgrade_from_legacy.rst
+++ b/doc/frameworks/tensorflow/upgrade_from_legacy.rst
@@ -245,10 +245,10 @@ For example, if you want to use JSON serialization and deserialization:
 
 .. code:: python
 
-    from sagemaker.predictor import json_deserializer, json_serializer
+    from sagemaker.predictor import json_deserializer
+    from sagemaker.serializers import JSONSerializer
 
-    predictor.content_type = "application/json"
-    predictor.serializer = json_serializer
+    predictor.serializer = JSONSerializer()
     predictor.accept = "application/json"
     predictor.deserializer = json_deserializer
 
diff --git a/src/sagemaker/mxnet/model.py b/src/sagemaker/mxnet/model.py
index 202ea6227e..cc1f9396f2 100644
--- a/src/sagemaker/mxnet/model.py
+++ b/src/sagemaker/mxnet/model.py
@@ -26,7 +26,8 @@
 )
 from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME
 from sagemaker.mxnet import defaults
-from sagemaker.predictor import Predictor, json_serializer, json_deserializer
+from sagemaker.predictor import Predictor, json_deserializer
+from sagemaker.serializers import JSONSerializer
 
 logger = logging.getLogger("sagemaker")
 
@@ -50,7 +51,7 @@ def __init__(self, endpoint_name, sagemaker_session=None):
                 using the default AWS configuration chain.
         """
         super(MXNetPredictor, self).__init__(
-            endpoint_name, sagemaker_session, json_serializer, json_deserializer
+            endpoint_name, sagemaker_session, JSONSerializer(), json_deserializer
         )
 
 
diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py
index 04ccc36a69..5a65559301 100644
--- a/src/sagemaker/predictor.py
+++ b/src/sagemaker/predictor.py
@@ -16,7 +16,6 @@
 import codecs
 import csv
 import json
-import six
 from six import StringIO, BytesIO
 import numpy as np
 
@@ -623,55 +622,6 @@ def __call__(self, stream, content_type):
 csv_deserializer = _CsvDeserializer()
 
 
-class _JsonSerializer(object):
-    """Placeholder docstring"""
-
-    def __init__(self):
-        """Placeholder docstring"""
-        self.content_type = CONTENT_TYPE_JSON
-
-    def __call__(self, data):
-        """Take data of various formats and serialize them into the expected
-        request body. This uses information about supported input formats for
-        the deployed model.
-
-        Args:
-            data (object): Data to be serialized.
-
-        Returns:
-            object: Serialized data used for the request.
-        """
-        if isinstance(data, dict):
-            # convert each value in dict from a numpy array to a list if necessary, so they can be
-            # json serialized
-            return json.dumps({k: _ndarray_to_list(v) for k, v in six.iteritems(data)})
-
-        # files and buffers
-        if hasattr(data, "read"):
-            return _json_serialize_from_buffer(data)
-
-        return json.dumps(_ndarray_to_list(data))
-
-
-json_serializer = _JsonSerializer()
-
-
-def _ndarray_to_list(data):
-    """
-    Args:
-        data:
-    """
-    return data.tolist() if isinstance(data, np.ndarray) else data
-
-
-def _json_serialize_from_buffer(buff):
-    """
-    Args:
-        buff:
-    """
-    return buff.read()
-
-
 class _JsonDeserializer(object):
     """Placeholder docstring"""
 
diff --git a/src/sagemaker/serializers.py b/src/sagemaker/serializers.py
index 15328c28f2..cf7034ad98 100644
--- a/src/sagemaker/serializers.py
+++ b/src/sagemaker/serializers.py
@@ -14,6 +14,9 @@
 from __future__ import absolute_import
 
 import abc
+import json
+
+import numpy as np
 
 
 class BaseSerializer(abc.ABC):
@@ -38,3 +41,34 @@ def serialize(self, data):
     @abc.abstractmethod
     def CONTENT_TYPE(self):
         """The MIME type of the data sent to the inference endpoint."""
+
+
+class JSONSerializer(BaseSerializer):
+    """Serialize data to a JSON formatted string."""
+
+    CONTENT_TYPE = "application/json"
+
+    def serialize(self, data):
+        """Convert ``data`` into the JSON request body sent to the endpoint.
+
+        Args:
+            data (object): A dict (top-level NumPy array values become lists),
+                a NumPy array, an object with a ``read`` method (its contents
+                are returned unchanged), or any other ``json.dumps``-able value.
+
+        Returns:
+            object: A JSON string, or the result of ``data.read()`` for
+                file-like input.
+        """
+        if isinstance(data, dict):
+            converted = {
+                key: value.tolist() if isinstance(value, np.ndarray) else value
+                for key, value in data.items()
+            }
+            return json.dumps(converted)
+
+        if hasattr(data, "read"):
+            return data.read()
+
+        payload = data.tolist() if isinstance(data, np.ndarray) else data
+        return json.dumps(payload)
diff --git a/src/sagemaker/tensorflow/model.py b/src/sagemaker/tensorflow/model.py
index fe8e6cfafd..5215f971cc 100644
--- a/src/sagemaker/tensorflow/model.py
+++ b/src/sagemaker/tensorflow/model.py
@@ -18,7 +18,8 @@
 import sagemaker
 from sagemaker.content_types import CONTENT_TYPE_JSON
 from sagemaker.fw_utils import create_image_uri
-from sagemaker.predictor import json_serializer, json_deserializer, Predictor
+from sagemaker.predictor import json_deserializer, Predictor
+from sagemaker.serializers import JSONSerializer
 
 
 class TensorFlowPredictor(Predictor):
@@ -30,7 +31,7 @@ def __init__(
         self,
         endpoint_name,
         sagemaker_session=None,
-        serializer=json_serializer,
+        serializer=JSONSerializer(),
         deserializer=json_deserializer,
         content_type=None,
         model_name=None,
diff --git a/tests/integ/test_inference_pipeline.py b/tests/integ/test_inference_pipeline.py
index ec056d9c92..1cef895b51 100644
--- a/tests/integ/test_inference_pipeline.py
+++ b/tests/integ/test_inference_pipeline.py
@@ -26,7 +26,8 @@
 from sagemaker.content_types import CONTENT_TYPE_CSV
 from sagemaker.model import Model
 from sagemaker.pipeline import PipelineModel
-from sagemaker.predictor import Predictor, json_serializer
+from sagemaker.predictor import Predictor
+from sagemaker.serializers import JSONSerializer
 from sagemaker.sparkml.model import SparkMLModel
 from sagemaker.utils import sagemaker_timestamp
 
@@ -128,7 +129,7 @@ def test_inference_pipeline_model_deploy(sagemaker_session, cpu_instance_type):
         predictor = Predictor(
             endpoint_name=endpoint_name,
             sagemaker_session=sagemaker_session,
-            serializer=json_serializer,
+            serializer=JSONSerializer(),
             content_type=CONTENT_TYPE_CSV,
             accept=CONTENT_TYPE_CSV,
         )
diff --git a/tests/integ/test_multidatamodel.py b/tests/integ/test_multidatamodel.py
index 5e29b4607b..572aba9269 100644
--- a/tests/integ/test_multidatamodel.py
+++ b/tests/integ/test_multidatamodel.py
@@ -290,7 +290,7 @@ def test_multi_data_model_deploy_trained_model_from_framework_estimator(
         assert PRETRAINED_MODEL_PATH_2 in endpoint_models
 
         # Define a predictor to set `serializer` parameter with npy_serializer
-        # instead of `json_serializer` in the default predictor returned by `MXNetPredictor`
+        # instead of `JSONSerializer` in the default predictor returned by `MXNetPredictor`
         # Since we are using a placeholder container image the prediction results are not accurate.
         predictor = Predictor(
             endpoint_name=endpoint_name,
@@ -391,7 +391,7 @@ def test_multi_data_model_deploy_train_model_from_amazon_first_party_estimator(
         assert PRETRAINED_MODEL_PATH_2 in endpoint_models
 
         # Define a predictor to set `serializer` parameter with npy_serializer
-        # instead of `json_serializer` in the default predictor returned by `MXNetPredictor`
+        # instead of `JSONSerializer` in the default predictor returned by `MXNetPredictor`
         # Since we are using a placeholder container image the prediction results are not accurate.
         predictor = Predictor(
             endpoint_name=endpoint_name,
diff --git a/tests/unit/sagemaker/test_serializers.py b/tests/unit/sagemaker/test_serializers.py
new file mode 100644
index 0000000000..3e2d14dafb
--- /dev/null
+++ b/tests/unit/sagemaker/test_serializers.py
@@ -0,0 +1,74 @@
+# Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import json
+import os
+
+import numpy as np
+import pytest
+
+from sagemaker.serializers import JSONSerializer
+from tests.unit import DATA_DIR
+
+
+@pytest.fixture
+def json_serializer():
+    return JSONSerializer()
+
+
+def test_json_serializer_numpy_valid(json_serializer):
+    result = json_serializer.serialize(np.array([1, 2, 3]))
+
+    assert result == "[1, 2, 3]"
+
+
+def test_json_serializer_numpy_valid_2dimensional(json_serializer):
+    result = json_serializer.serialize(np.array([[1, 2, 3], [3, 4, 5]]))
+
+    assert result == "[[1, 2, 3], [3, 4, 5]]"
+
+
+def test_json_serializer_empty(json_serializer):
+    assert json_serializer.serialize(np.array([])) == "[]"
+
+
+def test_json_serializer_python_array(json_serializer):
+    result = json_serializer.serialize([1, 2, 3])
+
+    assert result == "[1, 2, 3]"
+
+
+def test_json_serializer_python_dictionary(json_serializer):
+    d = {"gender": "m", "age": 22, "city": "Paris"}
+
+    result = json_serializer.serialize(d)
+
+    assert json.loads(result) == d
+
+
+def test_json_serializer_python_invalid_empty(json_serializer):
+    assert json_serializer.serialize([]) == "[]"
+
+
+def test_json_serializer_python_dictionary_invalid_empty(json_serializer):
+    assert json_serializer.serialize({}) == "{}"
+
+
+def test_json_serializer_csv_buffer(json_serializer):
+    csv_file_path = os.path.join(DATA_DIR, "with_integers.csv")
+    with open(csv_file_path) as csv_file:
+        validation_value = csv_file.read()
+        csv_file.seek(0)
+        result = json_serializer.serialize(csv_file)
+        assert result == validation_value
diff --git a/tests/unit/test_predictor.py b/tests/unit/test_predictor.py
index 0c760a6332..65055d4abc 100644
--- a/tests/unit/test_predictor.py
+++ b/tests/unit/test_predictor.py
@@ -22,64 +22,17 @@
 
 from sagemaker.predictor import Predictor
 from sagemaker.predictor import (
-    json_serializer,
     json_deserializer,
     csv_serializer,
     csv_deserializer,
     npy_serializer,
 )
+from sagemaker.serializers import JSONSerializer
 from tests.unit import DATA_DIR
 
 # testing serialization functions
 
 
-def test_json_serializer_numpy_valid():
-    result = json_serializer(np.array([1, 2, 3]))
-
-    assert result == "[1, 2, 3]"
-
-
-def test_json_serializer_numpy_valid_2dimensional():
-    result = json_serializer(np.array([[1, 2, 3], [3, 4, 5]]))
-
-    assert result == "[[1, 2, 3], [3, 4, 5]]"
-
-
-def test_json_serializer_empty():
-    assert json_serializer(np.array([])) == "[]"
-
-
-def test_json_serializer_python_array():
-    result = json_serializer([1, 2, 3])
-
-    assert result == "[1, 2, 3]"
-
-
-def test_json_serializer_python_dictionary():
-    d = {"gender": "m", "age": 22, "city": "Paris"}
-
-    result = json_serializer(d)
-
-    assert json.loads(result) == d
-
-
-def test_json_serializer_python_invalid_empty():
-    assert json_serializer([]) == "[]"
-
-
-def test_json_serializer_python_dictionary_invalid_empty():
-    assert json_serializer({}) == "{}"
-
-
-def test_json_serializer_csv_buffer():
-    csv_file_path = os.path.join(DATA_DIR, "with_integers.csv")
-    with open(csv_file_path) as csv_file:
-        validation_value = csv_file.read()
-        csv_file.seek(0)
-        result = json_serializer(csv_file)
-        assert result == validation_value
-
-
 def test_csv_serializer_str():
     original = "1,2,3"
     result = csv_serializer("1,2,3")
@@ -404,7 +357,7 @@ def test_predict_call_with_headers_and_json():
         sagemaker_session,
         content_type="not/json",
         accept="also/not-json",
-        serializer=json_serializer,
+        serializer=JSONSerializer(),
     )
 
     data = [1, 2]