Merge branch 'zwei' into add-csv-deserializer

bveeramani · web-flow · commit 72fee9ea7d65 · 2020-07-08T20:54:28.000-05:00
diff --git a/src/sagemaker/cli/compatibility/v2/ast_transformer.py b/src/sagemaker/cli/compatibility/v2/ast_transformer.py
@@ -30,6 +30,10 @@
     modifiers.airflow.ModelConfigImageURIRenamer(),
     modifiers.renamed_params.DistributionParameterRenamer(),
     modifiers.renamed_params.S3SessionRenamer(),
+    modifiers.renamed_params.EstimatorCreateModelImageURIRenamer(),
+    modifiers.renamed_params.SessionCreateModelImageURIRenamer(),
+    modifiers.renamed_params.SessionCreateEndpointImageURIRenamer(),
+    modifiers.training_params.TrainPrefixRemover(),
 ]
 
 IMPORT_MODIFIERS = [modifiers.tfs.TensorFlowServingImportRenamer()]
diff --git a/src/sagemaker/cli/compatibility/v2/modifiers/__init__.py b/src/sagemaker/cli/compatibility/v2/modifiers/__init__.py
@@ -21,4 +21,5 @@
     renamed_params,
     tf_legacy_mode,
     tfs,
+    training_params,
 )
diff --git a/src/sagemaker/cli/compatibility/v2/modifiers/renamed_params.py b/src/sagemaker/cli/compatibility/v2/modifiers/renamed_params.py
@@ -10,9 +10,7 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-"""Classes to modify Predictor code to be compatible
-with version 2.0 and later of the SageMaker Python SDK.
-"""
+"""Classes to handle renames for version 2.0 and later of the SageMaker Python SDK."""
 from __future__ import absolute_import
 
 import ast
diff --git a/src/sagemaker/cli/compatibility/v2/modifiers/training_params.py b/src/sagemaker/cli/compatibility/v2/modifiers/training_params.py
@@ -0,0 +1,97 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""Classes to handle training renames for version 2.0 and later of the SageMaker Python SDK."""
+from __future__ import absolute_import
+
+from sagemaker.cli.compatibility.v2.modifiers import matching
+from sagemaker.cli.compatibility.v2.modifiers.modifier import Modifier
+
+ESTIMATORS = {
+    "AlgorithmEstimator": ("sagemaker", "sagemaker.algorithm"),
+    "AmazonAlgorithmEstimatorBase": ("sagemaker.amazon.amazon_estimator",),
+    "Chainer": ("sagemaker.chainer", "sagemaker.chainer.estimator"),
+    "Estimator": ("sagemaker.estimator",),
+    "EstimatorBase": ("sagemaker.estimator",),
+    "FactorizationMachines": ("sagemaker", "sagemaker.amazon.factorization_machines"),
+    "Framework": ("sagemaker.estimator",),
+    "IPInsights": ("sagemaker", "sagemaker.amazon.ipinsights"),
+    "KMeans": ("sagemaker", "sagemaker.amazon.kmeans"),
+    "KNN": ("sagemaker", "sagemaker.amazon.knn"),
+    "LDA": ("sagemaker", "sagemaker.amazon.lda"),
+    "LinearLearner": ("sagemaker", "sagemaker.amazon.linear_learner"),
+    "MXNet": ("sagemaker.mxnet", "sagemaker.mxnet.estimator"),
+    "NTM": ("sagemaker", "sagemaker.amazon.ntm"),
+    "Object2Vec": ("sagemaker", "sagemaker.amazon.object2vec"),
+    "PCA": ("sagemaker", "sagemaker.amazon.pca"),
+    "PyTorch": ("sagemaker.pytorch", "sagemaker.pytorch.estimator"),
+    "RandomCutForest": ("sagemaker", "sagemaker.amazon.randomcutforest"),
+    "RLEstimator": ("sagemaker.rl", "sagemaker.rl.estimator"),
+    "SKLearn": ("sagemaker.sklearn", "sagemaker.sklearn.estimator"),
+    "TensorFlow": ("sagemaker.tensorflow", "sagemaker.tensorflow.estimator"),
+    "XGBoost": ("sagemaker.xgboost", "sagemaker.xgboost.estimator"),
+}
+
+PARAMS = (
+    "train_instance_count",
+    "train_instance_type",
+    "train_max_run",
+    "train_max_run_wait",
+    "train_use_spot_instances",
+    "train_volume_size",
+    "train_volume_kms_key",
+)
+
+
+class TrainPrefixRemover(Modifier):
+    """A class to remove the redundant 'train' prefix in estimator parameters."""
+
+    def node_should_be_modified(self, node):
+        """Checks if the node is an estimator constructor and contains any relevant parameters.
+
+        This looks for the following parameters:
+
+        - ``train_instance_count``
+        - ``train_instance_type``
+        - ``train_max_run``
+        - ``train_max_run_wait``
+        - ``train_use_spot_instances``
+        - ``train_volume_kms_key``
+        - ``train_volume_size``
+
+        Args:
+            node (ast.Call): a node that represents a function call. For more,
+                see https://docs.python.org/3/library/ast.html#abstract-grammar.
+
+        Returns:
+            bool: If the ``ast.Call`` matches the relevant function calls and
+                contains the parameter to be renamed.
+        """
+        return matching.matches_any(node, ESTIMATORS) and self._has_train_parameter(node)
+
+    def _has_train_parameter(self, node):
+        """Checks if any of the node's keywords is one of the parameters listed in ``PARAMS``."""
+        for kw in node.keywords:
+            if kw.arg in PARAMS:
+                return True
+
+        return False
+
+    def modify_node(self, node):
+        """Modifies the ``ast.Call`` node to remove the 'train_' prefix from its keywords.
+
+        Args:
+            node (ast.Call): a node that represents an estimator constructor.
+        """
+        for kw in node.keywords:
+            if kw.arg in PARAMS:
+                kw.arg = kw.arg.replace("train_", "")
diff --git a/src/sagemaker/deserializers.py b/src/sagemaker/deserializers.py
@@ -43,6 +43,35 @@ def ACCEPT(self):
         """The content type that is expected from the inference endpoint."""
 
 
+class StringDeserializer(BaseDeserializer):
+    """Deserialize data from an inference endpoint into a decoded string."""
+
+    ACCEPT = "application/json"
+
+    def __init__(self, encoding="UTF-8"):
+        """Initialize the string encoding.
+
+        Args:
+            encoding (str): The string encoding to use (default: UTF-8).
+        """
+        self.encoding = encoding
+
+    def deserialize(self, data, content_type):
+        """Deserialize data from an inference endpoint into a decoded string.
+
+        Args:
+            data (object): Data to be deserialized.
+            content_type (str): The MIME type of the data.
+
+        Returns:
+            str: The data deserialized into a decoded string.
+        """
+        try:
+            return data.read().decode(self.encoding)
+        finally:
+            data.close()
+
+
 class BytesDeserializer(BaseDeserializer):
     """Deserialize a stream of bytes into a bytes object."""
 
diff --git a/src/sagemaker/predictor.py b/src/sagemaker/predictor.py
@@ -597,35 +597,6 @@ def _row_to_csv(obj):
     return ",".join(obj)
 
 
-class StringDeserializer(object):
-    """Return the response as a decoded string.
-
-    Args:
-        encoding (str): The string encoding to use (default=utf-8).
-        accept (str): The Accept header to send to the server (optional).
-    """
-
-    def __init__(self, encoding="utf-8", accept=None):
-        """
-        Args:
-            encoding:
-            accept:
-        """
-        self.encoding = encoding
-        self.accept = accept
-
-    def __call__(self, stream, content_type):
-        """
-        Args:
-            stream:
-            content_type:
-        """
-        try:
-            return stream.read().decode(self.encoding)
-        finally:
-            stream.close()
-
-
 class StreamDeserializer(object):
     """Returns the tuple of the response stream and the content-type of the response.
        It is the receivers responsibility to close the stream when they're done
diff --git a/tests/integ/test_multidatamodel.py b/tests/integ/test_multidatamodel.py
@@ -24,9 +24,10 @@
 
 from sagemaker import utils
 from sagemaker.amazon.randomcutforest import RandomCutForest
+from sagemaker.deserializers import StringDeserializer
 from sagemaker.multidatamodel import MultiDataModel
 from sagemaker.mxnet import MXNet
-from sagemaker.predictor import Predictor, StringDeserializer, npy_serializer
+from sagemaker.predictor import Predictor, npy_serializer
 from sagemaker.utils import sagemaker_timestamp, unique_name_from_base, get_ecr_image_uri_prefix
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
 from tests.integ.retry import retries
diff --git a/tests/unit/sagemaker/cli/compatibility/v2/modifiers/test_training_params.py b/tests/unit/sagemaker/cli/compatibility/v2/modifiers/test_training_params.py
@@ -0,0 +1,102 @@
+# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
+
+import itertools
+
+import pasta
+
+from sagemaker.cli.compatibility.v2.modifiers import training_params
+from tests.unit.sagemaker.cli.compatibility.v2.modifiers.ast_converter import ast_call
+
+ESTIMATORS_TO_NAMESPACES = {
+    "AlgorithmEstimator": ("sagemaker", "sagemaker.algorithm"),
+    "AmazonAlgorithmEstimatorBase": ("sagemaker.amazon.amazon_estimator",),
+    "Chainer": ("sagemaker.chainer", "sagemaker.chainer.estimator"),
+    "Estimator": ("sagemaker.estimator",),
+    "EstimatorBase": ("sagemaker.estimator",),
+    "FactorizationMachines": ("sagemaker", "sagemaker.amazon.factorization_machines"),
+    "Framework": ("sagemaker.estimator",),
+    "IPInsights": ("sagemaker", "sagemaker.amazon.ipinsights"),
+    "KMeans": ("sagemaker", "sagemaker.amazon.kmeans"),
+    "KNN": ("sagemaker", "sagemaker.amazon.knn"),
+    "LDA": ("sagemaker", "sagemaker.amazon.lda"),
+    "LinearLearner": ("sagemaker", "sagemaker.amazon.linear_learner"),
+    "MXNet": ("sagemaker.mxnet", "sagemaker.mxnet.estimator"),
+    "NTM": ("sagemaker", "sagemaker.amazon.ntm"),
+    "Object2Vec": ("sagemaker", "sagemaker.amazon.object2vec"),
+    "PCA": ("sagemaker", "sagemaker.amazon.pca"),
+    "PyTorch": ("sagemaker.pytorch", "sagemaker.pytorch.estimator"),
+    "RandomCutForest": ("sagemaker", "sagemaker.amazon.randomcutforest"),
+    "RLEstimator": ("sagemaker.rl", "sagemaker.rl.estimator"),
+    "SKLearn": ("sagemaker.sklearn", "sagemaker.sklearn.estimator"),
+    "TensorFlow": ("sagemaker.tensorflow", "sagemaker.tensorflow.estimator"),
+    "XGBoost": ("sagemaker.xgboost", "sagemaker.xgboost.estimator"),
+}
+
+PARAMS_WITH_VALUES = (
+    "train_instance_count=1",
+    "train_instance_type='ml.c4.xlarge'",
+    "train_max_run=8 * 60 * 60",
+    "train_max_run_wait=1 * 60 * 60",
+    "train_use_spot_instances=True",
+    "train_volume_size=30",
+    "train_volume_kms_key='key'",
+)
+
+
+def _estimators():
+    for estimator, namespaces in ESTIMATORS_TO_NAMESPACES.items():
+        yield estimator
+
+        for namespace in namespaces:
+            yield ".".join((namespace, estimator))
+
+
+def test_node_should_be_modified():
+    modifier = training_params.TrainPrefixRemover()
+
+    for estimator in _estimators():
+        for param in PARAMS_WITH_VALUES:
+            call = ast_call("{}({})".format(estimator, param))
+            assert modifier.node_should_be_modified(call)
+
+
+def test_node_should_be_modified_no_params():
+    modifier = training_params.TrainPrefixRemover()
+
+    for estimator in _estimators():
+        call = ast_call("{}()".format(estimator))
+        assert not modifier.node_should_be_modified(call)
+
+
+def test_node_should_be_modified_random_function_call():
+    modifier = training_params.TrainPrefixRemover()
+    assert not modifier.node_should_be_modified(ast_call("Session()"))
+
+
+def test_modify_node():
+    modifier = training_params.TrainPrefixRemover()
+
+    for params in _parameter_combinations():
+        node = ast_call("Estimator({})".format(params))
+        modifier.modify_node(node)
+
+        expected = "Estimator({})".format(params).replace("train_", "")
+        assert expected == pasta.dump(node)
+
+
+def _parameter_combinations():
+    for subset_length in range(1, len(PARAMS_WITH_VALUES) + 1):
+        for subset in itertools.combinations(PARAMS_WITH_VALUES, subset_length):
+            yield ", ".join(subset)
diff --git a/tests/unit/sagemaker/test_deserializers.py b/tests/unit/sagemaker/test_deserializers.py
@@ -16,9 +16,17 @@
 
 import pytest
 
-from sagemaker.deserializers import BytesDeserializer, CSVDeserializer
+from sagemaker.deserializers import StringDeserializer, BytesDeserializer, CSVDeserializer
 
 
+def test_string_deserializer():
+    deserializer = StringDeserializer()
+
+    result = deserializer.deserialize(io.BytesIO(b"[1, 2, 3]"), "application/json")
+
+    assert result == "[1, 2, 3]"
+
+
 def test_bytes_deserializer():
     deserializer = BytesDeserializer()
 
diff --git a/tests/unit/test_predictor.py b/tests/unit/test_predictor.py
@@ -25,7 +25,6 @@
     json_serializer,
     json_deserializer,
     csv_serializer,
-    StringDeserializer,
     StreamDeserializer,
     numpy_deserializer,
     npy_serializer,
@@ -167,12 +166,6 @@ def test_json_deserializer_invalid_data():
     assert "column" in str(error)
 
 
-def test_string_deserializer():
-    result = StringDeserializer()(io.BytesIO(b"[1, 2, 3]"), "application/json")
-
-    assert result == "[1, 2, 3]"
-
-
 def test_stream_deserializer():
     stream, content_type = StreamDeserializer()(io.BytesIO(b"[1, 2, 3]"), "application/json")
     result = stream.read()

Original file line number	Diff line number	Diff line change
`@@ -21,4 +21,5 @@`
`21`	`21`	`renamed_params,`
`22`	`22`	`tf_legacy_mode,`
`23`	`23`	`tfs,`
	`24`	`+ training_params,`
`24`	`25`	`)`