|
| 1 | +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"). You |
| 4 | +# may not use this file except in compliance with the License. A copy of |
| 5 | +# the License is located at |
| 6 | +# |
| 7 | +# http://aws.amazon.com/apache2.0/ |
| 8 | +# |
| 9 | +# or in the "license" file accompanying this file. This file is |
| 10 | +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF |
| 11 | +# ANY KIND, either express or implied. See the License for the specific |
| 12 | +# language governing permissions and limitations under the License. |
| 13 | +"""This module contains code related to HuggingFace Processors which are used for Processing jobs. |
| 14 | +
|
| 15 | +These jobs let customers perform data pre-processing, post-processing, feature engineering, |
| 16 | +data validation, and model evaluation and interpretation on SageMaker. |
| 17 | +""" |
| 18 | +from __future__ import absolute_import |
| 19 | + |
| 20 | +from sagemaker.processing import FrameworkProcessor |
| 21 | +from sagemaker.huggingface.estimator import HuggingFace |
| 22 | + |
| 23 | + |
class HuggingFaceProcessor(FrameworkProcessor):
    """Handles Amazon SageMaker processing tasks for jobs using HuggingFace containers."""

    # Estimator class handed to ``FrameworkProcessor`` and instantiated by
    # ``_create_estimator``.
    estimator_cls = HuggingFace

    def __init__(
        self,
        role,
        instance_count,
        instance_type,
        transformers_version=None,
        tensorflow_version=None,
        pytorch_version=None,
        py_version="py36",
        image_uri=None,
        command=None,
        volume_size_in_gb=30,
        volume_kms_key=None,
        output_kms_key=None,
        code_location=None,
        max_runtime_in_seconds=None,
        base_job_name=None,
        sagemaker_session=None,
        env=None,
        tags=None,
        network_config=None,
    ):
        """This processor executes a Python script in a HuggingFace execution environment.

        Unless ``image_uri`` is specified, the environment is an Amazon-built Docker container
        that executes functions defined in the supplied ``code`` Python script.

        The arguments have the same meaning as in ``FrameworkProcessor``, with the following
        exceptions.

        Args:
            transformers_version (str): Transformers version you want to use for
                executing your processing code. Defaults to ``None``. Required unless
                ``image_uri`` is provided. The current supported version is ``4.4.2``.
            tensorflow_version (str): TensorFlow version you want to use for
                executing your processing code. Defaults to ``None``. Required unless
                ``pytorch_version`` is provided. The current supported version is ``2.4.1``.
            pytorch_version (str): PyTorch version you want to use for
                executing your processing code. Defaults to ``None``. Required unless
                ``tensorflow_version`` is provided. The current supported version is ``1.6.0``.
            py_version (str): Python version you want to use for executing your processing
                code. Defaults to ``py36``. If using PyTorch, the current supported
                version is ``py36``. If using TensorFlow, the current supported version
                is ``py37``.

        .. tip::

            You can find additional parameters for initializing this class at
            :class:`~sagemaker.processing.FrameworkProcessor`.
        """
        # The base class is only given the Transformers version (read back as
        # ``self.framework_version`` in ``_create_estimator``), so the two
        # backend versions are kept on this instance.
        self.pytorch_version = pytorch_version
        self.tensorflow_version = tensorflow_version
        # Arguments are forwarded positionally; their order must match the
        # ``FrameworkProcessor.__init__`` signature.
        super().__init__(
            self.estimator_cls,
            transformers_version,
            role,
            instance_count,
            instance_type,
            py_version,
            image_uri,
            command,
            volume_size_in_gb,
            volume_kms_key,
            output_kms_key,
            code_location,
            max_runtime_in_seconds,
            base_job_name,
            sagemaker_session,
            env,
            tags,
            network_config,
        )

    def _create_estimator(
        self,
        entry_point="",
        source_dir=None,
        dependencies=None,
        git_config=None,
    ):
        """Override default estimator factory function for HuggingFace's different parameters.

        HuggingFace estimators have 3 framework version parameters instead of one: The version for
        Transformers, PyTorch, and TensorFlow.

        Args:
            entry_point (str): Forwarded unchanged to the estimator. Defaults to ``""``.
            source_dir (str): Forwarded unchanged to the estimator. Defaults to ``None``.
            dependencies (list): Forwarded unchanged to the estimator. Defaults to ``None``.
            git_config (dict): Forwarded unchanged to the estimator. Defaults to ``None``.

        Returns:
            An instance of ``estimator_cls`` built from the arguments above and the
            settings stored on this processor.
        """
        return self.estimator_cls(
            # The processor's generic framework version is the Transformers version.
            transformers_version=self.framework_version,
            tensorflow_version=self.tensorflow_version,
            pytorch_version=self.pytorch_version,
            py_version=self.py_version,
            entry_point=entry_point,
            source_dir=source_dir,
            dependencies=dependencies,
            git_config=git_config,
            code_location=self.code_location,
            enable_network_isolation=False,
            image_uri=self.image_uri,
            role=self.role,
            instance_count=self.instance_count,
            instance_type=self.instance_type,
            sagemaker_session=self.sagemaker_session,
            # NOTE(review): debugger hooks and the profiler are always switched off
            # here, presumably because they only apply to training jobs -- confirm.
            debugger_hook_config=False,
            disable_profiler=True,
        )
0 commit comments