
Commit e9cdf75

Merge pull request aws#22 from athewsey/feat/fw-processor
Add HuggingFaceProcessor and fix local mode
2 parents 4312db9 + 1ed9349 commit e9cdf75

File tree

10 files changed, +467 -85 lines changed


src/sagemaker/huggingface/__init__.py

+1
@@ -14,3 +14,4 @@
 from __future__ import absolute_import

 from sagemaker.huggingface.estimator import HuggingFace  # noqa: F401
+from sagemaker.huggingface.processing import HuggingFaceProcessor  # noqa:F401

src/sagemaker/huggingface/processing.py

+132
@@ -0,0 +1,132 @@
# Copyright 2019-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""This module contains code related to HuggingFace Processors which are used for Processing jobs.

These jobs let customers perform data pre-processing, post-processing, feature engineering,
data validation, and model evaluation and interpretation on SageMaker.
"""
from __future__ import absolute_import

from sagemaker.processing import FrameworkProcessor
from sagemaker.huggingface.estimator import HuggingFace


class HuggingFaceProcessor(FrameworkProcessor):
    """Handles Amazon SageMaker processing tasks for jobs using HuggingFace containers."""

    estimator_cls = HuggingFace

    def __init__(
        self,
        role,
        instance_count,
        instance_type,
        transformers_version=None,
        tensorflow_version=None,
        pytorch_version=None,
        py_version="py36",
        image_uri=None,
        command=["python"],
        volume_size_in_gb=30,
        volume_kms_key=None,
        output_kms_key=None,
        code_location=None,
        max_runtime_in_seconds=None,
        base_job_name=None,
        sagemaker_session=None,
        env=None,
        tags=None,
        network_config=None,
    ):
        """This processor executes a Python script in a HuggingFace execution environment.

        Unless ``image_uri`` is specified, the environment is an Amazon-built Docker container
        that executes functions defined in the supplied ``code`` Python script.

        The arguments have the same meaning as in ``FrameworkProcessor``, with the following
        exceptions.

        Args:
            transformers_version (str): Transformers version you want to use for
                executing your model training code. Defaults to ``None``. Required unless
                ``image_uri`` is provided. The current supported version is ``4.4.2``.
            tensorflow_version (str): TensorFlow version you want to use for
                executing your model training code. Defaults to ``None``. Required unless
                ``pytorch_version`` is provided. The current supported version is ``1.6.0``.
            pytorch_version (str): PyTorch version you want to use for
                executing your model training code. Defaults to ``None``. Required unless
                ``tensorflow_version`` is provided. The current supported version is ``2.4.1``.
            py_version (str): Python version you want to use for executing your model training
                code. Defaults to ``None``. Required unless ``image_uri`` is provided. If
                using PyTorch, the current supported version is ``py36``. If using TensorFlow,
                the current supported version is ``py37``.

        .. tip::

            You can find additional parameters for initializing this class at
            :class:`~sagemaker.processing.FrameworkProcessor`.
        """
        self.pytorch_version = pytorch_version
        self.tensorflow_version = tensorflow_version
        super().__init__(
            self.estimator_cls,
            transformers_version,
            role,
            instance_count,
            instance_type,
            py_version,
            image_uri,
            command,
            volume_size_in_gb,
            volume_kms_key,
            output_kms_key,
            code_location,
            max_runtime_in_seconds,
            base_job_name,
            sagemaker_session,
            env,
            tags,
            network_config,
        )

    def _create_estimator(
        self,
        entry_point="",
        source_dir=None,
        dependencies=None,
        git_config=None,
    ):
        """Override default estimator factory function for HuggingFace's different parameters

        HuggingFace estimators have 3 framework version parameters instead of one: The version for
        Transformers, PyTorch, and TensorFlow.
        """
        return self.estimator_cls(
            transformers_version=self.framework_version,
            tensorflow_version=self.tensorflow_version,
            pytorch_version=self.pytorch_version,
            py_version=self.py_version,
            entry_point=entry_point,
            source_dir=source_dir,
            dependencies=dependencies,
            git_config=git_config,
            code_location=self.code_location,
            enable_network_isolation=False,
            image_uri=self.image_uri,
            role=self.role,
            instance_count=self.instance_count,
            instance_type=self.instance_type,
            sagemaker_session=self.sagemaker_session,
            debugger_hook_config=False,
            disable_profiler=True,
        )
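
For orientation, here is a minimal usage sketch of the class added above. The instance type, framework versions, bucket, and script names are illustrative assumptions, not values taken from this commit:

from sagemaker.huggingface import HuggingFaceProcessor
from sagemaker.processing import ProcessingInput, ProcessingOutput

# Framework versions must match an available HuggingFace training container;
# the values below are assumed for illustration.
processor = HuggingFaceProcessor(
    role="SageMakerRole",
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    transformers_version="4.4.2",
    pytorch_version="1.6.0",
    py_version="py36",
)

processor.run(
    code="preprocess.py",            # hypothetical entry point inside source_dir
    source_dir="./processing_code",  # may bundle a requirements.txt with extra dependencies
    inputs=[ProcessingInput(source="s3://my-bucket/raw", destination="/opt/ml/processing/input")],
    outputs=[ProcessingOutput(source="/opt/ml/processing/output")],
)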

src/sagemaker/local/local_session.py

+24 -2
@@ -475,10 +475,30 @@ def invoke_endpoint(


 class LocalSession(Session):
-    """A LocalSession class definition."""
+    """A SageMaker ``Session`` class for Local Mode.

-    def __init__(self, boto_session=None, s3_endpoint_url=None):
+    This class provides alternative Local Mode implementations for the functionality of
+    :class:`~sagemaker.session.Session`.
+    """
+
+    def __init__(self, boto_session=None, s3_endpoint_url=None, disable_local_code=False):
+        """Create a Local SageMaker Session.
+
+        Args:
+            boto_session (boto3.session.Session): The underlying Boto3 session which AWS service
+                calls are delegated to (default: None). If not provided, one is created with
+                default AWS configuration chain.
+            s3_endpoint_url (str): Override the default endpoint URL for Amazon S3, if set
+                (default: None).
+            disable_local_code (bool): Set ``True`` to override the default AWS configuration
+                chain to disable the ``local.local_code`` setting, which may not be supported for
+                some SDK features (default: False).
+        """
         self.s3_endpoint_url = s3_endpoint_url
+        # We use this local variable to avoid disrupting the __init__->_initialize API of the
+        # parent class... But overwriting it after constructor won't do anything, so prefix _ to
+        # discourage external use:
+        self._disable_local_code = disable_local_code

         super(LocalSession, self).__init__(boto_session)

@@ -530,6 +550,8 @@ def _initialize(
            raise e

         self.config = yaml.load(open(sagemaker_config_file, "r"))
+        if self._disable_local_code and "local" in self.config:
+            self.config["local"]["local_code"] = False

     def logs_for_job(self, job_name, wait=False, poll=5, log_type="All"):
         """A no-op method meant to override the sagemaker client.

src/sagemaker/processing.py

+54 -50
@@ -128,7 +128,8 @@ def __init__(

         if self.instance_type in ("local", "local_gpu"):
             if not isinstance(sagemaker_session, LocalSession):
-                sagemaker_session = LocalSession()
+                # Until Local Mode Processing supports local code, we need to disable it:
+                sagemaker_session = LocalSession(disable_local_code=True)

         self.sagemaker_session = sagemaker_session or Session()

@@ -1298,10 +1299,15 @@ def __init__(
         self.framework_version = framework_version
         self.py_version = py_version

-        image_uri, base_job_name = self._pre_init_normalization(
-            instance_type, image_uri, base_job_name, sagemaker_session
-        )
-
+        # 1. To finalize/normalize the image_uri or base_job_name, we need to create an
+        #    estimator_cls instance.
+        # 2. We want to make it easy for children of FrameworkProcessor to override estimator
+        #    creation via a function (to create FrameworkProcessors for Estimators that may have
+        #    different signatures - like HuggingFace or others in future).
+        # 3. Super-class __init__ doesn't (currently) do anything with these params besides
+        #    storing them
+        #
+        # Therefore we'll init the superclass first and then customize the setup after:
         super().__init__(
             role=role,
             image_uri=image_uri,
@@ -1318,6 +1324,7 @@ def __init__(
             tags=tags,
             network_config=network_config,
         )
+
         # This subclass uses the "code" input for actual payload and the ScriptProcessor parent's
         # functionality for uploading just a small entrypoint script to invoke it.
         self._CODE_CONTAINER_INPUT_NAME = "entrypoint"
@@ -1326,38 +1333,45 @@ def __init__(
             code_location[:-1] if (code_location and code_location.endswith("/")) else code_location
         )

-    def _pre_init_normalization(
-        self,
-        instance_type: str,
-        image_uri: Optional[str] = None,
-        base_job_name: Optional[str] = None,
-        sagemaker_session: Optional[str] = None,
-    ) -> Tuple[str, str]:
-        """Normalize job name and container image uri."""
-        # Normalize base_job_name
-        if base_job_name is None:
-            base_job_name = self.estimator_cls._framework_name
+        if image_uri is None or base_job_name is None:
+            # For these default configuration purposes, we don't need the optional args:
+            est = self._create_estimator()
+            if image_uri is None:
+                self.image_uri = est.training_image_uri()
             if base_job_name is None:
-                logger.warning("Framework name is None. Please check with the maintainer.")
-                base_job_name = str(base_job_name)  # Keep mypy happy.
-
-        # Normalize image uri.
-        if image_uri is None:
-            # Estimator used only to probe image uri, so can get away with some dummy values.
-            est = self.estimator_cls(
-                framework_version=self.framework_version,
-                instance_type=instance_type,
-                py_version=self.py_version,
-                image_uri=image_uri,
-                entry_point="",
-                role="",
-                enable_network_isolation=False,
-                instance_count=1,  # SKLearn estimator explicitly disables instance_count>1
-                sagemaker_session=sagemaker_session,
-            )
-            image_uri = est.training_image_uri()
+                self.base_job_name = est.base_job_name or estimator_cls._framework_name
+                if base_job_name is None:
+                    base_job_name = "framework-processor"

-        return image_uri, base_job_name
+    def _create_estimator(
+        self,
+        entry_point="",
+        source_dir=None,
+        dependencies=None,
+        git_config=None,
+    ):
+        """Instantiate the Framework Estimator that backs this Processor"""
+        return self.estimator_cls(
+            framework_version=self.framework_version,
+            py_version=self.py_version,
+            entry_point=entry_point,
+            source_dir=source_dir,
+            dependencies=dependencies,
+            git_config=git_config,
+            code_location=self.code_location,
+            enable_network_isolation=False,  # True -> uploads to input channel. Not what we want!
+            image_uri=self.image_uri,
+            role=self.role,
+            # Estimator instance_count doesn't currently matter to FrameworkProcessor, and the
+            # SKLearn Framework Estimator requires instance_type==1. So here we hard-wire it to 1,
+            # but if it matters in future perhaps we could take self.instance_count here and have
+            # SKLearnProcessor override this function instead:
+            instance_count=1,
+            instance_type=self.instance_type,
+            sagemaker_session=self.sagemaker_session,
+            debugger_hook_config=False,
+            disable_profiler=True,
+        )

     def get_run_args(
         self,
@@ -1555,10 +1569,11 @@ def _pack_and_upload_code(self, code, source_dir, dependencies, git_config, job_

         local_code = get_config_value("local.local_code", self.sagemaker_session.config)
         if self.sagemaker_session.local_mode and local_code:
-            # TODO: Can we be more prescriptive about how to not trigger this error?
-            # How can user or us force a local mode `Estimator` to run with `local_code=False`?
             raise RuntimeError(
-                "Local *code* is not currently supported for SageMaker Processing in Local Mode"
+                "SageMaker Processing Local Mode does not currently support 'local code' mode. "
+                "Please use a LocalSession created with disable_local_code=True, or leave "
+                "sagemaker_session unspecified when creating your Processor to have one set up "
+                "automatically."
             )

         # Upload the bootstrapping code as s3://.../jobname/source/runproc.sh.
@@ -1623,22 +1638,11 @@ def _upload_payload(
         """Upload payload sourcedir.tar.gz to S3."""
         # A new estimator instance is required, because each call to ScriptProcessor.run() can
         # use different codes.
-        estimator = self.estimator_cls(
+        estimator = self._create_estimator(
             entry_point=entry_point,
             source_dir=source_dir,
             dependencies=dependencies,
             git_config=git_config,
-            framework_version=self.framework_version,
-            py_version=self.py_version,
-            code_location=self.code_location,  # Upload to <code_loc>/jobname/output/source.tar.gz
-            enable_network_isolation=False,  # If true, uploads to input channel. Not what we want!
-            image_uri=self.image_uri,  # The image uri is already normalized by this point.
-            role=self.role,
-            instance_type=self.instance_type,
-            instance_count=1,
-            sagemaker_session=self.sagemaker_session,
-            debugger_hook_config=False,
-            disable_profiler=True,
         )

         estimator._prepare_for_training(job_name=job_name)
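
Because estimator creation is now funneled through _create_estimator(), the generic FrameworkProcessor can be used directly with any Framework Estimator class whose constructor follows the standard signature; subclasses such as HuggingFaceProcessor only override the factory when their Estimator takes different parameters. A rough usage sketch, where the framework version, instance type, and file names are assumptions for illustration:

from sagemaker.processing import FrameworkProcessor
from sagemaker.tensorflow.estimator import TensorFlow

processor = FrameworkProcessor(
    estimator_cls=TensorFlow,
    framework_version="2.4.1",   # assumed; any version with a training image works
    py_version="py37",
    role="SageMakerRole",
    instance_count=1,
    instance_type="ml.m5.xlarge",
)

# source_dir is packaged as sourcedir.tar.gz and uploaded by _upload_payload() above,
# then bootstrapped in the container via the generated runproc.sh entrypoint.
processor.run(
    code="evaluate.py",       # hypothetical entry point
    source_dir="./my_code",
)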

tests/integ/test_huggingface.py

+39 -3
@@ -16,11 +16,47 @@

 import pytest

-from sagemaker.huggingface import HuggingFace
+from sagemaker.huggingface import HuggingFace, HuggingFaceProcessor
 from tests import integ
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
 from tests.integ.timeout import timeout

+ROLE = "SageMakerRole"
+
+
+@pytest.mark.release
+@pytest.mark.skipif(
+    integ.test_region() in integ.TRAINING_NO_P2_REGIONS,
+    reason="no ml.p2 instances in this region",
+)
+def test_framework_processing_job_with_deps(
+    sagemaker_session,
+    gpu_instance_type,
+    huggingface_training_latest_version,
+    huggingface_pytorch_latest_version,
+):
+    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
+        code_path = os.path.join(DATA_DIR, "dummy_code_bundle_with_reqs")
+        entry_point = "main_script.py"
+
+        processor = HuggingFaceProcessor(
+            transformers_version=huggingface_training_latest_version,
+            pytorch_version=huggingface_pytorch_latest_version,
+            py_version="py36",
+            role=ROLE,
+            instance_count=1,
+            instance_type=gpu_instance_type,
+            sagemaker_session=sagemaker_session,
+            base_job_name="test-huggingface",
+        )
+
+        processor.run(
+            code=entry_point,
+            source_dir=code_path,
+            inputs=[],
+            wait=True,
+        )
+

 @pytest.mark.release
 @pytest.mark.skipif(
@@ -39,7 +75,7 @@ def test_huggingface_training(
     hf = HuggingFace(
         py_version="py36",
         entry_point="examples/text-classification/run_glue.py",
-        role="SageMakerRole",
+        role=ROLE,
         transformers_version=huggingface_training_latest_version,
         pytorch_version=huggingface_pytorch_latest_version,
         instance_count=1,
@@ -86,7 +122,7 @@ def test_huggingface_training_tf(
     hf = HuggingFace(
         py_version="py37",
         entry_point=os.path.join(data_path, "run_tf.py"),
-        role="SageMakerRole",
+        role=ROLE,
         transformers_version=huggingface_training_latest_version,
         tensorflow_version=huggingface_tensorflow_latest_version,
         instance_count=1,