Commit 88d1e80

Merge branch 'master' into fix/unique-resource-names-js-integ-tests
2 parents: 3864e65 + 41feb4c

10 files changed: +192 -25 lines changed

CHANGELOG.md (+14)

@@ -1,5 +1,19 @@
 # Changelog
 
+## v2.189.0 (2023-10-03)
+
+### Features
+
+ * add feature processor APIs to public doc
+ * s3 prefix model data for JumpStartModel
+ * Model Package support for updating approval
+
+### Bug Fixes and Other Changes
+
+ * Add bucket owner check
+ * transform step unit test
+ * Release TGI 1.1.0 Image
+
 ## v2.188.0 (2023-09-26)
 
 ### Features

VERSION (+1 -1)

@@ -1 +1 @@
-2.188.1.dev0
+2.189.1.dev0

doc/api/prep_data/feature_store.rst (+38)

@@ -120,3 +120,41 @@ Feature Store
 .. autoclass:: sagemaker.feature_store.feature_store.FeatureStore
     :members:
     :show-inheritance:
+
+
+@feature_processor Decorator
+****************************
+
+.. autodecorator:: sagemaker.feature_store.feature_processor.feature_processor
+
+
+Feature Processor Data Source
+*****************************
+
+.. autoclass:: sagemaker.feature_store.feature_processor.FeatureGroupDataSource
+    :members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.feature_store.feature_processor.CSVDataSource
+    :members:
+    :show-inheritance:
+
+.. autoclass:: sagemaker.feature_store.feature_processor.ParquetDataSource
+    :members:
+    :show-inheritance:
+
+
+Feature Processor Scheduler
+***************************
+
+.. automethod:: sagemaker.feature_store.feature_processor.to_pipeline
+
+.. automethod:: sagemaker.feature_store.feature_processor.schedule
+
+.. automethod:: sagemaker.feature_store.feature_processor.execute
+
+.. automethod:: sagemaker.feature_store.feature_processor.delete_schedule
+
+.. automethod:: sagemaker.feature_store.feature_processor.describe
+
+.. automethod:: sagemaker.feature_store.feature_processor.list_pipelines
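For context, a rough sketch of how the scheduler entry points documented above fit together with the decorator. The pipeline name, role ARN, schedule expression, and the keyword arguments of to_pipeline and schedule are assumptions for illustration only; the rendered API reference is authoritative.

from sagemaker.feature_store.feature_processor import (
    FeatureGroupDataSource,
    feature_processor,
    schedule,
    to_pipeline,
)


@feature_processor(
    inputs=[FeatureGroupDataSource("input-fg")],
    output="arn:aws:sagemaker:us-west-2:123456789012:feature-group/output-fg",
)
def transform(input_df, params, spark):
    # Feature engineering logic goes here; must return a pyspark DataFrame.
    return input_df


# Promote the decorated function to a SageMaker Pipeline, then run it hourly
# (keyword names below are assumed, not confirmed by this diff).
to_pipeline(
    pipeline_name="feature-processor-demo",
    step=transform,
    role="arn:aws:iam::123456789012:role/ExampleRole",
)
schedule(pipeline_name="feature-processor-demo", schedule_expression="rate(1 hour)")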

doc/conf.py (+2)

@@ -109,6 +109,8 @@
 # autosectionlabel
 autosectionlabel_prefix_document = True
 
+autodoc_mock_imports = ["pyspark", "feature_store_pyspark", "py4j"]
+
 
 def setup(app):
     sys.stdout.write("Generating JumpStart model table...")
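autodoc_mock_imports tells Sphinx autodoc to substitute mock objects for pyspark, feature_store_pyspark, and py4j, so the feature_processor modules added to the API docs above can be imported and documented without those Spark dependencies being installed in the docs build environment.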

src/sagemaker/feature_store/feature_processor/feature_processor.py (+8 -2)

@@ -49,7 +49,10 @@ def feature_processor(
     names and types in [params: Dict[str, Any], spark: SparkSession]. Outputs: a single return
     value of type pyspark.sql.DataFrame. The function can have any name.
 
-    Example:
+    **Example:**
+
+    .. code-block:: python
+
         @feature_processor(
             inputs=[FeatureGroupDataSource("input-fg"), CSVDataSource("s3://bucket/prefix")],
             output='arn:aws:sagemaker:us-west-2:123456789012:feature-group/output-fg'
@@ -60,7 +63,10 @@ def transform(
         ) -> DataFrame:
             return ...
 
-    More concisely:
+    **More concisely:**
+
+    .. code-block:: python
+
         @feature_processor(
             inputs=[FeatureGroupDataSource("input-fg"), CSVDataSource("s3://bucket/prefix")],
             output='arn:aws:sagemaker:us-west-2:123456789012:feature-group/output-fg'
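Wrapping the docstring examples in a code-block directive matters now that the decorator is exposed through autodecorator in the public docs: without it, Sphinx would render the examples as plain body text rather than highlighted Python snippets.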

src/sagemaker/image_uri_config/huggingface-llm.json (+44 -1)

@@ -5,7 +5,8 @@
             "0.6": "0.6.0",
             "0.8": "0.8.2",
             "0.9": "0.9.3",
-            "1.0": "1.0.3"
+            "1.0": "1.0.3",
+            "1.1": "1.1.0"
         },
         "versions": {
             "0.6.0": {
@@ -175,6 +176,48 @@
                 "tag_prefix": "2.0.1-tgi1.0.3",
                 "repository": "huggingface-pytorch-tgi-inference",
                 "container_version": {"gpu": "cu118-ubuntu20.04"}
+            },
+            "1.1.0": {
+                "py_versions": ["py39"],
+                "registries": {
+                    "af-south-1": "626614931356",
+                    "il-central-1": "780543022126",
+                    "ap-east-1": "871362719292",
+                    "ap-northeast-1": "763104351884",
+                    "ap-northeast-2": "763104351884",
+                    "ap-northeast-3": "364406365360",
+                    "ap-south-1": "763104351884",
+                    "ap-south-2": "772153158452",
+                    "ap-southeast-1": "763104351884",
+                    "ap-southeast-2": "763104351884",
+                    "ap-southeast-3": "907027046896",
+                    "ap-southeast-4": "457447274322",
+                    "ca-central-1": "763104351884",
+                    "cn-north-1": "727897471807",
+                    "cn-northwest-1": "727897471807",
+                    "eu-central-1": "763104351884",
+                    "eu-central-2": "380420809688",
+                    "eu-north-1": "763104351884",
+                    "eu-west-1": "763104351884",
+                    "eu-west-2": "763104351884",
+                    "eu-west-3": "763104351884",
+                    "eu-south-1": "692866216735",
+                    "eu-south-2": "503227376785",
+                    "me-south-1": "217643126080",
+                    "me-central-1": "914824155844",
+                    "sa-east-1": "763104351884",
+                    "us-east-1": "763104351884",
+                    "us-east-2": "763104351884",
+                    "us-gov-east-1": "446045086412",
+                    "us-gov-west-1": "442386744353",
+                    "us-iso-east-1": "886529160074",
+                    "us-isob-east-1": "094389454867",
+                    "us-west-1": "763104351884",
+                    "us-west-2": "763104351884"
+                },
+                "tag_prefix": "2.0.1-tgi1.1.0",
+                "repository": "huggingface-pytorch-tgi-inference",
+                "container_version": {"gpu": "cu118-ubuntu20.04"}
             }
         }
     }
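As a usage sketch, the new entry should be resolvable through the SDK's existing TGI image helper; the region and alias resolution below are assumptions based on the version_aliases and tag_prefix added above, not something exercised by this commit.

from sagemaker.huggingface import get_huggingface_llm_image_uri

# Resolve the TGI 1.1.0 container added above; "1.1" is expected to alias to
# 1.1.0 and map to the 2.0.1-tgi1.1.0 tag in the chosen region's registry.
image_uri = get_huggingface_llm_image_uri("huggingface", version="1.1.0", region="us-east-1")
print(image_uri)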

src/sagemaker/session.py (+28 -1)

@@ -233,6 +233,7 @@ def __init__(
         self._default_bucket_name_override = default_bucket
         # this may also be set again inside :func:`_initialize` if it is None
         self.default_bucket_prefix = default_bucket_prefix
+        self._default_bucket_set_by_sdk = False
 
         self.s3_resource = None
         self.s3_client = None
@@ -545,8 +546,12 @@ def default_bucket(self):
         default_bucket = self._default_bucket_name_override
         if not default_bucket:
             default_bucket = generate_default_sagemaker_bucket_name(self.boto_session)
+            self._default_bucket_set_by_sdk = True
 
-        self._create_s3_bucket_if_it_does_not_exist(bucket_name=default_bucket, region=region)
+        self._create_s3_bucket_if_it_does_not_exist(
+            bucket_name=default_bucket,
+            region=region,
+        )
 
         self._default_bucket = default_bucket
 
@@ -620,6 +625,28 @@ def _create_s3_bucket_if_it_does_not_exist(self, bucket_name, region):
             else:
                 raise
 
+        if self._default_bucket_set_by_sdk:
+            # make sure the s3 bucket is configured in users account.
+            expected_bucket_owner_id = self.account_id()
+            try:
+                s3.meta.client.head_bucket(
+                    Bucket=bucket_name, ExpectedBucketOwner=expected_bucket_owner_id
+                )
+            except ClientError as e:
+                error_code = e.response["Error"]["Code"]
+                message = e.response["Error"]["Message"]
+                if error_code == "403" and message == "Forbidden":
+                    LOGGER.error(
+                        "Since default_bucket param was not set, SageMaker Python SDK tried to use "
+                        "%s bucket. "
+                        "This bucket cannot be configured to use as it is not owned by Account %s. "
+                        "To unblock it's recommended to use custom default_bucket "
+                        "parameter in sagemaker.Session",
+                        bucket_name,
+                        expected_bucket_owner_id,
+                    )
+                raise
+
     def _append_sagemaker_config_tags(self, tags: list, config_path_to_tags: str):
         """Appends tags specified in the sagemaker_config to the given list of tags.
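The ownership check only runs when the SDK generated the bucket name itself (_default_bucket_set_by_sdk is True). A minimal sketch of the workaround the log message recommends, with a placeholder bucket name:

import sagemaker

# Passing default_bucket explicitly keeps _default_bucket_set_by_sdk False,
# so the ExpectedBucketOwner head_bucket check above is skipped.
session = sagemaker.Session(default_bucket="my-team-owned-bucket")  # placeholder
print(session.default_bucket())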

tests/unit/sagemaker/image_uris/test_huggingface_llm.py (+2 -1)

@@ -44,13 +44,14 @@
     "us-west-1": "763104351884",
     "us-west-2": "763104351884",
 }
-HF_VERSIONS = ["0.6.0", "0.8.2", "0.9.3", "1.0.3"]
+HF_VERSIONS = ["0.6.0", "0.8.2", "0.9.3", "1.0.3", "1.1.0"]
 LMI_VERSIONS = ["0.23.0"]
 HF_VERSIONS_MAPPING = {
     "0.6.0": "2.0.0-tgi0.6.0-gpu-py39-cu118-ubuntu20.04",
     "0.8.2": "2.0.0-tgi0.8.2-gpu-py39-cu118-ubuntu20.04",
     "0.9.3": "2.0.1-tgi0.9.3-gpu-py39-cu118-ubuntu20.04",
     "1.0.3": "2.0.1-tgi1.0.3-gpu-py39-cu118-ubuntu20.04",
+    "1.1.0": "2.0.1-tgi1.1.0-gpu-py39-cu118-ubuntu20.04",
 }
 LMI_VERSIONS_MAPPING = {"0.23.0": "deepspeed0.9.5-cu118"}

tests/unit/sagemaker/workflow/test_transform_step.py (+1 -1)

@@ -127,7 +127,7 @@ def test_transform_step_with_transformer(model_name, data, output_path, pipeline
     )
 
     step_def = json.loads(pipeline.definition())["Steps"][0]
-    assert step_def["Arguments"]["TransformJobName"] == "TestTransformJobPrefix"
+    assert step_def["Arguments"]["TransformJobName"].startswith("TestTransformJobPrefix")
     assert step_def == {
         "Name": "MyTransformStep",
         "Type": "Transform",

tests/unit/test_default_bucket.py (+54 -18)

@@ -12,16 +12,22 @@
 # language governing permissions and limitations under the License.
 from __future__ import absolute_import
 
+import datetime
 import pytest
 from botocore.exceptions import ClientError
-from mock import MagicMock, patch
+from mock import MagicMock
 import sagemaker
 
 ACCOUNT_ID = "123"
 REGION = "us-west-2"
 DEFAULT_BUCKET_NAME = "sagemaker-{}-{}".format(REGION, ACCOUNT_ID)
 
 
+@pytest.fixture
+def datetime_obj():
+    return datetime.datetime(2017, 6, 16, 15, 55, 0)
+
+
 @pytest.fixture()
 def sagemaker_session():
     boto_mock = MagicMock(name="boto_session", region_name=REGION)
@@ -50,23 +56,53 @@ def test_default_bucket_s3_create_call(sagemaker_session):
     assert sagemaker_session._default_bucket == bucket_name
 
 
-def test_default_bucket_s3_needs_access(sagemaker_session):
-    with patch("logging.Logger.error") as mocked_error_log:
-        with pytest.raises(ClientError):
-            error = ClientError(
-                error_response={"Error": {"Code": "403", "Message": "Forbidden"}},
-                operation_name="foo",
-            )
-            sagemaker_session.boto_session.resource(
-                "s3"
-            ).meta.client.head_bucket.side_effect = error
-            sagemaker_session.default_bucket()
-        mocked_error_log.assert_called_once_with(
-            "Bucket %s exists, but access is forbidden. Please try again after "
-            "adding appropriate access.",
-            DEFAULT_BUCKET_NAME,
-        )
-    assert sagemaker_session._default_bucket is None
+def test_default_bucket_s3_needs_access(sagemaker_session, caplog):
+    with pytest.raises(ClientError):
+        error = ClientError(
+            error_response={"Error": {"Code": "403", "Message": "Forbidden"}},
+            operation_name="foo",
+        )
+        sagemaker_session.boto_session.resource("s3").meta.client.head_bucket.side_effect = error
+        sagemaker_session.default_bucket()
+    error_message = (
+        " exists, but access is forbidden. Please try again after adding appropriate access."
+    )
+    assert error_message in caplog.text
+    assert sagemaker_session._default_bucket is None
+
+
+def test_default_bucket_s3_needs_bucket_owner_access(sagemaker_session, datetime_obj, caplog):
+    with pytest.raises(ClientError):
+        error = ClientError(
+            error_response={"Error": {"Code": "403", "Message": "Forbidden"}},
+            operation_name="foo",
+        )
+        sagemaker_session.boto_session.resource("s3").meta.client.head_bucket.side_effect = error
+        # bucket exists
+        sagemaker_session.boto_session.resource("s3").Bucket(
+            name=DEFAULT_BUCKET_NAME
+        ).creation_date = datetime_obj
+        sagemaker_session.default_bucket()
+
+    error_message = "This bucket cannot be configured to use as it is not owned by Account"
+    assert error_message in caplog.text
+    assert sagemaker_session._default_bucket is None
+
+
+def test_default_bucket_s3_custom_bucket_input(sagemaker_session, datetime_obj, caplog):
+    sagemaker_session._default_bucket_name_override = "custom-bucket-override"
+    error = ClientError(
+        error_response={"Error": {"Code": "403", "Message": "Forbidden"}},
+        operation_name="foo",
+    )
+    sagemaker_session.boto_session.resource("s3").meta.client.head_bucket.side_effect = error
+    # bucket exists
+    sagemaker_session.boto_session.resource("s3").Bucket(
+        name=DEFAULT_BUCKET_NAME
+    ).creation_date = datetime_obj
+    # This should not raise ClientError as no head_bucket call is expected for custom bucket
+    sagemaker_session.default_bucket()
+    assert sagemaker_session._default_bucket == "custom-bucket-override"
 
 
 def test_default_already_cached(sagemaker_session):
