Skip to content

Commit 6bcb5a0

Browse files
claytonparnell authored and mufiAmazon committed
feature: Add support for SparkML v3.3 (aws#3420)
1 parent 832b814 commit 6bcb5a0

File tree

7 files changed

+39
-8
lines changed

7 files changed

+39
-8
lines changed

README.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ In order to host a SparkML model in SageMaker, it should be serialized with ``ML
214214

215215
For more information on MLeap, see https://github.com/combust/mleap .
216216

217-
Supported major version of Spark: 2.4 (MLeap version - 0.9.6)
217+
Supported major version of Spark: 3.3 (MLeap version - 0.20.0)
218218

219219
Here is an example on how to create an instance of ``SparkMLModel`` class and use ``deploy()`` method to create an
220220
endpoint which can be used to perform prediction against your trained SparkML Model.

src/sagemaker/image_uri_config/sparkml-serving.json

+31
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,37 @@
5858
"us-west-2": "246618743249"
5959
},
6060
"repository": "sagemaker-sparkml-serving"
61+
},
62+
"3.3": {
63+
"registries": {
64+
"af-south-1": "510948584623",
65+
"ap-east-1": "651117190479",
66+
"ap-northeast-1": "354813040037",
67+
"ap-northeast-2": "366743142698",
68+
"ap-northeast-3": "867004704886",
69+
"ap-south-1": "720646828776",
70+
"ap-southeast-1": "121021644041",
71+
"ap-southeast-2": "783357654285",
72+
"ap-southeast-3": "951798379941",
73+
"ca-central-1": "341280168497",
74+
"cn-north-1": "450853457545",
75+
"cn-northwest-1": "451049120500",
76+
"eu-central-1": "492215442770",
77+
"eu-north-1": "662702820516",
78+
"eu-west-1": "141502667606",
79+
"eu-west-2": "764974769150",
80+
"eu-west-3": "659782779980",
81+
"eu-south-1": "978288397137",
82+
"me-south-1": "801668240914",
83+
"sa-east-1": "737474898029",
84+
"us-east-1": "683313688378",
85+
"us-east-2": "257758044811",
86+
"us-gov-west-1": "414596584902",
87+
"us-iso-east-1": "833128469047",
88+
"us-west-1": "746614075791",
89+
"us-west-2": "246618743249"
90+
},
91+
"repository": "sagemaker-sparkml-serving"
6192
}
6293
}
6394
}

src/sagemaker/sparkml/model.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def __init__(
7878
self,
7979
model_data: Union[str, PipelineVariable],
8080
role: Optional[str] = None,
81-
spark_version: str = "2.4",
81+
spark_version: str = "3.3",
8282
sagemaker_session: Optional[Session] = None,
8383
**kwargs,
8484
):
@@ -95,7 +95,7 @@ def __init__(
9595
artifacts. After the endpoint is created, the inference code
9696
might use the IAM role, if it needs to access an AWS resource.
9797
spark_version (str): Spark version you want to use for executing the
98-
inference (default: '2.4').
98+
inference (default: '3.3').
9999
sagemaker_session (sagemaker.session.Session): Session object which
100100
manages interactions with Amazon SageMaker APIs and any other
101101
AWS services needed. If not specified, the estimator creates one

tests/unit/sagemaker/image_uris/test_sparkml.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"us-west-1": "746614075791",
4444
"us-west-2": "246618743249",
4545
}
46-
VERSIONS = ["2.2", "2.4"]
46+
VERSIONS = ["2.2", "2.4", "3.3"]
4747

4848

4949
@pytest.mark.parametrize("version", VERSIONS)

tests/unit/sagemaker/workflow/test_steps.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -918,7 +918,7 @@ def test_create_model_step_with_model_pipeline(tfo, time, sagemaker_session):
918918
},
919919
{
920920
"Environment": {"SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT": "text/csv"},
921-
"Image": "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:2.4",
921+
"Image": "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:3.3",
922922
"ModelDataUrl": "s3://bucket/model_2.tar.gz",
923923
},
924924
],

tests/unit/test_pipeline_model.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def test_prepare_container_def(tfo, time, sagemaker_session):
9797
{
9898
"Environment": {"SAGEMAKER_DEFAULT_INVOCATIONS_ACCEPT": "text/csv"},
9999
"Image": "246618743249.dkr.ecr.us-west-2.amazonaws.com"
100-
+ "/sagemaker-sparkml-serving:2.4",
100+
+ "/sagemaker-sparkml-serving:3.3",
101101
"ModelDataUrl": "s3://bucket/model_2.tar.gz",
102102
},
103103
]
@@ -338,7 +338,7 @@ def test_network_isolation(tfo, time, sagemaker_session):
338338
"ModelDataUrl": "s3://bucket/model_1.tar.gz",
339339
},
340340
{
341-
"Image": "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:2.4",
341+
"Image": "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-sparkml-serving:3.3",
342342
"Environment": {},
343343
"ModelDataUrl": "s3://bucket/model_2.tar.gz",
344344
},

tests/unit/test_sparkml_serving.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def sagemaker_session():
4949

5050
def test_sparkml_model(sagemaker_session):
5151
sparkml = SparkMLModel(sagemaker_session=sagemaker_session, model_data=MODEL_DATA, role=ROLE)
52-
assert sparkml.image_uri == image_uris.retrieve("sparkml-serving", REGION, version="2.4")
52+
assert sparkml.image_uri == image_uris.retrieve("sparkml-serving", REGION, version="3.3")
5353

5454

5555
def test_predictor_type(sagemaker_session):

0 commit comments

Comments
 (0)