Skip to content

Commit bc7573d

Browse files
authored
Merge branch 'master' into feature/large_pipeline
2 parents e777674 + 4571ea2 commit bc7573d

39 files changed

+1053
-99
lines changed

.githooks/pre-push

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/sh
2+
# This pre-push hook runs style checks and unit tests in python 3.6, 3.7, and 3.8 using tox.
3+
4+
set -e
5+
6+
TOX_PARALLEL_NO_SPINNER=1
7+
PY_COLORS=0
8+
start_time=`date +%s`
9+
tox -e flake8,pylint,docstyle,black-check,twine --parallel all
10+
./ci-scripts/displaytime.sh 'flake8,pylint,docstyle,black-check,twine' $start_time
11+
start_time=`date +%s`
12+
tox -e sphinx,doc8 --parallel all
13+
./ci-scripts/displaytime.sh 'sphinx,doc8' $start_time
14+
start_time=`date +%s`
15+
tox -e py36,py37,py38 --parallel all -- tests/unit
16+
./ci-scripts/displaytime.sh 'py36,py37,py38 unit' $start_time

CHANGELOG.md

+60
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,65 @@
11
# Changelog
22

3+
## v2.66.2 (2021-10-27)
4+
5+
### Bug Fixes and Other Changes
6+
7+
* expose num_clusters parameter for clarify shap in shapconfig
8+
* Update cron job to run hourly
9+
10+
## v2.66.1 (2021-10-26)
11+
12+
### Bug Fixes and Other Changes
13+
14+
* HuggingFace image_uri generation for inference
15+
* Update '_' and '/' with '-' in filename creation
16+
17+
## v2.66.0 (2021-10-25)
18+
19+
### Features
20+
21+
* Add image_uris.retrieve() support for AutoGluon
22+
23+
### Documentation Changes
24+
25+
* fix documentation for input types in estimator.fit
26+
* Add JsonGet v2 deprecation
27+
28+
## v2.65.0 (2021-10-21)
29+
30+
### Features
31+
32+
* modify RLEstimator to use newly generated Ray image (1.6.0)
33+
* network isolation mode for xgboost
34+
* update clarify imageURI for PDT
35+
36+
### Bug Fixes and Other Changes
37+
38+
* retry downstream_trials test
39+
* Add retries to pipeline execution
40+
41+
## v2.64.0 (2021-10-20)
42+
43+
### Deprecations and Removals
44+
45+
* warn for deprecation - Lambda model-predictor
46+
47+
### Features
48+
49+
* Add support for TF 2.5
50+
* Add a pre-push git hook
51+
52+
### Bug Fixes and Other Changes
53+
54+
* add s3_analysis_config_output_path field in DataConfig constructor
55+
* make marketplace jobnames random
56+
57+
## v2.63.2 (2021-10-18)
58+
59+
### Bug Fixes and Other Changes
60+
61+
* Update timeouts for integ tests from 20 to 40
62+
363
## v2.63.1 (2021-10-14)
464

565
### Bug Fixes and Other Changes

README.rst

+12
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,18 @@ You can also run them in parallel:
154154
tox -- -n auto tests/integ
155155

156156

157+
Git Hooks
158+
~~~~~~~~~
159+
160+
To enable all git hooks in the .githooks directory, run these commands in the repository directory:
161+
162+
::
163+
164+
find .git/hooks -type l -exec rm {} \;
165+
find .githooks -type f -exec ln -sf ../../{} .git/hooks/ \;
166+
167+
To enable an individual git hook, simply move it from the .githooks/ directory to the .git/hooks/ directory.
168+
157169
Building Sphinx docs
158170
~~~~~~~~~~~~~~~~~~~~
159171

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.63.2.dev0
1+
2.66.3.dev0

ci-scripts/queue_build.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
from __future__ import absolute_import
1414

1515
import os
16+
import re
1617
import time
18+
1719
import boto3
1820

1921
account = boto3.client(
@@ -23,9 +25,11 @@
2325

2426

2527
def queue_build():
26-
build_id = os.environ.get("CODEBUILD_BUILD_ID", "CODEBUILD-BUILD-ID")
27-
source_version = os.environ.get("CODEBUILD_SOURCE_VERSION", "CODEBUILD-SOURCE-VERSION").replace(
28-
"/", "-"
28+
build_id = re.sub("[_/]", "-", os.environ.get("CODEBUILD_BUILD_ID", "CODEBUILD-BUILD-ID"))
29+
source_version = re.sub(
30+
"[_/]",
31+
"-",
32+
os.environ.get("CODEBUILD_SOURCE_VERSION", "CODEBUILD-SOURCE-VERSION"),
2933
)
3034
ticket_number = int(1000 * time.time())
3135
filename = "%s_%s_%s" % (ticket_number, build_id, source_version)

doc/v2.rst

+6
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,12 @@ Please use :func:`sagemaker.s3.parse_s3_url` instead.
265265

266266
The class ``sagemaker.session.ModelContainer`` has been deprecated, as it is not needed for creating inference pipelines.
267267

268+
``sagemaker.workflow.condition_step.JsonGet``
269+
---------------------------------------------
270+
271+
The class ``sagemaker.workflow.condition_step.JsonGet`` has been deprecated.
272+
Please use :class:`sagemaker.workflow.functions.JsonGet` instead.
273+
268274
Parameter and Class Name Changes
269275
================================
270276

src/sagemaker/clarify.py

+33-12
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def __init__(
3131
self,
3232
s3_data_input_path,
3333
s3_output_path,
34+
s3_analysis_config_output_path=None,
3435
label=None,
3536
headers=None,
3637
features=None,
@@ -43,6 +44,9 @@ def __init__(
4344
Args:
4445
s3_data_input_path (str): Dataset S3 prefix/object URI.
4546
s3_output_path (str): S3 prefix to store the output.
47+
s3_analysis_config_output_path (str): S3 prefix to store the analysis_config output
48+
If this field is None, then the s3_output_path will be used
49+
to store the analysis_config output
4650
label (str): Target attribute of the model required by bias metrics (optional for SHAP)
4751
Specified as column name or index for CSV dataset, or as JSONPath for JSONLines.
4852
headers (list[str]): A list of column names in the input dataset.
@@ -61,6 +65,7 @@ def __init__(
6165
)
6266
self.s3_data_input_path = s3_data_input_path
6367
self.s3_output_path = s3_output_path
68+
self.s3_analysis_config_output_path = s3_analysis_config_output_path
6469
self.s3_data_distribution_type = s3_data_distribution_type
6570
self.s3_compression_type = s3_compression_type
6671
self.label = label
@@ -300,12 +305,13 @@ class SHAPConfig(ExplainabilityConfig):
300305

301306
def __init__(
302307
self,
303-
baseline,
304-
num_samples,
305-
agg_method,
308+
baseline=None,
309+
num_samples=None,
310+
agg_method=None,
306311
use_logit=False,
307312
save_local_shap_values=True,
308313
seed=None,
314+
num_clusters=None,
309315
):
310316
"""Initializes config for SHAP.
311317
@@ -315,34 +321,49 @@ def __init__(
315321
be the same as the dataset format. Each row should contain only the feature
316322
columns/values and omit the label column/values. If None a baseline will be
317323
calculated automatically by using K-means or K-prototypes in the input dataset.
318-
num_samples (int): Number of samples to be used in the Kernel SHAP algorithm.
324+
num_samples (None or int): Number of samples to be used in the Kernel SHAP algorithm.
319325
This number determines the size of the generated synthetic dataset to compute the
320-
SHAP values.
321-
agg_method (str): Aggregation method for global SHAP values. Valid values are
326+
SHAP values. If not provided then Clarify job will choose a proper value according
327+
to the count of features.
328+
agg_method (None or str): Aggregation method for global SHAP values. Valid values are
322329
"mean_abs" (mean of absolute SHAP values for all instances),
323330
"median" (median of SHAP values for all instances) and
324331
"mean_sq" (mean of squared SHAP values for all instances).
332+
If not provided then Clarify job uses method "mean_abs"
325333
use_logit (bool): Indicator of whether the logit function is to be applied to the model
326334
predictions. Default is False. If "use_logit" is true then the SHAP values will
327335
have log-odds units.
328336
save_local_shap_values (bool): Indicator of whether to save the local SHAP values
329337
in the output location. Default is True.
330338
seed (int): seed value to get deterministic SHAP values. Default is None.
339+
num_clusters (None or int): If a baseline is not provided, Clarify automatically
340+
computes a baseline dataset via a clustering algorithm (K-means/K-prototypes).
341+
num_clusters is a parameter for this algorithm. num_clusters will be the resulting
342+
size of the baseline dataset. If not provided, Clarify job will use a default value.
331343
"""
332-
if agg_method not in ["mean_abs", "median", "mean_sq"]:
344+
if agg_method is not None and agg_method not in ["mean_abs", "median", "mean_sq"]:
333345
raise ValueError(
334346
f"Invalid agg_method {agg_method}." f" Please choose mean_abs, median, or mean_sq."
335347
)
336-
348+
if num_clusters is not None and baseline is not None:
349+
raise ValueError(
350+
"Baseline and num_clusters cannot be provided together. "
351+
"Please specify one of the two."
352+
)
337353
self.shap_config = {
338-
"baseline": baseline,
339-
"num_samples": num_samples,
340-
"agg_method": agg_method,
341354
"use_logit": use_logit,
342355
"save_local_shap_values": save_local_shap_values,
343356
}
357+
if baseline is not None:
358+
self.shap_config["baseline"] = baseline
359+
if num_samples is not None:
360+
self.shap_config["num_samples"] = num_samples
361+
if agg_method is not None:
362+
self.shap_config["agg_method"] = agg_method
344363
if seed is not None:
345364
self.shap_config["seed"] = seed
365+
if num_clusters is not None:
366+
self.shap_config["num_clusters"] = num_clusters
346367

347368
def get_explainability_config(self):
348369
"""Returns config."""
@@ -473,7 +494,7 @@ def _run(
473494
json.dump(analysis_config, f)
474495
s3_analysis_config_file = _upload_analysis_config(
475496
analysis_config_file,
476-
data_config.s3_output_path,
497+
data_config.s3_analysis_config_output_path or data_config.s3_output_path,
477498
self.sagemaker_session,
478499
kms_key,
479500
)

src/sagemaker/deprecations.py

+62
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,46 @@ def renamed_warning(phrase):
5050
_warn(f"{phrase} has been renamed")
5151

5252

53+
def deprecation_warn(name, date, msg=None):
54+
"""Raise a warning for soon to be deprecated feature in sagemaker>=2
55+
56+
Args:
57+
name (str): Name of the feature
58+
date (str): the date when the feature will be deprecated
59+
msg (str): the prefix phrase of the warning message.
60+
"""
61+
_warn(f"{name} will be deprecated on {date}. {msg}")
62+
63+
64+
def deprecation_warning(date, msg=None):
65+
"""Decorator for raising deprecation warning for a feature in sagemaker>=2
66+
67+
Args:
68+
date (str): the date when the feature will be deprecated
69+
msg (str): the prefix phrase of the warning message.
70+
71+
Usage:
72+
@deprecation_warning(msg="message", date="date")
73+
def sample_function():
74+
print("xxxx....")
75+
76+
@deprecation_warning(msg="message", date="date")
77+
class SampleClass():
78+
def __init__(self):
79+
print("xxxx....")
80+
81+
"""
82+
83+
def deprecate(obj):
84+
def wrapper(*args, **kwargs):
85+
deprecation_warn(obj.__name__, date, msg)
86+
return obj(*args, **kwargs)
87+
88+
return wrapper
89+
90+
return deprecate
91+
92+
5393
def renamed_kwargs(old_name, new_name, value, kwargs):
5494
"""Checks if the deprecated argument is in kwargs
5595
@@ -106,6 +146,28 @@ def func(*args, **kwargs): # pylint: disable=W0613
106146
return func
107147

108148

149+
def deprecated(obj):
150+
"""Decorator for raising deprecated warning for a feature in sagemaker>=2
151+
152+
Usage:
153+
@deprecated
154+
def sample_function():
155+
print("xxxx....")
156+
157+
@deprecated
158+
class SampleClass():
159+
def __init__(self):
160+
print("xxxx....")
161+
162+
"""
163+
164+
def wrapper(*args, **kwargs):
165+
removed_warning(obj.__name__)
166+
return obj(*args, **kwargs)
167+
168+
return wrapper
169+
170+
109171
def deprecated_function(func, name):
110172
"""Wrap a function with a deprecation warning.
111173

src/sagemaker/estimator.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -650,19 +650,22 @@ def fit(self, inputs=None, wait=True, logs="All", job_name=None, experiment_conf
650650
model using the Amazon SageMaker hosting services.
651651
652652
Args:
653-
inputs (str or dict or sagemaker.inputs.TrainingInput): Information
654-
about the training data. This can be one of three types:
653+
inputs (str or dict or sagemaker.inputs.TrainingInput or
654+
sagemaker.inputs.FileSystemInput): Information about the training data.
655+
This can be one of four types:
655656
656657
* (str) the S3 location where training data is saved, or a file:// path in
657658
local mode.
658-
* (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) If using multiple
659-
channels for training data, you can specify a dict mapping channel names to
660-
strings or :func:`~sagemaker.inputs.TrainingInput` objects.
659+
* (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput] or
660+
dict[str, sagemaker.inputs.FileSystemInput]) If using multiple channels for
661+
training data, you can specify a dict mapping channel names to strings or
662+
:func:`~sagemaker.inputs.TrainingInput` objects or
663+
:func:`~sagemaker.inputs.FileSystemInput` objects.
661664
* (sagemaker.inputs.TrainingInput) - channel configuration for S3 data sources
662665
that can provide additional information as well as the path to the training
663666
dataset.
664667
See :func:`sagemaker.inputs.TrainingInput` for full details.
665-
* (sagemaker.session.FileSystemInput) - channel configuration for
668+
* (sagemaker.inputs.FileSystemInput) - channel configuration for
666669
a file system data source that can provide additional information as well as
667670
the path to the training dataset.
668671

src/sagemaker/fw_utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
"local_gpu",
6060
)
6161
SM_DATAPARALLEL_SUPPORTED_FRAMEWORK_VERSIONS = {
62-
"tensorflow": ["2.3", "2.3.1", "2.3.2", "2.4", "2.4.1"],
62+
"tensorflow": ["2.3", "2.3.1", "2.3.2", "2.4", "2.4.1", "2.4.3", "2.5", "2.5.0", "2.5.1"],
6363
"pytorch": ["1.6", "1.6.0", "1.7", "1.7.1", "1.8", "1.8.0", "1.8.1", "1.9", "1.9.0"],
6464
}
6565
SMDISTRIBUTED_SUPPORTED_STRATEGIES = ["dataparallel", "modelparallel"]
@@ -533,7 +533,7 @@ def _validate_smdataparallel_args(
533533
if "py3" not in py_version:
534534
err_msg += (
535535
f"Provided py_version {py_version} is not supported by smdataparallel.\n"
536-
"Please specify py_version=py3"
536+
"Please specify py_version>=py3"
537537
)
538538

539539
if err_msg:

src/sagemaker/huggingface/model.py

-5
Original file line numberDiff line numberDiff line change
@@ -293,10 +293,6 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
293293
str: The appropriate image URI based on the given parameters.
294294
295295
"""
296-
if image_uris._processor(instance_type, ["cpu", "gpu"]) == "gpu":
297-
container_version = "cu110-ubuntu18.04"
298-
else:
299-
container_version = "ubuntu18.04"
300296
if self.tensorflow_version is not None: # pylint: disable=no-member
301297
base_framework_version = (
302298
f"tensorflow{self.tensorflow_version}" # pylint: disable=no-member
@@ -312,5 +308,4 @@ def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
312308
accelerator_type=accelerator_type,
313309
image_scope="inference",
314310
base_framework_version=base_framework_version,
315-
container_version=container_version,
316311
)

0 commit comments

Comments
 (0)