Skip to content

Commit 85ae329

Browse files
authored
Merge branch 'dev' into patch-1
2 parents e949d57 + 0bae071 commit 85ae329

File tree

97 files changed

+17328
-503
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+17328
-503
lines changed

CHANGELOG.md

+46
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,51 @@
11
# Changelog
22

3+
## v2.72.0 (2021-12-13)
4+
5+
### Features
6+
7+
 * allow conditional parallel builds
8+
9+
### Bug Fixes and Other Changes
10+
11+
* local mode - support relative file structure
12+
* fix endpoint bug
13+
14+
## v2.71.0 (2021-12-06)
15+
16+
### Features
17+
18+
* Add support for TF 2.6
19+
* Adding PT 17/18 Repo
20+
* Add profile_name support for Feature Store ingestion
21+
22+
### Bug Fixes and Other Changes
23+
24+
* Fix non-existent variable name
25+
* Add TF 2.6.2 on training
26+
* Recreate static lineage test data
27+
28+
## v2.70.0 (2021-12-02)
29+
30+
### Features
31+
32+
* update boto3 minor version >= 1.20.18
33+
* Add support for SageMaker lineage queries
34+
* add CV shap explainability for SageMaker Clarify
35+
* add NLP support for SageMaker Clarify
36+
* Add support for ModelMonitor/Clarify integration in model building pipelines
37+
* adding support for transformers 4.11 for SM Training Compiler
38+
 * SM Training Compiler with a UI to enable/disable compilation for HuggingFace DLCs to speed up training
39+
40+
### Bug Fixes and Other Changes
41+
42+
* pin coveragepy
43+
* Add support for PyTorch 1.9.1
44+
* Update s3 path of scheduling analysis config on ClarifyCheckStep
45+
* documentation/logging to indicate correct place for DEBUG artifacts from SM trcomp
46+
* validate requested transformers version and use the best available version
47+
* Install custom pkgs
48+
349
## v2.69.0 (2021-11-12)
450

551
### Features

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2.69.1.dev0
1+
2.72.1.dev0

ci-scripts/queue_build.py

+94-56
Original file line numberDiff line numberDiff line change
@@ -23,100 +23,138 @@
2323
).get_caller_identity()["Account"]
2424
bucket_name = "sagemaker-us-west-2-%s" % account
2525

26+
MAX_IN_PROGRESS_BUILDS = 3
27+
INTERVAL_BETWEEN_CONCURRENT_RUNS = 15 # minutes
28+
CLEAN_UP_TICKETS_OLDER_THAN = 8 # hours
29+
2630

2731
def queue_build():
    """Take a queue ticket for this build and wait until it is scheduled.

    The ticket number is the current time in milliseconds. Tickets that are
    past the clean-up age are removed first, then control is handed to
    ``_wait_for_other_builds``, which returns once this build may run.
    """
    ticket_number = int(time.time() * 1000)
    _cleanup_tickets_older_than(_list_tickets())
    _wait_for_other_builds(ticket_number)
4336

4437

4538
def _build_info_from_file(file):
46-
filename = file.key.split("/")[1]
39+
filename = file.key.split("/")[2]
4740
ticket_number, build_id, source_version = filename.split("_")
4841
return int(ticket_number), build_id, source_version
4942

5043

51-
def _wait_for_other_builds(files, ticket_number):
52-
newfiles = list(filter(lambda file: not _file_older_than(file), files))
53-
sorted_files = list(sorted(newfiles, key=lambda y: y.key))
44+
def _wait_for_other_builds(ticket_number):
    """Register this build in the queue and block until it may run.

    Prints the current queue, writes a ``waiting`` ticket for this build and
    then polls every 30 seconds. The build is moved to ``in-progress`` once
    all of the following hold:

    * fewer than ``MAX_IN_PROGRESS_BUILDS`` tickets are in progress,
    * ``last_in_progress_elapsed_time_check()`` returns True, and
    * this build's ticket is the first one in the waiting list.

    Args:
        ticket_number (int): ticket number taken by this build.
    """
    sorted_files = _list_tickets()

    print("build queue status:")
    print()

    for order, file in enumerate(sorted_files):
        file_ticket_number, build_id, source_version = _build_info_from_file(file)
        print(
            "%s -> %s %s, ticket number: %s status: %s"
            % (order, build_id, source_version, file_ticket_number, file.key.split("/")[1])
        )
    print()

    # "_" separates the ticket fields and "/" separates key folders, so both
    # are replaced in the values embedded in the ticket name.
    build_id = re.sub("[_/]", "-", os.environ.get("CODEBUILD_BUILD_ID", "CODEBUILD-BUILD-ID"))
    source_version = re.sub(
        "[_/]",
        "-",
        os.environ.get("CODEBUILD_SOURCE_VERSION", "CODEBUILD-SOURCE-VERSION"),
    )
    filename = "%s_%s_%s" % (ticket_number, build_id, source_version)
    s3_file_obj = _write_ticket(filename, status="waiting")
    print("Build %s waiting to be scheduled" % filename)

    while True:
        _cleanup_tickets_with_terminal_states()
        waiting_tickets = _list_tickets("waiting")
        if waiting_tickets:
            # Reuse the listing that was just checked; listing a second time
            # could come back empty and make the [0] lookup fail.
            first_waiting_ticket_number, _, _ = _build_info_from_file(waiting_tickets[0])
        else:
            first_waiting_ticket_number = ticket_number

        if (
            len(_list_tickets(status="in-progress")) < MAX_IN_PROGRESS_BUILDS
            and last_in_progress_elapsed_time_check()
            and first_waiting_ticket_number == ticket_number
        ):
            # put the build in progress
            print("Scheduling build %s for running.." % filename)
            s3_file_obj.delete()
            _write_ticket(filename, status="in-progress")
            break

        # wait
        time.sleep(30)
6388

64-
for file in sorted_files:
65-
file_ticket_number, build_id, source_version = _build_info_from_file(file)
6689

67-
if file_ticket_number == ticket_number:
90+
def last_in_progress_elapsed_time_check():
    """Tell whether the last in-progress ticket is old enough to start another build.

    Returns:
        bool: True when there is no in-progress ticket, or when the ticket
        number (a millisecond timestamp) of the last ticket in the key-sorted
        in-progress listing is more than ``INTERVAL_BETWEEN_CONCURRENT_RUNS``
        whole minutes in the past.
    """
    in_progress_tickets = _list_tickets("in-progress")
    if not in_progress_tickets:
        return True
    # Reuse the listing that was just checked; listing a second time could
    # come back empty and make the [-1] lookup fail.
    last_in_progress_ticket, _, _ = _build_info_from_file(in_progress_tickets[-1])
    _elapsed_time = int(1000 * time.time()) - last_in_progress_ticket
    last_in_progress_elapsed_time = int(_elapsed_time / (1000 * 60))  # in minutes
    return last_in_progress_elapsed_time > INTERVAL_BETWEEN_CONCURRENT_RUNS
6898

69-
break
70-
else:
71-
while True:
72-
client = boto3.client("codebuild")
73-
response = client.batch_get_builds(ids=[build_id])
74-
build_status = response["builds"][0]["buildStatus"]
75-
76-
if build_status == "IN_PROGRESS":
77-
print(
78-
"waiting on build %s %s %s" % (build_id, source_version, file_ticket_number)
79-
)
80-
time.sleep(30)
81-
else:
82-
print("build %s finished, deleting lock" % build_id)
83-
file.delete()
84-
break
85-
86-
87-
def _cleanup_tickets_older_than_8_hours(files):
99+
100+
def _cleanup_tickets_with_terminal_states():
    """Delete queue tickets whose CodeBuild build is no longer in progress.

    Every ticket's build id is looked up with ``batch_get_builds``. A ticket
    is deleted when the service returns a build with that id and its
    ``buildStatus`` is anything other than ``"IN_PROGRESS"``. Tickets whose
    build id is not returned by the service are left untouched.
    """
    files = _list_tickets()
    if not files:
        # batch_get_builds rejects an empty id list.
        return

    tickets = [(file, _build_info_from_file(file)[1]) for file in files]
    build_ids = sorted({build_id for _, build_id in tickets})

    client = boto3.client("codebuild")
    # Match builds to tickets by id instead of by position: the response
    # omits builds it cannot find, so positions do not line up in general.
    build_statuses = {}
    batch_size = 100  # batch_get_builds accepts at most 100 ids per call
    for start in range(0, len(build_ids), batch_size):
        response = client.batch_get_builds(ids=build_ids[start : start + batch_size])
        for build_details in response["builds"]:
            build_statuses[build_details["id"]] = build_details["buildStatus"]

    for file, build_id in tickets:
        build_status = build_statuses.get(build_id)
        if build_status is not None and build_status != "IN_PROGRESS":
            print("Build %s in terminal state: %s, deleting lock" % (build_id, build_status))
            file.delete()
120+
121+
122+
def _cleanup_tickets_older_than(files):
    """Delete the tickets that ``_file_older_than`` reports as too old.

    Args:
        files (list): ticket objects exposing ``key`` and ``delete()``.

    Returns:
        list: the ``files`` argument itself; deleted tickets are not removed
        from it.
    """
    oldfiles = [file for file in files if _file_older_than(file)]
    for file in oldfiles:
        # Report the configured threshold rather than a hard-coded "8".
        print(
            "object %s older than %s hours. Deleting" % (file.key, CLEAN_UP_TICKETS_OLDER_THAN)
        )
        file.delete()
    return files
93128

94129

95-
def _list_tickets():
130+
def _list_tickets(status=None):
    """List queue ticket objects in the CI bucket, sorted by key.

    Args:
        status (str): optional status folder to restrict the listing to (the
            script uses ``"waiting"`` and ``"in-progress"``). When omitted,
            tickets under every status folder are listed.

    Returns:
        list: S3 object summaries under the queue prefix, ordered by key.
    """
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(bucket_name)
    prefix = "ci-integ-queue/{}/".format(status) if status else "ci-integ-queue/"
    # Filter on the key string. Keys ending in "/" are folder placeholders,
    # not tickets, and cannot be parsed by _build_info_from_file.
    files = [
        file for file in bucket.objects.filter(Prefix=prefix) if not file.key.endswith("/")
    ]
    return sorted(files, key=lambda y: y.key)
101138

102139

103140
def _file_older_than(file):
    """Return True if the ticket is older than the clean-up threshold.

    The ticket number parsed from the key is treated as a millisecond
    timestamp and compared with the current time; the limit is
    ``CLEAN_UP_TICKETS_OLDER_THAN`` hours.
    """
    ms_per_hour = 1000 * 60 * 60
    max_age_ms = ms_per_hour * CLEAN_UP_TICKETS_OLDER_THAN
    created_ms = _build_info_from_file(file)[0]
    age_ms = int(1000 * time.time()) - created_ms
    return age_ms > max_age_ms
107144

108-
return int(time.time()) - file_ticket_number > timelimit
109-
110-
111-
def _write_ticket(ticket_number):
112145

113-
if not os.path.exists("ci-lock"):
114-
os.mkdir("ci-lock")
146+
def _write_ticket(filename, status="waiting"):
    """Create a ticket file locally and upload it to the queue in S3.

    The file is written to ``ci-integ-queue/<status>/<filename>`` relative to
    the working directory and uploaded to the bucket under the same key.

    Args:
        filename (str): ticket name; it is also written as the file content.
        status (str): status folder under ``ci-integ-queue``.

    Returns:
        The S3 object the ticket was uploaded to.
    """
    file_path = "ci-integ-queue/{}".format(status)
    # exist_ok avoids the check-then-create race of testing for the folder first.
    os.makedirs(file_path, exist_ok=True)

    file_full_path = file_path + "/" + filename
    with open(file_full_path, "w") as file:
        file.write(filename)
    s3_file_obj = boto3.Session().resource("s3").Object(bucket_name, file_full_path)
    s3_file_obj.upload_file(file_full_path)
    print("Build %s is now in state %s" % (filename, status))
    return s3_file_obj
120158

121159

122160
if __name__ == "__main__":

doc/api/inference/model_monitor.rst

+10
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,13 @@ Model Monitor
3131
:members:
3232
:undoc-members:
3333
:show-inheritance:
34+
35+
.. automodule:: sagemaker.model_metrics
36+
:members:
37+
:undoc-members:
38+
:show-inheritance:
39+
40+
.. automodule:: sagemaker.drift_check_baselines
41+
:members:
42+
:undoc-members:
43+
:show-inheritance:

doc/frameworks/huggingface/sagemaker.huggingface.rst

+11-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,15 @@ Hugging Face
44
Hugging Face Estimator
55
----------------------
66

7-
.. autoclass:: sagemaker.huggingface.estimator.HuggingFace
7+
.. autoclass:: sagemaker.huggingface.HuggingFace
8+
:members:
9+
:undoc-members:
10+
:show-inheritance:
11+
12+
Hugging Face Training Compiler Configuration
13+
--------------------------------------------
14+
15+
.. autoclass:: sagemaker.huggingface.TrainingCompilerConfig
816
:members:
917
:undoc-members:
1018
:show-inheritance:
@@ -17,8 +25,8 @@ Hugging Face Model
1725
:undoc-members:
1826
:show-inheritance:
1927

20-
HuggingFace Predictor
21-
---------------------
28+
Hugging Face Predictor
29+
----------------------
2230

2331
.. autoclass:: sagemaker.huggingface.model.HuggingFacePredictor
2432
:members:

doc/workflows/pipelines/sagemaker.workflow.pipelines.rst

+15
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ Conditions
3232

3333
.. autoclass:: sagemaker.workflow.conditions.ConditionOr
3434

35+
CheckJobConfig
36+
--------------
37+
38+
.. autoclass:: sagemaker.workflow.check_job_config.CheckJobConfig
39+
3540
Entities
3641
--------
3742

@@ -128,3 +133,13 @@ Steps
128133
.. autoclass:: sagemaker.workflow.steps.CacheConfig
129134

130135
.. autoclass:: sagemaker.workflow.lambda_step.LambdaStep
136+
137+
.. autoclass:: sagemaker.workflow.steps.CompilationStep
138+
139+
.. autoclass:: sagemaker.workflow.quality_check_step.QualityCheckConfig
140+
141+
.. autoclass:: sagemaker.workflow.quality_check_step.QualityCheckStep
142+
143+
.. autoclass:: sagemaker.workflow.clarify_check_step.ClarifyCheckConfig
144+
145+
.. autoclass:: sagemaker.workflow.clarify_check_step.ClarifyCheckStep

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def read_version():
3434
# Declare minimal set for installation
3535
required_packages = [
3636
"attrs",
37-
"boto3>=1.16.32",
37+
"boto3>=1.20.18",
3838
"google-pasta",
3939
"numpy>=1.9.0",
4040
"protobuf>=3.1",
@@ -69,6 +69,7 @@ def read_version():
6969
"pytest-rerunfailures",
7070
"pytest-timeout",
7171
"pytest-xdist",
72+
"coverage<6.2",
7273
"mock",
7374
"contextlib2",
7475
"awslogs",

src/sagemaker/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
from sagemaker.local.local_session import LocalSession # noqa: F401
5050

5151
from sagemaker.model import Model, ModelPackage # noqa: F401
52-
from sagemaker.model_metrics import ModelMetrics, MetricsSource # noqa: F401
52+
from sagemaker.model_metrics import ModelMetrics, MetricsSource, FileSource # noqa: F401
5353
from sagemaker.pipeline import PipelineModel # noqa: F401
5454
from sagemaker.predictor import Predictor # noqa: F401
5555
from sagemaker.processing import Processor, ScriptProcessor # noqa: F401

0 commit comments

Comments
 (0)