|
56 | 56 | "\n",
|
57 | 57 | "- Access to the SageMaker default S3 bucket\n",
|
58 | 58 | "- Access to Elastic Container Registry (ECR)\n",
|
59 |
| - "- For the optional portion of this lab, you will need access to CloudFormation, Service Catelog, and Cost Explore\n", |
| 59 | + "- For the optional portion of this lab, you will need access to CloudFormation, Service Catalog, and Cost Explorer\n", |
60 | 60 | "- Familiarity with Training on Amazon SageMaker\n",
|
61 | 61 | "- Familiarity with Python\n",
|
62 | 62 | "- Familiarity with AWS S3\n",
|
|
72 | 72 | "source": [
|
73 | 73 | "## Setup\n",
|
74 | 74 | "\n",
|
75 |
| - "Here we define the sagemaker session, default bucket, job prefixes, pipeline and model group names\n", |
76 |
| - "\n", |
77 |
| - "We are using some of the newly released SageMaker Pipeline features. Please make sure you ugrade your sageMaker version by running the cell below." |
| 75 | + "Here we define the SageMaker session, default bucket, job prefixes, pipeline, and model group names." |
78 | 76 | ],
|
79 | 77 | "metadata": {}
|
80 | 78 | },
|
|
189 | 187 | "cell_type": "code",
|
190 | 188 | "execution_count": null,
|
191 | 189 | "source": [
|
192 |
| - "from sagemaker.workflow.parameters import (\r\n", |
193 |
| - " ParameterInteger,\r\n", |
194 |
| - " ParameterString,\r\n", |
195 |
| - ")\r\n", |
196 |
| - "\r\n", |
197 |
| - "# Parameters for pipeline execution\r\n", |
198 |
| - "processing_instance_count = ParameterInteger(\r\n", |
199 |
| - " name=\"ProcessingInstanceCount\", default_value=1\r\n", |
200 |
| - ")\r\n", |
201 |
| - "\r\n", |
202 |
| - "model_approval_status = ParameterString(\r\n", |
203 |
| - " name=\"ModelApprovalStatus\",\r\n", |
204 |
| - " default_value=\"PendingManualApproval\" # ModelApprovalStatus can be set to a default of \"Approved\" if you don't want manual approval.\r\n", |
205 |
| - ")\r\n", |
206 |
| - "\r\n", |
207 |
| - "input_data = ParameterString(\r\n", |
208 |
| - " name=\"InputDataUrl\",\r\n", |
209 |
| - " default_value=s3_raw_data\r\n", |
210 |
| - ")\r\n", |
211 |
| - "\r\n", |
212 |
| - "input_annotation = ParameterString(\r\n", |
213 |
| - " name=\"AnnotationFileName\",\r\n", |
214 |
| - " default_value=\"classes.txt\"\r\n", |
215 |
| - ")\r\n", |
216 |
| - "\r\n", |
217 |
| - "# This is a large dataset, we are only going to train a subset of the classes\r\n", |
218 |
| - "class_selection = ParameterString(\r\n", |
219 |
| - " name=\"ClassSelection\",\r\n", |
220 |
| - " default_value=\"13, 17, 35, 36, 47, 68, 73, 87\" #If use the mini dataset, please make sure to use the class index with the available list\r\n", |
221 |
| - ")\r\n", |
222 |
| - "\r\n", |
223 |
| - "processing_instance_type = \"ml.m5.xlarge\"\r\n", |
224 |
| - "training_instance_count = 1\r\n", |
| 190 | + "from sagemaker.workflow.parameters import (\n", |
| 191 | + " ParameterInteger,\n", |
| 192 | + " ParameterString,\n", |
| 193 | + ")\n", |
| 194 | + "\n", |
| 195 | + "# Parameters for pipeline execution\n", |
| 196 | + "processing_instance_count = ParameterInteger(\n", |
| 197 | + " name=\"ProcessingInstanceCount\", default_value=1\n", |
| 198 | + ")\n", |
| 199 | + "\n", |
| 200 | + "input_data = ParameterString(\n", |
| 201 | + " name=\"InputDataUrl\",\n", |
| 202 | + " default_value=s3_raw_data\n", |
| 203 | + ")\n", |
| 204 | + "\n", |
| 205 | + "input_annotation = ParameterString(\n", |
| 206 | + " name=\"AnnotationFileName\",\n", |
| 207 | + " default_value=\"classes.txt\"\n", |
| 208 | + ")\n", |
| 209 | + "\n", |
| 210 | + "# This is a large dataset, we are only going to train a subset of the classes\n", |
| 211 | + "class_selection = ParameterString(\n", |
| 212 | + " name=\"ClassSelection\",\n", |
| 213 | + "    default_value=\"13, 17, 35, 36, 47, 68, 73, 87\" # If you use the mini dataset, make sure to choose class indexes from the available list\n", |
| 214 | + ")\n", |
| 215 | + "\n", |
| 216 | + "processing_instance_type = \"ml.m5.xlarge\"\n", |
| 217 | + "training_instance_count = 1\n", |
225 | 218 | "training_instance_type = \"ml.c5.4xlarge\""
|
226 | 219 | ],
|
227 | 220 | "outputs": [],
|
|
261 | 254 | "cell_type": "markdown",
|
262 | 255 | "source": [
|
263 | 256 | "### Preprocess data step\n",
|
264 |
| - "We are taking the original code in Jupyter notebook and containerized script to run in a preprocessing job.\n", |
| 257 | + "We take the original code in the Jupyter notebook and create a containerized script to run in a preprocessing job.\n", |
265 | 258 | "\n",
|
266 | 259 | "The [preprocess.py](./preprocess.py) script takes in the raw images files and splits them into training, validation and test sets by class.\n",
|
267 | 260 | "It merges the class annotation files so that you have a manifest file for each separate data set. It also exposes two parameters: classes (allows you to filter the number of classes you want to train the model on; default is all classes) and input-data (the human-readable name of the classes).\n",
|
|
478 | 471 | "from sagemaker.processing import (\r\n",
|
479 | 472 | " ProcessingInput,\r\n",
|
480 | 473 | " ProcessingOutput,\r\n",
|
481 |
| - " FrameworkProcessor,\r\n", |
482 | 474 | " ScriptProcessor,\r\n",
|
483 | 475 | ")\r\n",
|
484 | 476 | "\r\n",
|
485 |
| - "\r\n", |
486 |
| - "\r\n", |
487 | 477 | "eval_steps = dict()\r\n",
|
488 | 478 | "eval_reports = dict()\r\n",
|
489 | 479 | "\r\n",
|
|
527 | 517 | " property_files=[evaluation_report],\r\n",
|
528 | 518 | " cache_config=cache_config\r\n",
|
529 | 519 | " )\r\n",
|
530 |
| - " \r\n", |
| 520 | + "\r\n", |
531 | 521 | " eval_steps[t] = step_eval\r\n",
|
532 | 522 | " eval_reports[t] = evaluation_report"
|
533 | 523 | ],
|
|
583 | 573 | " inference_instances=[\"ml.t2.medium\", \"ml.m5.large\"],\r\n",
|
584 | 574 | " transform_instances=[\"ml.m5.large\"],\r\n",
|
585 | 575 | " model_package_group_name=model_package_group_name,\r\n",
|
586 |
| - " approval_status=model_approval_status,\r\n", |
587 | 576 | " model_metrics=model_metrics,\r\n",
|
588 | 577 | " )\r\n",
|
589 | 578 | " \r\n",
|
|
607 | 596 | "execution_count": null,
|
608 | 597 | "source": [
|
609 | 598 | "from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo\r\n",
|
610 |
| - "from sagemaker.workflow.condition_step import (\r\n", |
611 |
| - " ConditionStep,\r\n", |
612 |
| - " JsonGet,\r\n", |
613 |
| - ")\r\n", |
| 599 | + "from sagemaker.workflow.condition_step import ConditionStep\r\n", |
| 600 | + "from sagemaker.workflow.functions import JsonGet\r\n", |
614 | 601 | "\r\n",
|
615 | 602 | "condition_steps = dict()\r\n",
|
616 | 603 | "\r\n",
|
|
620 | 607 | " # Models with a test accuracy lower than the condition will not be registered with the model registry.\r\n",
|
621 | 608 | " cond_gte = ConditionGreaterThanOrEqualTo(\r\n",
|
622 | 609 | " left=JsonGet(\r\n",
|
623 |
| - " step=eval_steps[t],\r\n", |
| 610 | + " step_name=eval_steps[t].name,\r\n", |
624 | 611 | " property_file=eval_reports[t],\r\n",
|
625 | 612 | " json_path=\"multiclass_classification_metrics.accuracy.value\",\r\n",
|
626 | 613 | " ),\r\n",
|
|
677 | 664 | " name=pipeline_name,\r\n",
|
678 | 665 | " parameters=[\r\n",
|
679 | 666 | " processing_instance_count,\r\n",
|
680 |
| - " model_approval_status,\r\n", |
681 | 667 | " input_data,\r\n",
|
682 | 668 | " input_annotation,\r\n",
|
683 | 669 | " class_selection\r\n",
|
|
817 | 803 | "# ProcessingInstanceType=\"ml.m5.xlarge\",\r\n",
|
818 | 804 | "# TrainingInstanceCount=1,\r\n",
|
819 | 805 | "# TrainingInstanceType=\"ml.c5.4xlarge\",#\"ml.p3.2xlarge\",#\r\n",
|
820 |
| - " ModelApprovalStatus=\"PendingManualApproval\",\r\n", |
821 | 806 | " AnnotationFileName=\"classes.txt\",\r\n",
|
822 | 807 | " ClassSelection=\"13, 17, 35, 36\"\r\n",
|
823 | 808 | " )\r\n",
|
|
848 | 833 | "To automate the deployment process, you can use Amazon EventBridge to invoke a **deployment Lambda function** that checks the `ModelApprovalStatus` attribute in the event. If the status is **Approved**, the Lambda will continue with the deployment."
|
849 | 834 | ],
|
850 | 835 | "metadata": {}
|
851 |
| - }, |
852 |
| - { |
853 |
| - "cell_type": "markdown", |
854 |
| - "source": [ |
855 |
| - "## Clean up\n", |
856 |
| - "Delete the model registry and the pipeline after you complete the lab." |
857 |
| - ], |
858 |
| - "metadata": {} |
859 |
| - }, |
860 |
| - { |
861 |
| - "cell_type": "code", |
862 |
| - "execution_count": null, |
863 |
| - "source": [ |
864 |
| - "def delete_model_package_group(sm_client, package_group_name):\r\n", |
865 |
| - " try:\r\n", |
866 |
| - " model_versions = sm_client.list_model_packages(ModelPackageGroupName=package_group_name)\r\n", |
867 |
| - "\r\n", |
868 |
| - " except Exception as e:\r\n", |
869 |
| - " print(\"{} \\n\".format(e))\r\n", |
870 |
| - " return\r\n", |
871 |
| - "\r\n", |
872 |
| - " for model_version in model_versions[\"ModelPackageSummaryList\"]:\r\n", |
873 |
| - " try:\r\n", |
874 |
| - " sm_client.delete_model_package(ModelPackageName=model_version[\"ModelPackageArn\"])\r\n", |
875 |
| - " except Exception as e:\r\n", |
876 |
| - " print(\"{} \\n\".format(e))\r\n", |
877 |
| - " time.sleep(0.5) # Ensure requests aren't throttled\r\n", |
878 |
| - "\r\n", |
879 |
| - " try:\r\n", |
880 |
| - " sm_client.delete_model_package_group(ModelPackageGroupName=package_group_name)\r\n", |
881 |
| - " print(\"{} model package group deleted\".format(package_group_name))\r\n", |
882 |
| - " except Exception as e:\r\n", |
883 |
| - " print(\"{} \\n\".format(e))\r\n", |
884 |
| - " return\r\n", |
885 |
| - "\r\n", |
886 |
| - "\r\n", |
887 |
| - "def delete_sagemaker_pipeline(sm_client, pipeline_name):\r\n", |
888 |
| - " try:\r\n", |
889 |
| - " sm_client.delete_pipeline(\r\n", |
890 |
| - " PipelineName=pipeline_name,\r\n", |
891 |
| - " )\r\n", |
892 |
| - " print(\"{} pipeline deleted\".format(pipeline_name))\r\n", |
893 |
| - " except Exception as e:\r\n", |
894 |
| - " print(\"{} \\n\".format(e))\r\n", |
895 |
| - " return\r\n", |
896 |
| - " \r\n", |
897 |
| - "def delete_sagemaker_project(sm_client, project_name):\r\n", |
898 |
| - " try:\r\n", |
899 |
| - " sm_client.delete_project(\r\n", |
900 |
| - " ProjectName=project_name,\r\n", |
901 |
| - " )\r\n", |
902 |
| - " print(\"{} project deleted\".format(project_name))\r\n", |
903 |
| - " except Exception as e:\r\n", |
904 |
| - " print(\"{} \\n\".format(e))\r\n", |
905 |
| - " return" |
906 |
| - ], |
907 |
| - "outputs": [], |
908 |
| - "metadata": {} |
909 |
| - }, |
910 |
| - { |
911 |
| - "cell_type": "code", |
912 |
| - "execution_count": null, |
913 |
| - "source": [ |
914 |
| - "import boto3\n", |
915 |
| - "import time\n", |
916 |
| - "\n", |
917 |
| - "client = boto3.client(\"sagemaker\")\n", |
918 |
| - "\n", |
919 |
| - "# Uncomment the lines below to clean the pipeline.\n", |
920 |
| - "#delete_model_package_group(client, model_package_group_name)\n", |
921 |
| - "#delete_sagemaker_pipeline(client, pipeline_name)\n", |
922 |
| - "\n", |
923 |
| - "#delete_model_package_group(client, model_package_group_name2)\n", |
924 |
| - "#delete_sagemaker_pipeline(client, pipeline_name2)\n", |
925 |
| - "\n", |
926 |
| - "# delete_sagemaker_project(client, \"<Your-Project-Name>\")#\"cv-week4-training\") #" |
927 |
| - ], |
928 |
| - "outputs": [], |
929 |
| - "metadata": {} |
930 | 836 | }
|
931 | 837 | ],
|
932 | 838 | "metadata": {
|
933 | 839 | "instance_type": "ml.t3.medium",
|
934 | 840 | "kernelspec": {
|
935 | 841 | "display_name": "Python 3 (Data Science)",
|
936 | 842 | "language": "python",
|
937 |
| - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" |
| 843 | + "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-1:470317259841:image/datascience-1.0" |
938 | 844 | },
|
939 | 845 | "language_info": {
|
940 | 846 | "codemirror_mode": {
|
|
0 commit comments