|
29 | 29 |
|
30 | 30 | from schema import Schema, And, Use, Or, Optional, Regex
|
31 | 31 |
|
32 |
| -from sagemaker import image_uris, s3, utils, Session |
| 32 | +from sagemaker import image_uris, s3, utils |
| 33 | +from sagemaker.session import Session |
33 | 34 | from sagemaker.network import NetworkConfig
|
34 | 35 | from sagemaker.processing import ProcessingInput, ProcessingOutput, Processor
|
35 | 36 |
|
36 | 37 | logger = logging.getLogger(__name__)
|
37 | 38 |
|
38 | 39 |
|
39 |
| -ENDPOINT_NAME_PREFIX_PATTERN = "^[a-zA-Z0-9](-*[a-zA-Z0-9])" |
# Regular expression applied to the "endpoint_name_prefix" predictor field.
# It is anchored only at the start, so it constrains how the prefix begins:
# an alphanumeric character, any number of hyphens, then another alphanumeric
# character.
ENDPOINT_NAME_PREFIX_PATTERN = r"^[a-zA-Z0-9](-*[a-zA-Z0-9])"

# Regular expression applied to the "model_name" predictor field: alphanumeric
# characters and hyphens, beginning and ending with an alphanumeric character.
# The trailing \Z matters because the pattern is applied with search semantics
# and its only group is optional; without an end anchor any string that merely
# starts with an alphanumeric character (e.g. "my_model!") would be accepted.
MODEL_NAME_PATTERN = r"^[a-zA-Z0-9]([\-a-zA-Z0-9]*[a-zA-Z0-9])?\Z"
| 42 | + |
| 43 | + |
def _validate_s3_path(path: str) -> str:
    """Check that ``path`` is a well-formed S3 URI and return it unchanged.

    A path is accepted when it starts with ``s3://``, has no ``/`` directly
    after that prefix, and contains no ``//`` anywhere after the prefix.

    The checks use explicit ``raise`` statements rather than ``assert``
    statements, because ``assert`` is removed when Python runs with ``-O``
    and the validation would then silently accept every path.

    Args:
        path: Candidate S3 URI, e.g. ``"s3://bucket/prefix/data.csv"``.

    Returns:
        The same ``path`` that was passed in.

    Raises:
        AssertionError: If ``path`` fails any of the checks. This is the
            exception type the assert-based checks raised, kept so existing
            callers observe the same failure.
    """
    prefix = "s3://"
    if not path.startswith(prefix):
        raise AssertionError(f"S3 path must start with {prefix!r}: {path!r}")

    remainder = path[len(prefix):]
    if "//" in remainder:
        raise AssertionError(f"S3 path must not contain '//' after {prefix!r}: {path!r}")
    if remainder.startswith("/"):
        raise AssertionError(f"S3 path must not have '/' right after {prefix!r}: {path!r}")
    return path
40 | 52 |
|
41 | 53 |
|
42 | 54 | ANALYSIS_CONFIG_SCHEMA_V1_0 = Schema(
|
|
54 | 66 | "application/x-image",
|
55 | 67 | ),
|
56 | 68 | ),
|
57 |
| - Optional("dataset_uri"): str, |
| 69 | + Optional("dataset_uri"): And(str, _validate_s3_path), |
58 | 70 | Optional("headers"): [str],
|
59 | 71 | Optional("label"): Or(str, int),
|
60 | 72 | # this field indicates user provides predicted_label in dataset
|
|
65 | 77 | Optional("facet"): [
|
66 | 78 | {"name_or_index": Or(str, int), Optional("value_or_threshold"): [Or(int, float, str)]}
|
67 | 79 | ],
|
68 |
| - Optional("facet_dataset_uri"): str, |
| 80 | + Optional("facet_dataset_uri"): And(str, _validate_s3_path), |
69 | 81 | Optional("facet_headers"): [str],
|
70 |
| - Optional("predicted_label_dataset_uri"): str, |
| 82 | + Optional("predicted_label_dataset_uri"): And(str, _validate_s3_path), |
71 | 83 | Optional("predicted_label_headers"): [str],
|
72 | 84 | Optional("excluded_columns"): [Or(int, str)],
|
73 | 85 | Optional("joinsource_name_or_index"): Or(str, int),
|
|
82 | 94 | Or(
|
83 | 95 | # CSV row
|
84 | 96 | [Or(int, float, str, None)],
|
85 |
| - # JSON row (any JSON object). As I write this only |
86 |
| - # SageMaker JSONLines Dense Format ([1]) |
87 |
| - # is supported and the validation is NOT done |
88 |
| - # by the schema but by the data loader. |
89 |
| - # [1] https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-inference.html#cm-jsonlines |
| 97 | + # JSON row (any JSON object). As I write this only |
| 98 | + # SageMaker JSONLines Dense Format ([1]) |
| 99 | + # is supported and the validation is NOT done |
| 100 | + # by the schema but by the data loader. |
| 101 | + # [1] https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-inference.html#cm-jsonlines |
90 | 102 | {object: object},
|
91 | 103 | )
|
92 | 104 | ],
|
|
266 | 278 | Optional("predictor"): {
|
267 | 279 | Optional("endpoint_name"): str,
|
268 | 280 | Optional("endpoint_name_prefix"): And(str, Regex(ENDPOINT_NAME_PREFIX_PATTERN)),
|
269 |
| - Optional("model_name"): str, |
| 281 | + Optional("model_name"): And(str, Regex(MODEL_NAME_PATTERN)), |
270 | 282 | Optional("target_model"): str,
|
271 | 283 | Optional("instance_type"): str,
|
272 | 284 | Optional("initial_instance_count"): int,
|
|
0 commit comments