24
24
from textwrap import dedent
25
25
from typing import Dict , List , Optional , Union
26
26
from copy import copy
27
+ import re
27
28
28
29
import attr
29
30
@@ -1659,6 +1660,7 @@ def run( # type: ignore[override]
1659
1660
job_name : Optional [str ] = None ,
1660
1661
experiment_config : Optional [Dict [str , str ]] = None ,
1661
1662
kms_key : Optional [str ] = None ,
1663
+ codeartifact_repo_arn : Optional [str ] = None ,
1662
1664
):
1663
1665
"""Runs a processing job.
1664
1666
@@ -1759,12 +1761,21 @@ def run( # type: ignore[override]
1759
1761
However, the value of `TrialComponentDisplayName` is honored for display in Studio.
1760
1762
kms_key (str): The ARN of the KMS key that is used to encrypt the
1761
1763
user code file (default: None).
1764
+ codeartifact_repo_arn (str): The ARN of the CodeArtifact repository that should be
1765
+ logged into before installing dependencies (default: None).
1762
1766
Returns:
1763
1767
None or pipeline step arguments in case the Processor instance is built with
1764
1768
:class:`~sagemaker.workflow.pipeline_context.PipelineSession`
1765
1769
"""
1766
1770
s3_runproc_sh , inputs , job_name = self ._pack_and_upload_code (
1767
- code , source_dir , dependencies , git_config , job_name , inputs , kms_key
1771
+ code ,
1772
+ source_dir ,
1773
+ dependencies ,
1774
+ git_config ,
1775
+ job_name ,
1776
+ inputs ,
1777
+ kms_key ,
1778
+ codeartifact_repo_arn ,
1768
1779
)
1769
1780
1770
1781
# Submit a processing job.
@@ -1781,7 +1792,15 @@ def run( # type: ignore[override]
1781
1792
)
1782
1793
1783
1794
def _pack_and_upload_code (
1784
- self , code , source_dir , dependencies , git_config , job_name , inputs , kms_key = None
1795
+ self ,
1796
+ code ,
1797
+ source_dir ,
1798
+ dependencies ,
1799
+ git_config ,
1800
+ job_name ,
1801
+ inputs ,
1802
+ kms_key = None ,
1803
+ codeartifact_repo_arn = None ,
1785
1804
):
1786
1805
"""Pack local code bundle and upload to Amazon S3."""
1787
1806
if code .startswith ("s3://" ):
@@ -1822,12 +1841,65 @@ def _pack_and_upload_code(
1822
1841
script = estimator .uploaded_code .script_name
1823
1842
evaluated_kms_key = kms_key if kms_key else self .output_kms_key
1824
1843
s3_runproc_sh = self ._create_and_upload_runproc (
1825
- script , evaluated_kms_key , entrypoint_s3_uri
1844
+ script , evaluated_kms_key , entrypoint_s3_uri , codeartifact_repo_arn
1826
1845
)
1827
1846
1828
1847
return s3_runproc_sh , inputs , job_name
1829
1848
1830
- def _generate_framework_script (self , user_script : str ) -> str :
1849
+ def _get_codeartifact_index (self , codeartifact_repo_arn : str ):
1850
+ """
1851
+ Build the authenticated codeartifact index url based on the arn provided
1852
+ via codeartifact_repo_arn property following the form
1853
+ # `arn:${Partition}:codeartifact:${Region}:${Account}:repository/${Domain}/${Repository}`
1854
+ https://docs.aws.amazon.com/codeartifact/latest/ug/python-configure-pip.html
1855
+ https://docs.aws.amazon.com/service-authorization/latest/reference/list_awscodeartifact.html#awscodeartifact-resources-for-iam-policies
1856
+ :return: authenticated codeartifact index url
1857
+ """
1858
+
1859
+ arn_regex = (
1860
+ "arn:(?P<partition>[^:]+):codeartifact:(?P<region>[^:]+):(?P<account>[^:]+)"
1861
+ ":repository/(?P<domain>[^/]+)/(?P<repository>.+)"
1862
+ )
1863
+ m = re .match (arn_regex , codeartifact_repo_arn )
1864
+ if not m :
1865
+ raise Exception ("invalid CodeArtifact repository arn {}" .format (codeartifact_repo_arn ))
1866
+ domain = m .group ("domain" )
1867
+ owner = m .group ("account" )
1868
+ repository = m .group ("repository" )
1869
+ region = m .group ("region" )
1870
+
1871
+ logger .info (
1872
+ "configuring pip to use codeartifact "
1873
+ "(domain: %s, domain owner: %s, repository: %s, region: %s)" ,
1874
+ domain ,
1875
+ owner ,
1876
+ repository ,
1877
+ region ,
1878
+ )
1879
+ try :
1880
+ client = self .sagemaker_session .boto_session .client ("codeartifact" , region_name = region )
1881
+ auth_token_response = client .get_authorization_token (domain = domain , domainOwner = owner )
1882
+ token = auth_token_response ["authorizationToken" ]
1883
+ endpoint_response = client .get_repository_endpoint (
1884
+ domain = domain , domainOwner = owner , repository = repository , format = "pypi"
1885
+ )
1886
+ unauthenticated_index = endpoint_response ["repositoryEndpoint" ]
1887
+ return re .sub (
1888
+ "https://" ,
1889
+ "https://aws:{}@" .format (token ),
1890
+ re .sub (
1891
+ "{}/?$" .format (repository ),
1892
+ "{}/simple/" .format (repository ),
1893
+ unauthenticated_index ,
1894
+ ),
1895
+ )
1896
+ except Exception :
1897
+ logger .error ("failed to configure pip to use codeartifact" )
1898
+ raise Exception ("failed to configure pip to use codeartifact" )
1899
+
1900
+ def _generate_framework_script (
1901
+ self , user_script : str , codeartifact_repo_arn : str = None
1902
+ ) -> str :
1831
1903
"""Generate the framework entrypoint file (as text) for a processing job.
1832
1904
1833
1905
This script implements the "framework" functionality for setting up your code:
@@ -1838,7 +1910,15 @@ def _generate_framework_script(self, user_script: str) -> str:
1838
1910
Args:
1839
1911
user_script (str): Relative path to ```code``` in the source bundle
1840
1912
- e.g. 'process.py'.
1913
+ codeartifact_repo_arn (str): The ARN of the CodeArtifact repository that should be
1914
+ logged into before installing dependencies (default: None).
1841
1915
"""
1916
+ if codeartifact_repo_arn :
1917
+ index = self ._get_codeartifact_index (codeartifact_repo_arn )
1918
+ index_option = "-i {}" .format (index )
1919
+ else :
1920
+ index_option = ""
1921
+
1842
1922
return dedent (
1843
1923
"""\
1844
1924
#!/bin/bash
@@ -1853,12 +1933,13 @@ def _generate_framework_script(self, user_script: str) -> str:
1853
1933
# Some py3 containers has typing, which may breaks pip install
1854
1934
pip uninstall --yes typing
1855
1935
1856
- pip install -r requirements.txt
1936
+ pip install -r requirements.txt {index_option}
1857
1937
fi
1858
1938
1859
1939
{entry_point_command} {entry_point} "$@"
1860
1940
"""
1861
1941
).format (
1942
+ index_option = index_option ,
1862
1943
entry_point_command = " " .join (self .command ),
1863
1944
entry_point = user_script ,
1864
1945
)
@@ -1934,7 +2015,9 @@ def _set_entrypoint(self, command, user_script_name):
1934
2015
)
1935
2016
self .entrypoint = self .framework_entrypoint_command + [user_script_location ]
1936
2017
1937
- def _create_and_upload_runproc (self , user_script , kms_key , entrypoint_s3_uri ):
2018
+ def _create_and_upload_runproc (
2019
+ self , user_script , kms_key , entrypoint_s3_uri , codeartifact_repo_arn = None
2020
+ ):
1938
2021
"""Create runproc shell script and upload to S3 bucket.
1939
2022
1940
2023
If leveraging a pipeline session with optimized S3 artifact paths,
@@ -1950,7 +2033,7 @@ def _create_and_upload_runproc(self, user_script, kms_key, entrypoint_s3_uri):
1950
2033
from sagemaker .workflow .utilities import _pipeline_config , hash_object
1951
2034
1952
2035
if _pipeline_config and _pipeline_config .pipeline_name :
1953
- runproc_file_str = self ._generate_framework_script (user_script )
2036
+ runproc_file_str = self ._generate_framework_script (user_script , codeartifact_repo_arn )
1954
2037
runproc_file_hash = hash_object (runproc_file_str )
1955
2038
s3_uri = s3 .s3_path_join (
1956
2039
"s3://" ,
@@ -1969,7 +2052,7 @@ def _create_and_upload_runproc(self, user_script, kms_key, entrypoint_s3_uri):
1969
2052
)
1970
2053
else :
1971
2054
s3_runproc_sh = S3Uploader .upload_string_as_file_body (
1972
- self ._generate_framework_script (user_script ),
2055
+ self ._generate_framework_script (user_script , codeartifact_repo_arn ),
1973
2056
desired_s3_uri = entrypoint_s3_uri ,
1974
2057
kms_key = kms_key ,
1975
2058
sagemaker_session = self .sagemaker_session ,
0 commit comments