@@ -163,12 +163,14 @@ class IngestionManagerPandas:
163
163
max_workers (int): number of threads to create.
164
164
max_processes (int): number of processes to create. Each process spawns
165
165
``max_workers`` threads.
166
+ profile_name (str): name of the AWS credentials profile to use for ``PutRecord``.
166
167
"""
167
168
168
169
feature_group_name : str = attr .ib ()
169
170
sagemaker_fs_runtime_client_config : Config = attr .ib ()
170
171
max_workers : int = attr .ib (default = 1 )
171
172
max_processes : int = attr .ib (default = 1 )
173
+ profile_name : str = attr .ib (default = None )
172
174
_async_result : AsyncResult = attr .ib (default = None )
173
175
_processing_pool : ProcessingPool = attr .ib (default = None )
174
176
_failed_indices : List [int ] = attr .ib (factory = list )
@@ -180,6 +182,7 @@ def _ingest_single_batch(
180
182
client_config : Config ,
181
183
start_index : int ,
182
184
end_index : int ,
185
+ profile_name : str = None ,
183
186
) -> List [int ]:
184
187
"""Ingest a single batch of DataFrame rows into FeatureStore.
185
188
@@ -190,6 +193,7 @@ def _ingest_single_batch(
190
193
client to perform boto calls.
191
194
start_index (int): starting position to ingest in this batch.
192
195
end_index (int): ending position to ingest in this batch.
196
+ profile_name (str): name of the AWS credentials profile to use for ``PutRecord``.
193
197
194
198
Returns:
195
199
List of row indices that failed to be ingested.
@@ -198,7 +202,7 @@ def _ingest_single_batch(
198
202
if "max_attempts" not in retry_config and "total_max_attempts" not in retry_config :
199
203
client_config = copy .deepcopy (client_config )
200
204
client_config .retries = {"max_attempts" : 10 , "mode" : "standard" }
201
- sagemaker_featurestore_runtime_client = boto3 .Session ().client (
205
+ sagemaker_featurestore_runtime_client = boto3 .Session (profile_name = profile_name ).client (
202
206
service_name = "sagemaker-featurestore-runtime" , config = client_config
203
207
)
204
208
@@ -287,6 +291,7 @@ def _run_multi_process(self, data_frame: DataFrame, wait=True, timeout=None):
287
291
data_frame [start_index :end_index ],
288
292
start_index ,
289
293
timeout ,
294
+ self .profile_name ,
290
295
)
291
296
]
292
297
@@ -311,6 +316,7 @@ def _run_multi_threaded(
311
316
data_frame : DataFrame ,
312
317
row_offset = 0 ,
313
318
timeout = None ,
319
+ profile_name = None ,
314
320
) -> List [int ]:
315
321
"""Start the ingestion process.
316
322
@@ -321,6 +327,7 @@ def _run_multi_threaded(
321
327
wait (bool): whether to wait for the ingestion to finish or not.
322
328
timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
323
329
if timeout is reached.
330
+ profile_name (str): name of the AWS credentials profile to use for ``PutRecord``.
324
331
325
332
Returns:
326
333
List of row indices that failed to be ingested.
@@ -342,6 +349,7 @@ def _run_multi_threaded(
342
349
start_index = start_index ,
343
350
end_index = end_index ,
344
351
client_config = sagemaker_fs_runtime_client_config ,
352
+ profile_name = profile_name ,
345
353
)
346
354
] = (start_index + row_offset , end_index + row_offset )
347
355
@@ -581,6 +589,7 @@ def ingest(
581
589
max_processes : int = 1 ,
582
590
wait : bool = True ,
583
591
timeout : Union [int , float ] = None ,
592
+ profile_name : str = None ,
584
593
) -> IngestionManagerPandas :
585
594
"""Ingest the content of a pandas DataFrame to feature store.
586
595
@@ -599,6 +608,11 @@ def ingest(
599
608
They can also be found from the IngestionManagerPandas' ``failed_rows`` function after
600
609
the exception is thrown.
601
610
611
+ The `profile_name` argument is optional. The default credentials are used if None is
612
+ passed. This `profile_name` is used in the sagemaker_featurestore_runtime client only. See
613
+ https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html for more
614
+ about the default credentials.
615
+
602
616
Args:
603
617
data_frame (DataFrame): data_frame to be ingested to feature store.
604
618
max_workers (int): number of threads to be created.
@@ -607,6 +621,8 @@ def ingest(
607
621
wait (bool): whether to wait for the ingestion to finish or not.
608
622
timeout (Union[int, float]): ``concurrent.futures.TimeoutError`` will be raised
609
623
if timeout is reached.
624
+ profile_name (str): name of the AWS credentials profile to use for ``PutRecord``
625
+ (default: None).
610
626
611
627
Returns:
612
628
An instance of IngestionManagerPandas.
@@ -622,6 +638,7 @@ def ingest(
622
638
sagemaker_fs_runtime_client_config = self .sagemaker_session .sagemaker_featurestore_runtime_client .meta .config ,
623
639
max_workers = max_workers ,
624
640
max_processes = max_processes ,
641
+ profile_name = profile_name ,
625
642
)
626
643
627
644
manager .run (data_frame = data_frame , wait = wait , timeout = timeout )
0 commit comments