@@ -255,6 +255,8 @@ def __init__(
             "num_workers": dataloader_kwargs.get("num_workers", 0),
             "pin_memory": dataloader_kwargs.get("pin_memory", False),
         }
+        # starting with HF Datasets v3.x, trust_remote_code must be `True` https://bit.ly/hf_datasets_trust_remote_req
+        self.trust_remote_code = True
         self.save_hyperparameters()
         os.environ["TOKENIZERS_PARALLELISM"] = "true" if self.hparams.tokenizers_parallelism else "false"
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -265,11 +267,13 @@ def prepare_data(self):
         """Load the SuperGLUE dataset."""
         # N.B. PL calls prepare_data from a single process (rank 0) so do not use it to assign
         # state (e.g. self.x=y)
-        datasets.load_dataset("super_glue", self.hparams.task_name)
+        datasets.load_dataset("super_glue", self.hparams.task_name, trust_remote_code=self.trust_remote_code)

     def setup(self, stage):
         """Setup our dataset splits for training/validation."""
-        self.dataset = datasets.load_dataset("super_glue", self.hparams.task_name)
+        self.dataset = datasets.load_dataset(
+            "super_glue", self.hparams.task_name, trust_remote_code=self.trust_remote_code
+        )
         for split in self.dataset.keys():
             self.dataset[split] = self.dataset[split].map(
                 self._convert_to_features, batched=True, remove_columns=["label"]
@@ -385,7 +389,7 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0):
         self.log_dict(metric_dict, prog_bar=True)

     def configure_optimizers(self):
-        # With FTS >= 2.0, ``FinetuningScheduler`` simplifies initial optimizer configuration by ensuring the optimizer
+        # ``FinetuningScheduler`` simplifies initial optimizer configuration by ensuring the optimizer
         # configured here will optimize the parameters (and only those parameters) scheduled to be optimized in phase 0
         # of the current fine-tuning schedule. This auto-configuration can be disabled if desired by setting
         # ``enforce_phase0_params`` to ``False``.
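For context, a minimal standalone sketch of the call this patch enables (not part of the patch itself; it assumes the Hugging Face `datasets` library and uses the `boolq` SuperGLUE task purely as an illustrative stand-in for `self.hparams.task_name`):

```python
# Minimal sketch (illustrative, not part of the patch): load a SuperGLUE task the
# same way the patched DataModule does, passing trust_remote_code explicitly so the
# dataset's loading script is permitted to run.
import datasets

task_name = "boolq"  # hypothetical example task; the DataModule uses self.hparams.task_name
dataset = datasets.load_dataset("super_glue", task_name, trust_remote_code=True)
print(dataset["train"][0])  # inspect one training example
```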