
Commit 5a402f9

Author: Azure Pipelines (committed)
Merge remote-tracking branch 'origin/main' into publication
2 parents: 7d241ab + 36f10f1

File tree

2 files changed (+9, -9 lines)

lightning_examples/finetuning-scheduler/finetuning-scheduler.py

Lines changed: 7 additions & 3 deletions
@@ -255,6 +255,8 @@ def __init__(
             "num_workers": dataloader_kwargs.get("num_workers", 0),
             "pin_memory": dataloader_kwargs.get("pin_memory", False),
         }
+        # starting with HF Datasets v3.x, trust_remote_code must be `True` https://bit.ly/hf_datasets_trust_remote_req
+        self.trust_remote_code = True
         self.save_hyperparameters()
         os.environ["TOKENIZERS_PARALLELISM"] = "true" if self.hparams.tokenizers_parallelism else "false"
         self.tokenizer = AutoTokenizer.from_pretrained(
@@ -265,11 +267,13 @@ def prepare_data(self):
         """Load the SuperGLUE dataset."""
         # N.B. PL calls prepare_data from a single process (rank 0) so do not use it to assign
         # state (e.g. self.x=y)
-        datasets.load_dataset("super_glue", self.hparams.task_name)
+        datasets.load_dataset("super_glue", self.hparams.task_name, trust_remote_code=self.trust_remote_code)

     def setup(self, stage):
         """Setup our dataset splits for training/validation."""
-        self.dataset = datasets.load_dataset("super_glue", self.hparams.task_name)
+        self.dataset = datasets.load_dataset(
+            "super_glue", self.hparams.task_name, trust_remote_code=self.trust_remote_code
+        )
         for split in self.dataset.keys():
             self.dataset[split] = self.dataset[split].map(
                 self._convert_to_features, batched=True, remove_columns=["label"]
@@ -385,7 +389,7 @@ def validation_step(self, batch, batch_idx, dataloader_idx=0):
         self.log_dict(metric_dict, prog_bar=True)

     def configure_optimizers(self):
-        # With FTS >= 2.0, ``FinetuningScheduler`` simplifies initial optimizer configuration by ensuring the optimizer
+        # ``FinetuningScheduler`` simplifies initial optimizer configuration by ensuring the optimizer
         # configured here will optimize the parameters (and only those parameters) scheduled to be optimized in phase 0
         # of the current fine-tuning schedule. This auto-configuration can be disabled if desired by setting
         # ``enforce_phase0_params`` to ``False``.
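For reference, a minimal sketch of the loading pattern the first two hunks introduce. The "rte" task name below is just an illustrative SuperGLUE subtask; the tutorial's actual task comes from ``self.hparams.task_name``.

    import datasets

    # Illustrative only: load a single SuperGLUE subtask the way the updated DataModule does.
    # Per the diff's comment, HF Datasets v3.x requires trust_remote_code=True for script-based
    # datasets such as super_glue; datasets >=2.17 already accepts the keyword explicitly.
    dataset = datasets.load_dataset("super_glue", "rte", trust_remote_code=True)
    print(dataset["train"].column_names)

Similarly, the ``enforce_phase0_params`` behavior referenced in the ``configure_optimizers`` comment is controlled on the ``FinetuningScheduler`` callback itself. A hedged sketch of that wiring, assuming the unified ``lightning`` package and a placeholder schedule file name:

    from lightning.pytorch import Trainer
    from finetuning_scheduler import FinetuningScheduler

    # Sketch only: left at its default, FinetuningScheduler adjusts the optimizer returned by
    # configure_optimizers so it covers exactly the parameters scheduled for phase 0; passing
    # enforce_phase0_params=False disables that auto-configuration, as the diff comment notes.
    fts = FinetuningScheduler(
        ft_schedule="RteBoolqModule_ft_schedule.yaml",  # placeholder schedule path
        enforce_phase0_params=False,
    )
    trainer = Trainer(callbacks=[fts])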
Second changed file: 2 additions & 6 deletions
@@ -1,6 +1,2 @@
-lightning # todo: the tuner depends on L so later using with PL crash
-finetuning-scheduler[examples] ==2.3.*
-datasets ==2.17.*
-# todo: pin version intill reinstall with PT-eco alignment
-torch ==2.1.*
-torchvision ==0.16.*
+datasets >=2.17.0 # to allow explicitly setting `trust_remote_code`
+finetuning-scheduler[examples] <=2.4.0
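For anyone reproducing the example, the two updated pins can be installed directly, e.g.:

    pip install "datasets>=2.17.0" "finetuning-scheduler[examples]<=2.4.0"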
