Commit d7cf143

fix diffusers offline (#363)

1 parent 36ff383 · commit d7cf143

File tree

4 files changed: +103 -138 lines changed

docker_images/diffusers/app/lora.py
docker_images/diffusers/app/offline.py
docker_images/diffusers/app/pipelines/image_to_image.py
docker_images/diffusers/app/pipelines/text_to_image.py

docker_images/diffusers/app/lora.py
Lines changed: 32 additions & 32 deletions

@@ -1,25 +1,19 @@
 import logging
 
 import torch.nn as nn
-from huggingface_hub import hf_hub_download, model_info
+from app import offline
 from safetensors.torch import load_file
 
 
 logger = logging.getLogger(__name__)
 
 
-class LoRAPipelineMixin(object):
-    def __init__(self):
-        if not hasattr(self, "current_lora_adapter"):
-            self.current_lora_adapter = None
-        if not hasattr(self, "model_id"):
-            self.model_id = None
-        if not hasattr(self, "current_tokens_loaded"):
-            self.current_tokens_loaded = 0
-
+class LoRAPipelineMixin(offline.OfflineBestEffortMixin):
     @staticmethod
     def _get_lora_weight_name(model_data):
-        is_diffusers_lora = LoRAPipelineMixin._is_diffusers_lora(model_data)
+        weight_name_candidate = LoRAPipelineMixin._lora_weights_candidates(model_data)
+        if weight_name_candidate:
+            return weight_name_candidate
         file_to_load = next(
             (
                 file.rfilename
@@ -28,25 +22,28 @@ def _get_lora_weight_name(model_data):
             ),
             None,
         )
-        if not file_to_load and not is_diffusers_lora:
+        if not file_to_load and not weight_name_candidate:
             raise ValueError("No *.safetensors file found for your LoRA")
-        weight_name = file_to_load if not is_diffusers_lora else None
-        return weight_name
+        return file_to_load
 
     @staticmethod
     def _is_lora(model_data):
-        return LoRAPipelineMixin._is_diffusers_lora(
-            model_data
-        ) or "lora" in model_data.cardData.get("tags", [])
+        return LoRAPipelineMixin._lora_weights_candidates(model_data) or (
+            model_data.cardData.get("tags")
+            and "lora" in model_data.cardData.get("tags", [])
+        )
 
     @staticmethod
-    def _is_diffusers_lora(model_data):
-        is_diffusers_lora = any(
-            file.rfilename
-            in ("pytorch_lora_weights.bin", "pytorch_lora_weights.safetensors")
-            for file in model_data.siblings
-        )
-        return is_diffusers_lora
+    def _lora_weights_candidates(model_data):
+        candidate = None
+        for file in model_data.siblings:
+            rfilename = str(file.rfilename)
+            if rfilename.endswith("pytorch_lora_weights.bin"):
+                candidate = rfilename
+            elif rfilename.endswith("pytorch_lora_weights.safetensors"):
+                candidate = rfilename
+                break
+        return candidate
 
     @staticmethod
     def _is_safetensors_pivotal(model_data):
@@ -72,7 +69,8 @@ def _fuse_or_raise(self):
         self.current_lora_adapter = None
         raise
 
-    def _reset_tokenizer_and_encoder(self, tokenizer, text_encoder, token_to_remove):
+    @staticmethod
+    def _reset_tokenizer_and_encoder(tokenizer, text_encoder, token_to_remove):
         token_id = tokenizer(token_to_remove)["input_ids"][1]
         del tokenizer._added_tokens_decoder[token_id]
         del tokenizer._added_tokens_encoder[token_to_remove]
@@ -101,13 +99,14 @@ def _unload_textual_embeddings(self):
 
     def _load_textual_embeddings(self, adapter, model_data):
         if self._is_pivotal_tuning_lora(model_data):
-            embedding_path = hf_hub_download(
+            embedding_path = self._hub_repo_file(
                 repo_id=adapter,
                 filename="embeddings.safetensors"
                 if self._is_safetensors_pivotal(model_data)
                 else "embeddings.pti",
                 repo_type="model",
             )
+
             embeddings = load_file(embedding_path)
             state_dict_clip_l = (
                 embeddings.get("text_encoders_0")
@@ -152,7 +151,7 @@ def _load_lora_adapter(self, kwargs):
         if adapter is not None:
             logger.info("LoRA adapter %s requested", adapter)
             if adapter != self.current_lora_adapter:
-                model_data = model_info(adapter, token=self.use_auth_token)
+                model_data = self._hub_model_info(adapter)
                 if not self._is_lora(model_data):
                     msg = f"Requested adapter {adapter:s} is not a LoRA adapter"
                     logger.error(msg)
@@ -167,10 +166,11 @@ def _load_lora_adapter(self, kwargs):
                     self.current_lora_adapter,
                     adapter,
                 )
-                self.ldm.unfuse_lora()
-                self.ldm.unload_lora_weights()
-                self._unload_textual_embeddings()
-                self.current_lora_adapter = None
+                if self.current_lora_adapter is not None:
+                    self.ldm.unfuse_lora()
+                    self.ldm.unload_lora_weights()
+                    self._unload_textual_embeddings()
+                    self.current_lora_adapter = None
                 logger.info("LoRA weights unloaded, loading new weights")
                 weight_name = self._get_lora_weight_name(model_data=model_data)
 
@@ -184,7 +184,7 @@ def _load_lora_adapter(self, kwargs):
             else:
                 logger.info("LoRA adapter %s already loaded", adapter)
                 # Needed while a LoRA is loaded w/ model
-                model_data = model_info(adapter, token=self.use_auth_token)
+                model_data = self._hub_model_info(adapter)
                 if (
                     self._is_pivotal_tuning_lora(model_data)
                     and self.current_tokens_loaded == 0
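
Note: _lora_weights_candidates replaces the boolean _is_diffusers_lora check and now returns the matching sibling filename itself, preferring pytorch_lora_weights.safetensors over pytorch_lora_weights.bin (a .bin hit is remembered but the scan continues; the first safetensors hit wins and stops the loop). A minimal standalone sketch of that selection logic, not part of the commit, using hypothetical stub objects in place of huggingface_hub model metadata:

# Standalone sketch: SimpleNamespace stubs stand in for ModelInfo.siblings.
from types import SimpleNamespace


def lora_weights_candidates(model_data):
    # Same logic as the new static method: remember a .bin match but keep
    # scanning, so a later .safetensors match takes precedence.
    candidate = None
    for file in model_data.siblings:
        rfilename = str(file.rfilename)
        if rfilename.endswith("pytorch_lora_weights.bin"):
            candidate = rfilename
        elif rfilename.endswith("pytorch_lora_weights.safetensors"):
            candidate = rfilename
            break
    return candidate


model_data = SimpleNamespace(
    siblings=[
        SimpleNamespace(rfilename="pytorch_lora_weights.bin"),
        SimpleNamespace(rfilename="unet/pytorch_lora_weights.safetensors"),
    ]
)
print(lora_weights_candidates(model_data))  # unet/pytorch_lora_weights.safetensors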
docker_images/diffusers/app/offline.py (new file)
Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
+import json
+import logging
+import os
+
+from huggingface_hub import file_download, hf_api, hf_hub_download, model_info, utils
+
+
+logger = logging.getLogger(__name__)
+
+
+class OfflineBestEffortMixin(object):
+    def _hub_repo_file(self, repo_id, filename, repo_type="model"):
+        if self.offline_preferred:
+            try:
+                config_file = hf_hub_download(
+                    repo_id,
+                    filename,
+                    token=self.use_auth_token,
+                    local_files_only=True,
+                    repo_type=repo_type,
+                )
+            except utils.LocalEntryNotFoundError:
+                logger.info("Unable to fetch model index in local cache")
+            else:
+                return config_file
+
+        return hf_hub_download(
+            repo_id, filename, token=self.use_auth_token, repo_type=repo_type
+        )
+
+    def _hub_model_info(self, model_id):
+        """
+        This method tries to fetch the locally cached model_info, if any.
+        If none, it requests the Hub. Useful for pre-cached private models when no token is available.
+        """
+        if self.offline_preferred:
+            cache_root = os.getenv(
+                "DIFFUSERS_CACHE", os.getenv("HUGGINGFACE_HUB_CACHE", "")
+            )
+            folder_name = file_download.repo_folder_name(
+                repo_id=model_id, repo_type="model"
+            )
+            folder_path = os.path.join(cache_root, folder_name)
+            logger.debug("Cache folder path %s", folder_path)
+            filename = os.path.join(folder_path, "hub_model_info.json")
+            try:
+                with open(filename, "r") as f:
+                    model_data = json.load(f)
+            except OSError:
+                logger.info(
+                    "No cached model info found in file %s for model %s. Fetching from the Hub",
+                    filename,
+                    model_id,
+                )
+            else:
+                model_data = hf_api.ModelInfo(**model_data)
+                return model_data
+        model_data = model_info(model_id, token=self.use_auth_token)
+        return model_data
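
The mixin assumes the consuming class sets self.offline_preferred and self.use_auth_token before calling either helper, which is exactly what the pipelines below do in __init__. A hedged usage sketch (the class name is illustrative; app.offline and app.validation are the modules used in this commit, so it only runs inside the image's app package):

import os

from app import offline, validation


class CachedPipeline(offline.OfflineBestEffortMixin):  # illustrative consumer
    def __init__(self, model_id: str):
        # Attributes the mixin's helpers read from self
        self.use_auth_token = os.getenv("HF_API_TOKEN")
        self.offline_preferred = validation.str_to_bool(os.getenv("OFFLINE_PREFERRED"))
        # Cache-first model_info lookup, falling back to a live Hub request
        model_data = self._hub_model_info(model_id)
        # Cache-first file download with the same fallback
        config_file = self._hub_repo_file(model_id, "model_index.json")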

docker_images/diffusers/app/pipelines/image_to_image.py
Lines changed: 4 additions & 49 deletions

@@ -3,7 +3,7 @@
 import os
 
 import torch
-from app import idle, timing, validation
+from app import idle, offline, timing, validation
 from app.pipelines import Pipeline
 from diffusers import (
     AltDiffusionImg2ImgPipeline,
@@ -26,47 +26,20 @@
     StableUnCLIPImg2ImgPipeline,
     StableUnCLIPPipeline,
 )
-from huggingface_hub import file_download, hf_api, hf_hub_download, model_info, utils
 from PIL import Image
 
 
 logger = logging.getLogger(__name__)
 
 
-class ImageToImagePipeline(Pipeline):
+class ImageToImagePipeline(Pipeline, offline.OfflineBestEffortMixin):
     def __init__(self, model_id: str):
         use_auth_token = os.getenv("HF_API_TOKEN")
         self.use_auth_token = use_auth_token
         # This should allow us to make the image work with private models when no token is provided, if the said model
         # is already in local cache
         self.offline_preferred = validation.str_to_bool(os.getenv("OFFLINE_PREFERRED"))
-        fetched = False
-        if self.offline_preferred:
-            cache_root = os.getenv(
-                "DIFFUSERS_CACHE", os.getenv("HUGGINGFACE_HUB_CACHE", "")
-            )
-            folder_name = file_download.repo_folder_name(
-                repo_id=model_id, repo_type="model"
-            )
-            folder_path = os.path.join(cache_root, folder_name)
-            logger.debug("Cache folder path %s", folder_path)
-            filename = os.path.join(folder_path, "hub_model_info.json")
-            try:
-                with open(filename, "r") as f:
-                    model_data = json.load(f)
-            except OSError:
-                logger.info(
-                    "No cached model info found in file %s found for model %s. Fetching on the hub",
-                    filename,
-                    model_id,
-                )
-            else:
-                model_data = hf_api.ModelInfo(**model_data)
-                fetched = True
-
-        if not fetched:
-            model_data = model_info(model_id, token=self.use_auth_token)
-
+        model_data = self._hub_model_info(model_id)
         kwargs = (
             {"safety_checker": None}
             if model_id.startswith("hf-internal-testing/")
@@ -84,25 +57,7 @@ def __init__(self, model_id: str):
                 config_file_name = file_name
                 break
         if config_file_name:
-            fetched = False
-            if self.offline_preferred:
-                try:
-                    config_file = hf_hub_download(
-                        model_id,
-                        config_file_name,
-                        token=self.use_auth_token,
-                        local_files_only=True,
-                    )
-                except utils.LocalEntryNotFoundError:
-                    logger.info("Unable to fetch model index in local cache")
-                else:
-                    fetched = True
-            if not fetched:
-                config_file = hf_hub_download(
-                    model_id,
-                    config_file_name,
-                    token=self.use_auth_token,
-                )
+            config_file = self._hub_repo_file(model_id, config_file_name)
 
             with open(config_file, "r") as f:
                 config_dict = json.load(f)
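
The deleted block above computed the hub_model_info.json cache location inline; _hub_model_info now resolves the same path. A small sketch of where that lookup lands, assuming the same environment variables as the mixin (the repo id is illustrative):

import os

from huggingface_hub import file_download

cache_root = os.getenv("DIFFUSERS_CACHE", os.getenv("HUGGINGFACE_HUB_CACHE", ""))
folder_name = file_download.repo_folder_name(
    repo_id="runwayml/stable-diffusion-v1-5", repo_type="model"
)
# e.g. <cache_root>/models--runwayml--stable-diffusion-v1-5/hub_model_info.json
print(os.path.join(cache_root, folder_name, "hub_model_info.json"))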

docker_images/diffusers/app/pipelines/text_to_image.py
Lines changed: 8 additions & 57 deletions

@@ -5,7 +5,7 @@
 from typing import TYPE_CHECKING
 
 import torch
-from app import idle, lora, timing, validation
+from app import idle, lora, offline, timing, validation
 from app.pipelines import Pipeline
 from diffusers import (
     AutoencoderKL,
@@ -14,7 +14,6 @@
     EulerAncestralDiscreteScheduler,
 )
 from diffusers.schedulers.scheduling_utils import KarrasDiffusionSchedulers
-from huggingface_hub import file_download, hf_api, hf_hub_download, model_info, utils
 
 
 logger = logging.getLogger(__name__)
@@ -23,7 +22,9 @@
     from PIL import Image
 
 
-class TextToImagePipeline(Pipeline, lora.LoRAPipelineMixin):
+class TextToImagePipeline(
+    Pipeline, lora.LoRAPipelineMixin, offline.OfflineBestEffortMixin
+):
     def __init__(self, model_id: str):
         self.current_lora_adapter = None
         self.model_id = None
@@ -32,32 +33,7 @@ def __init__(self, model_id: str):
         # This should allow us to make the image work with private models when no token is provided, if the said model
         # is already in local cache
         self.offline_preferred = validation.str_to_bool(os.getenv("OFFLINE_PREFERRED"))
-        fetched = False
-        if self.offline_preferred:
-            cache_root = os.getenv(
-                "DIFFUSERS_CACHE", os.getenv("HUGGINGFACE_HUB_CACHE", "")
-            )
-            folder_name = file_download.repo_folder_name(
-                repo_id=model_id, repo_type="model"
-            )
-            folder_path = os.path.join(cache_root, folder_name)
-            logger.debug("Cache folder path %s", folder_path)
-            filename = os.path.join(folder_path, "hub_model_info.json")
-            try:
-                with open(filename, "r") as f:
-                    model_data = json.load(f)
-            except OSError:
-                logger.info(
-                    "No cached model info found in file %s found for model %s. Fetching on the hub",
-                    filename,
-                    model_id,
-                )
-            else:
-                model_data = hf_api.ModelInfo(**model_data)
-                fetched = True
-
-        if not fetched:
-            model_data = model_info(model_id, token=self.use_auth_token)
+        model_data = self._hub_model_info(model_id)
 
         kwargs = (
             {"safety_checker": None}
@@ -74,26 +50,7 @@ def __init__(self, model_id: str):
         if self._is_lora(model_data):
            model_type = "LoraModel"
         elif has_model_index:
-            fetched = False
-            if self.offline_preferred:
-                try:
-                    config_file = hf_hub_download(
-                        model_id,
-                        "model_index.json",
-                        token=self.use_auth_token,
-                        local_files_only=True,
-                    )
-                except utils.LocalEntryNotFoundError:
-                    logger.info("Unable to fetch model index in local cache")
-                else:
-                    fetched = True
-
-            if not fetched:
-                config_file = hf_hub_download(
-                    model_id,
-                    "model_index.json",
-                    token=self.use_auth_token,
-                )
+            config_file = self._hub_repo_file(model_id, "model_index.json")
             with open(config_file, "r") as f:
                 config_dict = json.load(f)
             model_type = config_dict.get("_class_name", None)
@@ -107,15 +64,9 @@
             raise ValueError(
                 "No `base_model` found. Please include a `base_model` on your README.md tags"
             )
-
-            weight_name = self._get_lora_weight_name(model_data)
             self._load_sd_with_sdxl_fix(model_to_load, **kwargs)
-            self.ldm.load_lora_weights(
-                model_id, weight_name=weight_name, use_auth_token=self.use_auth_token
-            )
-            self.current_lora_adapter = model_id
-            self._fuse_or_raise()
-            logger.info("LoRA adapter %s loaded", model_id)
+            # The lora will actually be lazily loaded on the fly per request
+            self.current_lora_adapter = None
         else:
             if model_id == "stabilityai/stable-diffusion-xl-base-1.0":
                 self._load_sd_with_sdxl_fix(model_id, **kwargs)
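
With the eager load gone, a LoRA repo id now only loads the base model at construction; the adapter itself is resolved per request via lora.LoRAPipelineMixin._load_lora_adapter (see the lora.py hunks above). A hedged sketch of that request-time flow; the kwargs key name is not visible in this diff and is assumed:

# Hypothetical request-time flow; "lora_adapter" is an assumed kwargs key
# and the adapter id is illustrative.
pipeline = TextToImagePipeline("stabilityai/stable-diffusion-xl-base-1.0")
kwargs = {"lora_adapter": "some-user/some-lora"}
pipeline._load_lora_adapter(kwargs)  # fetches model_info, loads and fuses weights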
