huggingface · TParcollet · Feb 27, 2024 · Feb 27, 2024 · Feb 27, 2024 · Feb 27, 2024
diff --git a/docker_images/speechbrain/Dockerfile b/docker_images/speechbrain/Dockerfile
@@ -1,11 +1,11 @@
-FROM tiangolo/uvicorn-gunicorn:python3.8
+FROM tiangolo/uvicorn-gunicorn:python3.9
 LABEL maintainer="me <[email protected]>"
 
 # Add any system dependency here
 # RUN apt-get update -y && apt-get install libXXX -y
 RUN apt-get update -y && apt-get install ffmpeg -y
 
-RUN pip install --no-cache-dir torch==1.11
+RUN pip install --no-cache-dir torch==2.0
 COPY ./requirements.txt /app
 RUN pip install --no-cache-dir -r requirements.txt
 COPY ./prestart.sh /app/

diff --git a/docker_images/speechbrain/app/pipelines/audio_classification.py b/docker_images/speechbrain/app/pipelines/audio_classification.py
@@ -4,7 +4,7 @@
 import torch
 from app.common import ModelType, get_type
 from app.pipelines import Pipeline
-from speechbrain.pretrained import EncoderClassifier
+from speechbrain.inference import EncoderClassifier
 
 
 class AudioClassificationPipeline(Pipeline):

diff --git a/docker_images/speechbrain/app/pipelines/audio_to_audio.py b/docker_images/speechbrain/app/pipelines/audio_to_audio.py
@@ -4,7 +4,7 @@
 import torch
 from app.common import ModelType, get_type
 from app.pipelines import Pipeline
-from speechbrain.pretrained import (
+from speechbrain.inference import (
     SepformerSeparation,
     SpectralMaskEnhancement,
     WaveformEnhancement,

diff --git a/docker_images/speechbrain/app/pipelines/automatic_speech_recognition.py b/docker_images/speechbrain/app/pipelines/automatic_speech_recognition.py
@@ -4,7 +4,7 @@
 import torch
 from app.common import ModelType, get_type
 from app.pipelines import Pipeline
-from speechbrain.pretrained import EncoderASR, EncoderDecoderASR, WhisperASR
+from speechbrain.inference import EncoderASR, EncoderDecoderASR, WhisperASR
 
 
 class AutomaticSpeechRecognitionPipeline(Pipeline):

diff --git a/docker_images/speechbrain/app/pipelines/text2text_generation.py b/docker_images/speechbrain/app/pipelines/text2text_generation.py
@@ -2,7 +2,7 @@
 
 from app.common import ModelType, get_type
 from app.pipelines import Pipeline
-from speechbrain.pretrained import GraphemeToPhoneme
+from speechbrain.inference import GraphemeToPhoneme
 
 
 POSTPROCESSING = {ModelType.GRAPHEMETOPHONEME: lambda output: "-".join(output)}

diff --git a/docker_images/speechbrain/app/pipelines/text_to_speech.py b/docker_images/speechbrain/app/pipelines/text_to_speech.py
@@ -3,7 +3,7 @@
 import numpy as np
 from app.common import ModelType, get_type, get_vocoder_model_id
 from app.pipelines import Pipeline
-from speechbrain.pretrained import HIFIGAN, FastSpeech2, Tacotron2
+from speechbrain.inference import HIFIGAN, FastSpeech2, Tacotron2
 
 
 class TextToSpeechPipeline(Pipeline):
@@ -37,6 +37,8 @@ def __call__(self, inputs: str) -> Tuple[np.array, int]:
         Return:
             A :obj:`np.array` and a :obj:`int`: The raw waveform as a numpy array, and the sampling rate as an int.
         """
+        if not inputs.replace("\0", "").strip():
+            inputs = "Empty query"  # placeholder for blank/NUL-only text
         if self.type == "tacotron2":
             mel_output, _, _ = self.model.encode_text(inputs)
         elif self.type == "fastspeech2":

diff --git a/docker_images/speechbrain/requirements.txt b/docker_images/speechbrain/requirements.txt
@@ -3,5 +3,5 @@ starlette==0.27.0
 api-inference-community==0.0.32
 huggingface_hub>=0.7
 transformers==4.30.0
-git+https://github.com/speechbrain/speechbrain@v0.5.15
+git+https://github.com/speechbrain/speechbrain@v1.0.0
 #Dummy.