diff --git a/docker_images/speechbrain/Dockerfile b/docker_images/speechbrain/Dockerfile index ac610d21..412c0e81 100644 --- a/docker_images/speechbrain/Dockerfile +++ b/docker_images/speechbrain/Dockerfile @@ -1,11 +1,11 @@ -FROM tiangolo/uvicorn-gunicorn:python3.8 +FROM tiangolo/uvicorn-gunicorn:python3.9 LABEL maintainer="me " # Add any system dependency here # RUN apt-get update -y && apt-get install libXXX -y RUN apt-get update -y && apt-get install ffmpeg -y -RUN pip install --no-cache-dir torch==1.11 +RUN pip install --no-cache-dir torch==2.0 COPY ./requirements.txt /app RUN pip install --no-cache-dir -r requirements.txt COPY ./prestart.sh /app/ diff --git a/docker_images/speechbrain/app/pipelines/audio_classification.py b/docker_images/speechbrain/app/pipelines/audio_classification.py index 469b4936..cccab61c 100644 --- a/docker_images/speechbrain/app/pipelines/audio_classification.py +++ b/docker_images/speechbrain/app/pipelines/audio_classification.py @@ -4,7 +4,7 @@ import torch from app.common import ModelType, get_type from app.pipelines import Pipeline -from speechbrain.pretrained import EncoderClassifier +from speechbrain.inference import EncoderClassifier class AudioClassificationPipeline(Pipeline): diff --git a/docker_images/speechbrain/app/pipelines/audio_to_audio.py b/docker_images/speechbrain/app/pipelines/audio_to_audio.py index 4703d054..f5fbf093 100644 --- a/docker_images/speechbrain/app/pipelines/audio_to_audio.py +++ b/docker_images/speechbrain/app/pipelines/audio_to_audio.py @@ -4,7 +4,7 @@ import torch from app.common import ModelType, get_type from app.pipelines import Pipeline -from speechbrain.pretrained import ( +from speechbrain.inference import ( SepformerSeparation, SpectralMaskEnhancement, WaveformEnhancement, diff --git a/docker_images/speechbrain/app/pipelines/automatic_speech_recognition.py b/docker_images/speechbrain/app/pipelines/automatic_speech_recognition.py index ff37c809..eb6789b8 100644 --- a/docker_images/speechbrain/app/pipelines/automatic_speech_recognition.py +++ b/docker_images/speechbrain/app/pipelines/automatic_speech_recognition.py @@ -4,7 +4,7 @@ import torch from app.common import ModelType, get_type from app.pipelines import Pipeline -from speechbrain.pretrained import EncoderASR, EncoderDecoderASR, WhisperASR +from speechbrain.inference import EncoderASR, EncoderDecoderASR, WhisperASR class AutomaticSpeechRecognitionPipeline(Pipeline): diff --git a/docker_images/speechbrain/app/pipelines/text2text_generation.py b/docker_images/speechbrain/app/pipelines/text2text_generation.py index fa13f8a8..5f698ba0 100644 --- a/docker_images/speechbrain/app/pipelines/text2text_generation.py +++ b/docker_images/speechbrain/app/pipelines/text2text_generation.py @@ -2,7 +2,7 @@ from app.common import ModelType, get_type from app.pipelines import Pipeline -from speechbrain.pretrained import GraphemeToPhoneme +from speechbrain.inference import GraphemeToPhoneme POSTPROCESSING = {ModelType.GRAPHEMETOPHONEME: lambda output: "-".join(output)} diff --git a/docker_images/speechbrain/app/pipelines/text_to_speech.py b/docker_images/speechbrain/app/pipelines/text_to_speech.py index f73e1ce8..5a64bd60 100644 --- a/docker_images/speechbrain/app/pipelines/text_to_speech.py +++ b/docker_images/speechbrain/app/pipelines/text_to_speech.py @@ -3,7 +3,7 @@ import numpy as np from app.common import ModelType, get_type, get_vocoder_model_id from app.pipelines import Pipeline -from speechbrain.pretrained import HIFIGAN, FastSpeech2, Tacotron2 +from speechbrain.inference import HIFIGAN, FastSpeech2, Tacotron2 class TextToSpeechPipeline(Pipeline): @@ -37,6 +37,8 @@ def __call__(self, inputs: str) -> Tuple[np.array, int]: Return: A :obj:`np.array` and a :obj:`int`: The raw waveform as a numpy array, and the sampling rate as an int. """ + if not inputs.replace("\0", "").strip(): + inputs = "Empty query" if self.type == "tacotron2": mel_output, _, _ = self.model.encode_text(inputs) elif self.type == "fastspeech2": diff --git a/docker_images/speechbrain/requirements.txt b/docker_images/speechbrain/requirements.txt index 3f0104a2..73900b2e 100644 --- a/docker_images/speechbrain/requirements.txt +++ b/docker_images/speechbrain/requirements.txt @@ -3,5 +3,5 @@ starlette==0.27.0 api-inference-community==0.0.32 huggingface_hub>=0.7 transformers==4.30.0 -git+https://github.com/speechbrain/speechbrain@v0.5.15 +git+https://github.com/speechbrain/speechbrain@v1.0.0 #Dummy.