Fix loading Bark (#2893)

* Fixup hubert path
* Make style
2023-08-26 11:59:00 +02:00 · 2023-08-26 11:59:00 +02:00 · a7a96d08dd
parent 04a36a727b
commit a7a96d08dd
3 changed files with 13 additions and 4 deletions
--- a/TTS/.models.json
+++ b/TTS/.models.json
@@ -15,8 +15,10 @@
                    "hf_url": [
                        "https://coqui.gateway.scarf.sh/hf/bark/coarse_2.pt",
                        "https://coqui.gateway.scarf.sh/hf/bark/fine_2.pt",
-                        "https://app.coqui.ai/tts_model/bark_text_2",
-                        "https://coqui.gateway.scarf.sh/hf/bark/config.json"
+                        "https://app.coqui.ai/tts_model/text_2.pt",
+                        "https://coqui.gateway.scarf.sh/hf/bark/config.json",
+                        "https://coqui.gateway.scarf.sh/hf/bark/hubert.pt",
+                        "https://coqui.gateway.scarf.sh/hf/bark/tokenizer.pth"
                    ],
                    "default_vocoder": null,
                    "commit": "e9a1953e",
@@ -238,7 +240,7 @@
                "tortoise-v2": {
                    "description": "Tortoise tts model https://github.com/neonbjb/tortoise-tts",
                    "github_rls_url": [
-                        "https://app.coqui.ai/tts_model/tortoise",
+                        "https://app.coqui.ai/tts_model/autoregressive.pth",
                        "https://coqui.gateway.scarf.sh/v0.14.1_models/clvp2.pth",
                        "https://coqui.gateway.scarf.sh/v0.14.1_models/cvvp.pth",
                        "https://coqui.gateway.scarf.sh/v0.14.1_models/diffusion_decoder.pth",
--- a/TTS/tts/models/bark.py
+++ b/TTS/tts/models/bark.py
@@ -246,6 +246,8 @@ class Bark(BaseTTS):
        text_model_path=None,
        coarse_model_path=None,
        fine_model_path=None,
+        hubert_model_path=None,
+        hubert_tokenizer_path=None,
        eval=False,
        strict=True,
        **kwargs,
@@ -267,10 +269,14 @@ class Bark(BaseTTS):
        text_model_path = text_model_path or os.path.join(checkpoint_dir, "text_2.pt")
        coarse_model_path = coarse_model_path or os.path.join(checkpoint_dir, "coarse_2.pt")
        fine_model_path = fine_model_path or os.path.join(checkpoint_dir, "fine_2.pt")
+        hubert_model_path = hubert_model_path or os.path.join(checkpoint_dir, "hubert.pt")
+        hubert_tokenizer_path = hubert_tokenizer_path or os.path.join(checkpoint_dir, "tokenizer.pth")

        self.config.LOCAL_MODEL_PATHS["text"] = text_model_path
        self.config.LOCAL_MODEL_PATHS["coarse"] = coarse_model_path
        self.config.LOCAL_MODEL_PATHS["fine"] = fine_model_path
+        self.config.LOCAL_MODEL_PATHS["hubert"] = hubert_model_path
+        self.config.LOCAL_MODEL_PATHS["hubert_tokenizer"] = hubert_tokenizer_path

        self.load_bark_models()

--- a/TTS/tts/utils/synthesis.py
+++ b/TTS/tts/utils/synthesis.py
@@ -18,7 +18,8 @@ def compute_style_mel(style_wav, ap, cuda=False, device="cpu"):
    if cuda:
        device = "cuda"
    style_mel = torch.FloatTensor(
-        ap.melspectrogram(ap.load_wav(style_wav, sr=ap.sample_rate)), device=device,
+        ap.melspectrogram(ap.load_wav(style_wav, sr=ap.sample_rate)),
+        device=device,
    ).unsqueeze(0)
    return style_mel