Add docs and missing files

Eren Gölge 2023-05-23 13:24:48 +02:00
parent fb31ce4b0a
commit 92d4823ad4
2 changed files with 66 additions and 3 deletions

TTS/tts/models/vits.py

@@ -1725,22 +1725,37 @@ class Vits(BaseTTS):
    assert not self.training

    def load_fairseq_checkpoint(self, config, checkpoint_dir, eval=False):
        """Load VITS checkpoints released by fairseq here: https://github.com/facebookresearch/fairseq/tree/main/examples/mms

        Performs some changes for compatibility.

        Args:
            config (Coqpit): 🐸TTS model config.
            checkpoint_dir (str): Path to the checkpoint directory.
            eval (bool, optional): Set to True for evaluation. Defaults to False.
        """
        import json

        # set paths
        config_file = os.path.join(checkpoint_dir, "config.json")
        checkpoint_file = os.path.join(checkpoint_dir, "G_100000.pth")
        vocab_file = os.path.join(checkpoint_dir, "vocab.txt")
        # set config params
with open(config_file, "r") as file:
# Load the JSON data as a dictionary
config_org = json.load(file)
self.config.audio.sample_rate = config_org["data"]["sampling_rate"]
# self.config.add_blank = config['add_blank']
# set tokenizer
vocab = FairseqVocab(vocab_file)
self.text_encoder.emb = nn.Embedding(vocab.num_chars, config.model_args.hidden_channels)
self.tokenizer = TTSTokenizer(
            use_phonemes=False,
            text_cleaner=None,
            characters=vocab,
            phonemizer=None,
            add_blank=config_org["data"]["add_blank"],
            use_eos_bos=False,
        )
        # load fairseq checkpoint
        new_chk = rehash_fairseq_vits_checkpoint(checkpoint_file)
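
For reference, a minimal usage sketch of the new loader (not part of this commit; the config/model construction and the checkpoint path are illustrative assumptions):

    # Hypothetical usage sketch, assuming an MMS checkpoint directory that
    # contains config.json, G_100000.pth and vocab.txt as expected above.
    from TTS.tts.configs.vits_config import VitsConfig
    from TTS.tts.models.vits import Vits

    config = VitsConfig()
    model = Vits.init_from_config(config)
    model.load_fairseq_checkpoint(config, "/path/to/mms/checkpoint_dir", eval=True)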

TTS/tts/utils/fairseq.py Normal file

@@ -0,0 +1,48 @@
import torch


def rehash_fairseq_vits_checkpoint(checkpoint_file):
    """Map fairseq VITS (MMS) state-dict keys to their 🐸TTS VITS layer names."""
    chk = torch.load(checkpoint_file)["model"]
    new_chk = {}
    for k, v in chk.items():
        # rename the main sub-networks
        if "enc_p." in k:
            new_chk[k.replace("enc_p.", "text_encoder.")] = v
        elif "dec." in k:
            new_chk[k.replace("dec.", "waveform_decoder.")] = v
        elif "enc_q." in k:
            new_chk[k.replace("enc_q.", "posterior_encoder.")] = v
        # the fairseq flow interleaves parameter-free Flip layers, so only even
        # indices carry weights; remap 2/4/6 to the contiguous 🐸TTS indices 1/2/3
        elif "flow.flows.2." in k:
            new_chk[k.replace("flow.flows.2.", "flow.flows.1.")] = v
        elif "flow.flows.4." in k:
            new_chk[k.replace("flow.flows.4.", "flow.flows.2.")] = v
        elif "flow.flows.6." in k:
            new_chk[k.replace("flow.flows.6.", "flow.flows.3.")] = v
        # in the stochastic duration predictor the "m"/"logs" parameters are
        # called "translation"/"log_scale" in 🐸TTS, and the odd fairseq flow
        # indices 1/3/5/7 map to the contiguous indices 1/2/3/4
        elif "dp.flows.0.m" in k:
            new_chk[k.replace("dp.flows.0.m", "duration_predictor.flows.0.translation")] = v
        elif "dp.flows.0.logs" in k:
            new_chk[k.replace("dp.flows.0.logs", "duration_predictor.flows.0.log_scale")] = v
        elif "dp.flows.1" in k:
            new_chk[k.replace("dp.flows.1", "duration_predictor.flows.1")] = v
        elif "dp.flows.3" in k:
            new_chk[k.replace("dp.flows.3", "duration_predictor.flows.2")] = v
        elif "dp.flows.5" in k:
            new_chk[k.replace("dp.flows.5", "duration_predictor.flows.3")] = v
        elif "dp.flows.7" in k:
            new_chk[k.replace("dp.flows.7", "duration_predictor.flows.4")] = v
        elif "dp.post_flows.0.m" in k:
            new_chk[k.replace("dp.post_flows.0.m", "duration_predictor.post_flows.0.translation")] = v
        elif "dp.post_flows.0.logs" in k:
            new_chk[k.replace("dp.post_flows.0.logs", "duration_predictor.post_flows.0.log_scale")] = v
        elif "dp.post_flows.1" in k:
            new_chk[k.replace("dp.post_flows.1", "duration_predictor.post_flows.1")] = v
        elif "dp.post_flows.3" in k:
            new_chk[k.replace("dp.post_flows.3", "duration_predictor.post_flows.2")] = v
        elif "dp.post_flows.5" in k:
            new_chk[k.replace("dp.post_flows.5", "duration_predictor.post_flows.3")] = v
        elif "dp.post_flows.7" in k:
            new_chk[k.replace("dp.post_flows.7", "duration_predictor.post_flows.4")] = v
        elif "dp." in k:
            new_chk[k.replace("dp.", "duration_predictor.")] = v
        else:
            new_chk[k] = v
    return new_chk
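
The rehashed state dict is then meant to be loaded into a 🐸TTS VITS model. A minimal sketch, assuming `model` is an already constructed, architecture-compatible Vits instance (the path and the strict flag are illustrative assumptions):

    # Hypothetical usage, assuming `model` matches the fairseq architecture.
    new_chk = rehash_fairseq_vits_checkpoint("/path/to/G_100000.pth")
    model.load_state_dict(new_chk, strict=False)  # strict=False tolerates leftover keys
    model.eval()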