mirror of https://github.com/coqui-ai/TTS.git
Add docs and missing files
parent fb31ce4b0a
commit 92d4823ad4
@@ -1725,22 +1725,37 @@ class Vits(BaseTTS):
        assert not self.training

    def load_fairseq_checkpoint(self, config, checkpoint_dir, eval=False):
        """Load VITS checkpoints released by fairseq here: https://github.com/facebookresearch/fairseq/tree/main/examples/mms

        Performs some changes for compatibility.

        Args:
            config (Coqpit): 🐸TTS model config.
            checkpoint_dir (str): Path to the checkpoint directory.
            eval (bool, optional): Set to True for evaluation. Defaults to False.
        """
        import json

        # set paths
        config_file = os.path.join(checkpoint_dir, "config.json")
        checkpoint_file = os.path.join(checkpoint_dir, "G_100000.pth")
        vocab_file = os.path.join(checkpoint_dir, "vocab.txt")
        # set config params
        with open(config_file, "r") as file:
            # Load the JSON data as a dictionary
            config_org = json.load(file)
        self.config.audio.sample_rate = config_org["data"]["sampling_rate"]
        # self.config.add_blank = config['add_blank']
        # set tokenizer
        vocab = FairseqVocab(vocab_file)
        self.text_encoder.emb = nn.Embedding(vocab.num_chars, config.model_args.hidden_channels)
        self.tokenizer = TTSTokenizer(
            use_phonemes=False,
            text_cleaner=None,
            characters=vocab,
            phonemizer=None,
            add_blank=config_org["data"]["add_blank"],
            use_eos_bos=False,
        )
        # load fairseq checkpoint
        new_chk = rehash_fairseq_vits_checkpoint(checkpoint_file)
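For context, a minimal sketch of how this loader might be exercised; the checkpoint directory (assumed to hold config.json, G_100000.pth, and vocab.txt from a downloaded fairseq MMS model) and the bare VitsConfig are illustrative assumptions, not part of this commit:

# Hypothetical usage of load_fairseq_checkpoint; the local path is an
# assumption, pointing at a fairseq MMS model fetched from the link in the
# docstring above.
from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.models.vits import Vits

config = VitsConfig()
model = Vits(config)
model.load_fairseq_checkpoint(config, "checkpoints/mms_eng/", eval=True)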
@@ -0,0 +1,48 @@

import torch


def rehash_fairseq_vits_checkpoint(checkpoint_file):
    """Rename the keys of a fairseq VITS checkpoint to match 🐸TTS Vits layer names."""
    chk = torch.load(checkpoint_file)["model"]
    new_chk = {}
    for k, v in chk.items():
        if "enc_p." in k:
            new_chk[k.replace("enc_p.", "text_encoder.")] = v
        elif "dec." in k:
            new_chk[k.replace("dec.", "waveform_decoder.")] = v
        elif "enc_q." in k:
            new_chk[k.replace("enc_q.", "posterior_encoder.")] = v
        # fairseq interleaves parameter-free Flip layers in the flow, so only
        # even indices carry weights; reindex them consecutively for 🐸TTS.
        elif "flow.flows.2." in k:
            new_chk[k.replace("flow.flows.2.", "flow.flows.1.")] = v
        elif "flow.flows.4." in k:
            new_chk[k.replace("flow.flows.4.", "flow.flows.2.")] = v
        elif "flow.flows.6." in k:
            new_chk[k.replace("flow.flows.6.", "flow.flows.3.")] = v
        # stochastic duration predictor: the ElementwiseAffine params m/logs map
        # to translation/log_scale, and interleaved Flip layers are skipped.
        elif "dp.flows.0.m" in k:
            new_chk[k.replace("dp.flows.0.m", "duration_predictor.flows.0.translation")] = v
        elif "dp.flows.0.logs" in k:
            new_chk[k.replace("dp.flows.0.logs", "duration_predictor.flows.0.log_scale")] = v
        elif "dp.flows.1" in k:
            new_chk[k.replace("dp.flows.1", "duration_predictor.flows.1")] = v
        elif "dp.flows.3" in k:
            new_chk[k.replace("dp.flows.3", "duration_predictor.flows.2")] = v
        elif "dp.flows.5" in k:
            new_chk[k.replace("dp.flows.5", "duration_predictor.flows.3")] = v
        elif "dp.flows.7" in k:
            new_chk[k.replace("dp.flows.7", "duration_predictor.flows.4")] = v
        elif "dp.post_flows.0.m" in k:
            new_chk[k.replace("dp.post_flows.0.m", "duration_predictor.post_flows.0.translation")] = v
        elif "dp.post_flows.0.logs" in k:
            new_chk[k.replace("dp.post_flows.0.logs", "duration_predictor.post_flows.0.log_scale")] = v
        elif "dp.post_flows.1" in k:
            new_chk[k.replace("dp.post_flows.1", "duration_predictor.post_flows.1")] = v
        elif "dp.post_flows.3" in k:
            new_chk[k.replace("dp.post_flows.3", "duration_predictor.post_flows.2")] = v
        elif "dp.post_flows.5" in k:
            new_chk[k.replace("dp.post_flows.5", "duration_predictor.post_flows.3")] = v
        elif "dp.post_flows.7" in k:
            new_chk[k.replace("dp.post_flows.7", "duration_predictor.post_flows.4")] = v
        elif "dp." in k:
            new_chk[k.replace("dp.", "duration_predictor.")] = v
        else:
            new_chk[k] = v
    return new_chk
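As a sanity check, one might feed the remapper a toy checkpoint and verify a few renamed keys; the key names mirror the branches above, while the tensor values and the path are placeholders invented for illustration:

# A toy round-trip through rehash_fairseq_vits_checkpoint; values and path
# are placeholders, not real fairseq checkpoint contents.
import torch

torch.save(
    {
        "model": {
            "enc_p.emb.weight": torch.zeros(1),
            "flow.flows.4.pre.weight": torch.zeros(1),
            "dp.flows.0.logs": torch.zeros(1),
        }
    },
    "/tmp/fake_G.pth",
)
new_chk = rehash_fairseq_vits_checkpoint("/tmp/fake_G.pth")
assert "text_encoder.emb.weight" in new_chk
assert "flow.flows.2.pre.weight" in new_chk
assert "duration_predictor.flows.0.log_scale" in new_chk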