From 435943ba39379d0d954d8b003f0ca1389542d549 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 15 Jan 2021 16:57:52 +0000 Subject: [PATCH 01/44] update issue template --- .github/ISSUE_TEMPLATE.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 694bb36f..a7ceffb3 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,9 +6,11 @@ labels: '' assignees: '' --- -Questions will not be answered here!! +Questions will not be answered here!! -Please consider posting on [TTS Discourse](https://discourse.mozilla.org/c/tts) page if your issue is not directly related to TTS development (Bugs, code updates etc.). +Help is much more valuable if it's shared publicly, so that more people can benefit from it. + +Please consider posting on [TTS Discourse](https://discourse.mozilla.org/c/tts) page or matrix [chat room](https://matrix.to/#/!KTePhNahjgiVumkqca:matrix.org?via=matrix.org) if your issue is not directly related to TTS development (Bugs, code updates etc.). You can also check https://github.com/mozilla/TTS/wiki/FAQ for common questions and answers. From c96f7a2614ae336ac8b4c1657af444846719cd83 Mon Sep 17 00:00:00 2001 From: gerazov Date: Sat, 16 Jan 2021 12:21:16 +0100 Subject: [PATCH 02/44] TorchSTFT to device fix --- TTS/vocoder/layers/losses.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py index e705b1e0..d49f2725 100644 --- a/TTS/vocoder/layers/losses.py +++ b/TTS/vocoder/layers/losses.py @@ -4,13 +4,14 @@ from torch import nn from torch.nn import functional as F -class TorchSTFT(): +class TorchSTFT(nn.Module): def __init__(self, n_fft, hop_length, win_length, window='hann_window'): """ Torch based STFT operation """ + super(TorchSTFT, self).__init__() self.n_fft = n_fft self.hop_length = hop_length self.win_length = win_length - self.window = getattr(torch, window)(win_length) + self.window = nn.Parameter(getattr(torch, window)(win_length)) def __call__(self, x): # B x D x T x 2 @@ -22,7 +23,8 @@ class TorchSTFT(): center=True, pad_mode="reflect", # compatible with audio.py normalized=False, - onesided=True) + onesided=True, + return_complex=False) M = o[:, :, :, 0] P = o[:, :, :, 1] return torch.sqrt(torch.clamp(M ** 2 + P ** 2, min=1e-8)) From b2b4828f17f745856d6abb642f5cb8991da0edfe Mon Sep 17 00:00:00 2001 From: gerazov Date: Sat, 16 Jan 2021 19:46:04 +0100 Subject: [PATCH 03/44] set requires_grad=False --- TTS/vocoder/layers/losses.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py index d49f2725..1107b3c5 100644 --- a/TTS/vocoder/layers/losses.py +++ b/TTS/vocoder/layers/losses.py @@ -11,7 +11,8 @@ class TorchSTFT(nn.Module): self.n_fft = n_fft self.hop_length = hop_length self.win_length = win_length - self.window = nn.Parameter(getattr(torch, window)(win_length)) + self.window = nn.Parameter(getattr(torch, window)(win_length), + requires_grad=False) def __call__(self, x): # B x D x T x 2 From 3d30dae8f32c2631af60ccd57afb00b2e6e8e84d Mon Sep 17 00:00:00 2001 From: root Date: Wed, 20 Jan 2021 02:08:58 +0000 Subject: [PATCH 04/44] .models.json and synthesize.py update for interfacing with model manager --- .models.json | 77 +++++++++ TTS/bin/synthesize.py | 363 ++++++++++++++++++++++++++++++------------ 2 files changed, 334 insertions(+), 106 deletions(-) create mode 100644 .models.json mode change 100644 => 100755 
TTS/bin/synthesize.py diff --git a/.models.json b/.models.json new file mode 100644 index 00000000..075861db --- /dev/null +++ b/.models.json @@ -0,0 +1,77 @@ +{ + "tts_models":{ + "en":{ + "ljspeech":{ + "glow-tts":{ + "description": "", + "model_file": "1NFsfhH8W8AgcfJ-BsL8CYAwQfZ5k4T-n", + "config_file": "1IAROF3yy9qTK43vG_-R67y3Py9yYbD6t", + "stats_file": null, + "commit": "" + }, + "tacotron2-DCA": { + "description": "", + "model_file": "1CFoPDQBnhfBFu2Gc0TBSJn8o-TuNKQn7", + "config_file": "1lWSscNfKet1zZSJCNirOn7v9bigUZ8C1", + "stats_file": "1qevpGRVHPmzfiRBNuugLMX62x1k7B5vK", + "commit": "" + }, + "speedy-speech-wn":{ + "description": "Speedy Speech model with wavenet decoder.", + "model_file": "1VXAwiq6N-Viq3rsSXlf43bdoi0jSvMAJ", + "config_file": "1KvZilhsNP3EumVggDcD46yd834eO5hR3", + "stats_file": "1Ju7apZ5JlgsVECcETL-GEx3DRoNzWfkR", + "commit": "77b6145" + } + } + }, + "es":{ + "mai":{ + "tacotron2-DDC":{ + "model_file": "1jZ4HvYcAXI5ZClke2iGA7qFQQJBXIovw", + "config_file": "1s7g4n-B73ChCB48AQ88_DV_8oyLth8r0", + "stats_file": "13st0CZ743v6Br5R5Qw_lH1OPQOr3M-Jv", + "commit": "" + } + } + }, + "fr":{ + "mai":{ + "tacotron2-DDC":{ + "model_file": "1qyxrrCyoXUvBG2lqVd0KqAlHj-2nZCgS", + "config_file": "1yECKeP2LI7tNv4E8yVNx1yLmCfTCpkqG", + "stats_file": "13st0CZ743v6Br5R5Qw_lH1OPQOr3M-Jv", + "commit": "" + } + } + } + }, + "vocoder_models":{ + "universal":{ + "libri-tts":{ + "wavegrad":{ + "model_file": "1r2g90JaZsfCj9dJkI9ioIU6JCFMPRqi6", + "config_file": "1POrrLf5YEpZyjvWyMccj1nGCVc94mR6s", + "stats_file": "1Vwbv4t-N1i3jXqI0bgKAhShAEO097sK0", + "commit": "ea976b0" + }, + "fullband-melgan":{ + "model_file": "1Ty5DZdOc0F7OTGj9oJThYbL5iVu_2G0K", + "config_file": "1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu", + "stats_file": "11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU", + "commit": "4132240" + } + } + }, + "en": { + "ljspeech":{ + "mulitband-melgan":{ + "model_file": "1Ty5DZdOc0F7OTGj9oJThYbL5iVu_2G0K", + "config_file": "1Rd0R_nRCrbjEdpOwq6XwZAktvugiBvmu", + "stats_file": "11oY3Tv0kQtxK_JPgxrfesa99maVXHNxU", + "commit": "ea976b0" + } + } + } + } +} \ No newline at end of file diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py old mode 100644 new mode 100755 index 25459f79..c0da6952 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -3,50 +3,140 @@ import argparse import json -# pylint: disable=redefined-outer-name, unused-argument import os +import sys import string import time +from argparse import RawTextHelpFormatter +# pylint: disable=redefined-outer-name, unused-argument +from pathlib import Path -import torch import numpy as np - -from TTS.tts.utils.generic_utils import setup_model, is_tacotron +import torch +from TTS.tts.utils.generic_utils import is_tacotron, setup_model from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols +from TTS.tts.utils.io import load_checkpoint from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config -from TTS.vocoder.utils.generic_utils import setup_generator +from TTS.utils.manage import ModelManager +from TTS.vocoder.utils.generic_utils import setup_generator, interpolate_vocoder_input -def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid, speaker_embedding=None, gst_style=None): +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + + +def 
load_tts_model(model_path, config_path, use_cuda, speakers_json=None, speaker_idx=None): + global phonemes + global symbols + + # load the config + model_config = load_config(config_path) + + # load the audio processor + ap = AudioProcessor(**model_config.audio) + + # if the vocabulary was passed, replace the default + if 'characters' in model_config.keys(): + symbols, phonemes = make_symbols(**model_config.characters) + + # load speakers + speaker_embedding = None + speaker_embedding_dim = None + num_speakers = 0 + if speakers_json is not None: + speaker_mapping = json.load(open(speakers_json, 'r')) + num_speakers = len(speaker_mapping) + if model_config.use_external_speaker_embedding_file: + if speaker_idx is not None: + speaker_embedding = speaker_mapping[speaker_idx]['embedding'] + else: # if speaker_idx is not specificated use the first sample in speakers.json + speaker_embedding = speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'] + speaker_embedding_dim = len(speaker_embedding) + + # load tts model + num_chars = len(phonemes) if model_config.use_phonemes else len(symbols) + model = setup_model(num_chars, num_speakers, model_config, speaker_embedding_dim) + model.load_checkpoint(model_config, model_path, eval=True) + if use_cuda: + model.cuda() + return model, model_config, ap, speaker_embedding + + +def load_vocoder_model(model_path, config_path, use_cuda): + vocoder_config = load_config(vocoder_config_path) + vocoder_ap = AudioProcessor(**vocoder_config['audio']) + vocoder_model = setup_generator(vocoder_config) + vocoder_model.load_checkpoint(vocoder_config, model_path, eval=True) + if use_cuda: + vocoder_model.cuda() + return vocoder_model, vocoder_config, vocoder_ap + + +def tts(model, + vocoder_model, + text, + model_config, + vocoder_config, + use_cuda, + ap, + vocoder_ap, + use_gl, + speaker_fileid, + speaker_embedding=None, + gst_style=None): t_1 = time.time() - waveform, _, _, mel_postnet_spec, _, _ = synthesis(model, text, CONFIG, use_cuda, ap, speaker_fileid, gst_style, False, CONFIG.enable_eos_bos_chars, use_gl, speaker_embedding=speaker_embedding) + waveform, _, _, mel_postnet_spec, _, _ = synthesis( + model, + text, + model_config, + use_cuda, + ap, + speaker_fileid, + gst_style, + False, + model_config.enable_eos_bos_chars, + use_gl, + speaker_embedding=speaker_embedding) - # grab spectrogram (thx to the nice guys at mozilla discourse for codesnipplet) + # grab spectrogram (thx to the nice guys at mozilla discourse for codesnippet) if args.save_spectogram: spec_file_name = args.text.replace(" ", "_")[0:10] spec_file_name = spec_file_name.translate( str.maketrans('', '', string.punctuation.replace('_', ''))) + '.npy' spec_file_name = os.path.join(args.out_path, spec_file_name) - spectrogram = torch.FloatTensor(mel_postnet_spec.T) - spectrogram = spectrogram.unsqueeze(0) + spectrogram = mel_postnet_spec.T + spectrogram = spectrogram[0] np.save(spec_file_name, spectrogram) print(" > Saving raw spectogram to " + spec_file_name) - - if CONFIG.model == "Tacotron" and not use_gl: - mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T).T + # convert linear spectrogram to melspectrogram for tacotron + if model_config.model == "Tacotron" and not use_gl: + mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T) + # run vocoder_model if not use_gl: - # Use if not computed noise schedule with tune_wavegrad - beta = np.linspace(1e-6, 0.01, 50) - vocoder_model.compute_noise_level(beta) - - # Use alternative when using output npy file from tune_wavegrad - # beta 
= np.load("output-tune-wavegrad.npy", allow_pickle=True).item() - # vocoder_model.compute_noise_level(beta['beta']) - + # denormalize tts output based on tts audio config + mel_postnet_spec = ap._denormalize(mel_postnet_spec.T).T device_type = "cuda" if use_cuda else "cpu" - waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).to(device_type).unsqueeze(0)) + # renormalize spectrogram based on vocoder config + vocoder_input = vocoder_ap._normalize(mel_postnet_spec.T) + # compute scale factor for possible sample rate mismatch + scale_factor = [1, vocoder_config['audio']['sample_rate'] / ap.sample_rate] + if scale_factor[1] != 1: + print(" > interpolating tts model output.") + vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) + else: + vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) + # run vocoder model + # [1, T, C] + waveform = vocoder_model.inference(vocoder_input.to(device_type)) if use_cuda and not use_gl: waveform = waveform.cpu() if not use_gl: @@ -62,54 +152,115 @@ def tts(model, vocoder_model, text, CONFIG, use_cuda, ap, use_gl, speaker_fileid if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument('text', type=str, help='Text to generate speech.') - parser.add_argument('config_path', - type=str, - help='Path to model config file.') + parser = argparse.ArgumentParser(description='''Synthesize speech on command line.\n\n''' + + '''You can either use your trained model or choose a model from the provided list.\n''' + + ''' +Example runs: + + # list provided models + ./TTS/bin/synthesize.py --list_models + + # run a model from the list + ./TTS/bin/synthesize.py --text "Text for TTS" --model_name "//" --vocoder_name "//" --output_path + + # run your own TTS model (Using Griffin-Lim Vocoder) + ./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav + + # run your own TTS and Vocoder models + ./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav + --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json + +''', + formatter_class=RawTextHelpFormatter) + parser.add_argument( - 'model_path', + '--list_models', + type=str2bool, + nargs='?', + const=True, + default=False, + help='list available pre-trained tts and vocoder models.' + ) + parser.add_argument( + '--text', type=str, + default=None, + help='Text to generate speech.' + ) + + # Args for running pre-trained TTS models. + parser.add_argument( + '--model_name', + type=str, + default=None, + help= + 'Name of one of the pre-trained tts models in format //' + ) + parser.add_argument( + '--vocoder_name', + type=str, + default=None, + help= + 'Name of one of the pre-trained vocoder models in format //' + ) + + # Args for running custom models + parser.add_argument( + '--config_path', + default=None, + type=str, + help='Path to model config file.' + ) + parser.add_argument( + '--model_path', + type=str, + default=None, help='Path to model file.', ) parser.add_argument( - 'out_path', + '--out_path', type=str, - help='Path to save final wav file. Wav file will be names as the text given.', + default=Path(__file__).resolve().parent, + help='Path to save final wav file. 
Wav file will be named as the given text.', ) - parser.add_argument('--use_cuda', - type=bool, - help='Run model on CUDA.', - default=False) + parser.add_argument( + '--use_cuda', + type=bool, + help='Run model on CUDA.', + default=False + ) parser.add_argument( '--vocoder_path', type=str, help= 'Path to vocoder model file. If it is not defined, model uses GL as vocoder. Please make sure that you installed vocoder library before (WaveRNN).', - default="", + default=None, ) - parser.add_argument('--vocoder_config_path', - type=str, - help='Path to vocoder model config file.', - default="") parser.add_argument( - '--batched_vocoder', - type=bool, - help="If True, vocoder model uses faster batch processing.", - default=True) - parser.add_argument('--speakers_json', - type=str, - help="JSON file for multi-speaker model.", - default="") - parser.add_argument( - '--speaker_fileid', + '--vocoder_config_path', type=str, - help="if CONFIG.use_external_speaker_embedding_file is true, name of speaker embedding reference file present in speakers.json, else target speaker_fileid if the model is multi-speaker.", + help='Path to vocoder model config file.', + default=None) + + # args for multi-speaker synthesis + parser.add_argument( + '--speakers_json', + type=str, + help="JSON file for multi-speaker model.", + default=None) + parser.add_argument( + '--speaker_idx', + type=str, + help="if the tts model is trained with x-vectors, then speaker_idx is a file present in speakers.json else speaker_idx is the speaker id corresponding to a speaker in the speaker embedding layer.", default=None) parser.add_argument( '--gst_style', help="Wav path file for GST stylereference.", default=None) + + # aux args parser.add_argument( '--save_spectogram', type=bool, @@ -118,86 +269,86 @@ if __name__ == "__main__": args = parser.parse_args() - # load the config - C = load_config(args.config_path) - C.forward_attn_mask = True + # load model manager + path = Path(__file__).parent / "../../.models.json" + manager = ModelManager(path) - # load the audio processor - ap = AudioProcessor(**C.audio) + model_path = None + vocoder_path = None + model = None + vocoder_model = None + vocoder_config = None + vocoder_ap = None - # if the vocabulary was passed, replace the default - if 'characters' in C.keys(): - symbols, phonemes = make_symbols(**C.characters) + # CASE1: list pre-trained TTS models + if args.list_models: + manager.list_models() + sys.exit() - speaker_embedding = None - speaker_embedding_dim = None - num_speakers = 0 + # CASE2: load pre-trained models + if args.model_name is not None: + model_path, config_path = manager.download_model(args.model_name) - # load speakers - if args.speakers_json != '': - speaker_mapping = json.load(open(args.speakers_json, 'r')) - num_speakers = len(speaker_mapping) - if C.use_external_speaker_embedding_file: - if args.speaker_fileid is not None: - speaker_embedding = speaker_mapping[args.speaker_fileid]['embedding'] - else: # if speaker_fileid is not specificated use the first sample in speakers.json - speaker_embedding = speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'] - speaker_embedding_dim = len(speaker_embedding) + if args.vocoder_name is not None: + vocoder_path, vocoder_config_path = manager.download_model(args.vocoder_name) - # load the model - num_chars = len(phonemes) if C.use_phonemes else len(symbols) - model = setup_model(num_chars, num_speakers, C, speaker_embedding_dim) - cp = torch.load(args.model_path, map_location=torch.device('cpu')) - 
model.load_state_dict(cp['model']) - model.eval() - if args.use_cuda: - model.cuda() - if is_tacotron(C): - model.decoder.set_r(cp['r']) + # CASE3: load custome models + if args.model_path is not None: + model_path = args.model_path + config_path = args.config_path - # load vocoder model - if args.vocoder_path != "": - VC = load_config(args.vocoder_config_path) - vocoder_model = setup_generator(VC) - vocoder_model.load_state_dict(torch.load(args.vocoder_path, map_location="cpu")["model"]) - vocoder_model.remove_weight_norm() - if args.use_cuda: - vocoder_model.cuda() - vocoder_model.eval() - else: - vocoder_model = None - VC = None + if args.vocoder_path is not None: + vocoder_path = args.vocoder_path + vocoder_config_path = args.vocoder_config_path - # synthesize voice - use_griffin_lim = args.vocoder_path == "" + # RUN THE SYNTHESIS + # load models + model, model_config, ap, speaker_embedding = load_tts_model(model_path, config_path, args.use_cuda, args.speaker_idx) + if vocoder_path is not None: + vocoder_model, vocoder_config, vocoder_ap = load_vocoder_model(vocoder_path, vocoder_config_path, use_cuda=args.use_cuda) + + use_griffin_lim = vocoder_path is None print(" > Text: {}".format(args.text)) - if not C.use_external_speaker_embedding_file: - if args.speaker_fileid.isdigit(): - args.speaker_fileid = int(args.speaker_fileid) + # handle multi-speaker setting + if not model_config.use_external_speaker_embedding_file and args.speaker_idx is not None: + if args.speaker_idx.isdigit(): + args.speaker_idx = int(args.speaker_idx) else: - args.speaker_fileid = None + args.speaker_idx = None else: - args.speaker_fileid = None + args.speaker_idx = None if args.gst_style is None: - if is_tacotron(C): - gst_style = C.gst['gst_style_input'] + if 'gst' in model_config.keys() and model_config.gst['gst_style_input'] is not None: + gst_style = model_config.gst['gst_style_input'] else: gst_style = None else: # check if gst_style string is a dict, if is dict convert else use string try: gst_style = json.loads(args.gst_style) - if max(map(int, gst_style.keys())) >= C.gst['gst_style_tokens']: - raise RuntimeError("The highest value of the gst_style dictionary key must be less than the number of GST Tokens, \n Highest dictionary key value: {} \n Number of GST tokens: {}".format(max(map(int, gst_style.keys())), C.gst['gst_style_tokens'])) + if max(map(int, gst_style.keys())) >= model_config.gst['gst_style_tokens']: + raise RuntimeError("The highest value of the gst_style dictionary key must be less than the number of GST Tokens, \n Highest dictionary key value: {} \n Number of GST tokens: {}".format(max(map(int, gst_style.keys())), model_config.gst['gst_style_tokens'])) except ValueError: gst_style = args.gst_style - wav = tts(model, vocoder_model, args.text, C, args.use_cuda, ap, use_griffin_lim, args.speaker_fileid, speaker_embedding=speaker_embedding, gst_style=gst_style) + # kick it + wav = tts(model, + vocoder_model, + args.text, + model_config, + vocoder_config, + args.use_cuda, + ap, + vocoder_ap, + use_griffin_lim, + args.speaker_idx, + speaker_embedding=speaker_embedding, + gst_style=gst_style) # save the results - file_name = args.text.replace(" ", "_")[0:10] + file_name = args.text.replace(" ", "_")[0:20] file_name = file_name.translate( str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav' out_path = os.path.join(args.out_path, file_name) From 5c87753e88b681ef71688b73c68f84eea456d53c Mon Sep 17 00:00:00 2001 From: root Date: Wed, 20 Jan 2021 02:09:42 +0000 Subject: [PATCH 05/44] 
glow-tts fix for saving inverse weight --- TTS/tts/layers/glow_tts/glow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TTS/tts/layers/glow_tts/glow.py b/TTS/tts/layers/glow_tts/glow.py index f6385747..c8ad410d 100644 --- a/TTS/tts/layers/glow_tts/glow.py +++ b/TTS/tts/layers/glow_tts/glow.py @@ -128,8 +128,9 @@ class InvConvNear(nn.Module): return z, logdet def store_inverse(self): - self.weight_inv = torch.inverse( + weight_inv = torch.inverse( self.weight.float()).to(dtype=self.weight.dtype) + self.weight_inv = nn.Parameter(weight_inv, requires_grad=False) class CouplingBlock(nn.Module): From 1faf565e3ae2270e7c603368afa7e2234a5877f2 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 20 Jan 2021 02:10:56 +0000 Subject: [PATCH 06/44] add load_checkpoint func to tts models --- TTS/tts/models/glow_tts.py | 8 ++++++++ TTS/tts/models/speedy_speech.py | 7 +++++++ TTS/tts/models/tacotron_abstract.py | 8 ++++++++ 3 files changed, 23 insertions(+) diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index b55ba1b1..c978e4fa 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -223,3 +223,11 @@ class GlowTts(nn.Module): def store_inverse(self): self.decoder.store_inverse() + + def load_checkpoint(self, config, checkpoint_path, eval=False): + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + self.store_inverse() + assert not self.training diff --git a/TTS/tts/models/speedy_speech.py b/TTS/tts/models/speedy_speech.py index 2e7d0a5f..7f5c660e 100644 --- a/TTS/tts/models/speedy_speech.py +++ b/TTS/tts/models/speedy_speech.py @@ -190,3 +190,10 @@ class SpeedySpeech(nn.Module): y_lengths = o_dr.sum(1) o_de, attn= self._forward_decoder(o_en, o_en_dp, o_dr, x_mask, y_lengths, g=g) return o_de, attn + + def load_checkpoint(self, config, checkpoint_path, eval=False): + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training diff --git a/TTS/tts/models/tacotron_abstract.py b/TTS/tts/models/tacotron_abstract.py index 54c46be2..0a63b871 100644 --- a/TTS/tts/models/tacotron_abstract.py +++ b/TTS/tts/models/tacotron_abstract.py @@ -121,6 +121,14 @@ class TacotronAbstract(ABC, nn.Module): def inference(self): pass + def load_checkpoint(self, config, checkpoint_path, eval=False): + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + self.decoder.set_r(state['r']) + if eval: + self.eval() + assert not self.training + ############################# # COMMON COMPUTE FUNCTIONS ############################# From 563bc921d884996113d4311b040e2ff5f93dec87 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 20 Jan 2021 02:11:24 +0000 Subject: [PATCH 07/44] optional verbose for audio.py init --- TTS/utils/audio.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py index 9d25aeb7..786f57b0 100644 --- a/TTS/utils/audio.py +++ b/TTS/utils/audio.py @@ -35,9 +35,9 @@ class AudioProcessor(object): trim_db=60, do_sound_norm=False, stats_path=None, + verbose=True, **_): - print(" > Setting up Audio Processor...") # setup class attributed self.sample_rate = sample_rate self.resample = resample @@ -73,8 +73,10 @@ class AudioProcessor(object): assert min_level_db != 0.0, " [!] min_level_db is 0" assert self.win_length <= self.fft_size, " [!] 
win_length cannot be larger than fft_size" members = vars(self) - for key, value in members.items(): - print(" | > {}:{}".format(key, value)) + if verbose: + print(" > Setting up Audio Processor...") + for key, value in members.items(): + print(" | > {}:{}".format(key, value)) # create spectrogram utils self.mel_basis = self._build_mel_basis() self.inv_mel_basis = np.linalg.pinv(self._build_mel_basis()) From ea39715305b2b835757bc45b7bc1e64d82516a48 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 20 Jan 2021 02:11:55 +0000 Subject: [PATCH 08/44] read_json_with_comments --- TTS/utils/io.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/TTS/utils/io.py b/TTS/utils/io.py index 2c5c8e49..46abf1c8 100644 --- a/TTS/utils/io.py +++ b/TTS/utils/io.py @@ -20,6 +20,16 @@ class AttrDict(dict): self.__dict__ = self +def read_json_with_comments(json_path): + # fallback to json + with open(json_path, "r") as f: + input_str = f.read() + # handle comments + input_str = re.sub(r'\\\n', '', input_str) + input_str = re.sub(r'//.*\n', '\n', input_str) + data = json.loads(input_str) + return data + def load_config(config_path: str) -> AttrDict: """Load config files and discard comments @@ -33,14 +43,7 @@ def load_config(config_path: str) -> AttrDict: with open(config_path, "r") as f: data = yaml.safe_load(f) else: - # fallback to json - with open(config_path, "r") as f: - input_str = f.read() - # handle comments - input_str = re.sub(r'\\\n', '', input_str) - input_str = re.sub(r'//.*\n', '\n', input_str) - data = json.loads(input_str) - + data = read_json_with_comments(config_path) config.update(data) return config From ca3743539a85f753f64405ba77179ee2047297df Mon Sep 17 00:00:00 2001 From: root Date: Wed, 20 Jan 2021 02:12:29 +0000 Subject: [PATCH 09/44] load_checkpoint func for vocoder models --- TTS/vocoder/models/melgan_generator.py | 8 ++++++++ .../models/parallel_wavegan_generator.py | 10 ++++++++++ TTS/vocoder/models/wavegrad.py | 19 +++++++++++++++++++ TTS/vocoder/utils/generic_utils.py | 1 + 4 files changed, 38 insertions(+) diff --git a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py index 9ab98cef..e5fd46eb 100644 --- a/TTS/vocoder/models/melgan_generator.py +++ b/TTS/vocoder/models/melgan_generator.py @@ -95,3 +95,11 @@ class MelganGenerator(nn.Module): nn.utils.remove_weight_norm(layer) except ValueError: layer.remove_weight_norm() + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training + self.remove_weight_norm() diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py index d703f327..f5ed7712 100644 --- a/TTS/vocoder/models/parallel_wavegan_generator.py +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -39,6 +39,7 @@ class ParallelWaveganGenerator(torch.nn.Module): self.upsample_factors = upsample_factors self.upsample_scale = np.prod(upsample_factors) self.inference_padding = inference_padding + self.use_weight_norm = use_weight_norm # check the number of layers and stacks assert num_res_blocks % stacks == 0 @@ -156,3 +157,12 @@ class ParallelWaveganGenerator(torch.nn.Module): def receptive_field_size(self): return self._get_receptive_field_size(self.layers, self.stacks, self.kernel_size) + + def load_checkpoint(self, config, checkpoint_path, eval=False): # 
pylint: disable=unused-argument + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training + if self.use_weight_norm: + self.remove_weight_norm() diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py index da491771..bb9d04b8 100644 --- a/TTS/vocoder/models/wavegrad.py +++ b/TTS/vocoder/models/wavegrad.py @@ -175,3 +175,22 @@ class Wavegrad(nn.Module): self.x_conv = weight_norm(self.x_conv) self.out_conv = weight_norm(self.out_conv) self.y_conv = weight_norm(self.y_conv) + + + def load_checkpoint(self, config, checkpoint_path, eval=False): + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training + if self.use_weight_norm: + self.remove_weight_norm() + betas = np.linspace(config['test_noise_schedule']['min_val'], + config['test_noise_schedule']['max_val'], + config['test_noise_schedule']['num_steps']) + self.compute_noise_level(betas) + else: + betas = np.linspace(config['train_noise_schedule']['min_val'], + config['train_noise_schedule']['max_val'], + config['train_noise_schedule']['num_steps']) + self.compute_noise_level(betas) diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py index d6e2e13b..d17c84d4 100644 --- a/TTS/vocoder/utils/generic_utils.py +++ b/TTS/vocoder/utils/generic_utils.py @@ -1,4 +1,5 @@ import re +import torch import importlib import numpy as np from matplotlib import pyplot as plt From 5bd7238153c67aa79a51a7353fb7acce06c80f88 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 20 Jan 2021 02:13:01 +0000 Subject: [PATCH 10/44] interpolate spectrogram in vocoder generic utils for matching sample rates --- TTS/vocoder/utils/generic_utils.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py index d17c84d4..478d9c00 100644 --- a/TTS/vocoder/utils/generic_utils.py +++ b/TTS/vocoder/utils/generic_utils.py @@ -7,6 +7,29 @@ from matplotlib import pyplot as plt from TTS.tts.utils.visual import plot_spectrogram +def interpolate_vocoder_input(scale_factor, spec): + """Interpolate spectrogram by the scale factor. + It is mainly used to match the sampling rates of + the tts and vocoder models. + + Args: + scale_factor (float): scale factor to interpolate the spectrogram + spec (np.array): spectrogram to be interpolated + + Returns: + torch.tensor: interpolated spectrogram. 
+    """
+    print(" > before interpolation :", spec.shape)
+    spec = torch.tensor(spec).unsqueeze(0).unsqueeze(0)
+    spec = torch.nn.functional.interpolate(spec,
+                                           scale_factor=scale_factor,
+                                           recompute_scale_factor=True,
+                                           mode='bilinear',
+                                           align_corners=False).squeeze(0)
+    print(" > after interpolation :", spec.shape)
+    return spec
+
+
 def plot_results(y_hat, y, ap, global_step, name_prefix):
     """ Plot vocoder model results """

From 1bc8fbbd3cb95941e15c5006f78f0f985cc48ce2 Mon Sep 17 00:00:00 2001
From: root
Date: Wed, 20 Jan 2021 02:14:18 +0000
Subject: [PATCH 11/44] set eval mode when loading models

---
 TTS/tts/utils/io.py     | 4 +++-
 TTS/vocoder/utils/io.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/TTS/tts/utils/io.py b/TTS/tts/utils/io.py
index 830529a3..63e04283 100644
--- a/TTS/tts/utils/io.py
+++ b/TTS/tts/utils/io.py
@@ -7,7 +7,7 @@ from TTS.utils.io import RenamingUnpickler
 
-def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False):
+def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False, eval=False):
     """Load ```TTS.tts.models``` checkpoints.
 
     Args:
@@ -33,6 +33,8 @@ def load_checkpoint(model, checkpoint_path, amp=None, use_cuda=False):
         if hasattr(model.decoder, 'r'):
             model.decoder.set_r(state['r'])
             print(" > Model r: ", state['r'])
+    if eval:
+        model.eval()
     return model, state
 
diff --git a/TTS/vocoder/utils/io.py b/TTS/vocoder/utils/io.py
index c33d2cb9..5c42dfca 100644
--- a/TTS/vocoder/utils/io.py
+++ b/TTS/vocoder/utils/io.py
@@ -6,7 +6,7 @@ import pickle as pickle_tts
 from TTS.utils.io import RenamingUnpickler
 
-def load_checkpoint(model, checkpoint_path, use_cuda=False):
+def load_checkpoint(model, checkpoint_path, use_cuda=False, eval=False):
     try:
         state = torch.load(checkpoint_path, map_location=torch.device('cpu'))
     except ModuleNotFoundError:
@@ -15,6 +15,8 @@ def load_checkpoint(model, checkpoint_path, use_cuda=False):
     model.load_state_dict(state['model'])
     if use_cuda:
         model.cuda()
+    if eval:
+        model.eval()
     return model, state
 

From e414582be658778861b37f55f9c1bdc57d7a8651 Mon Sep 17 00:00:00 2001
From: Thorsten Mueller
Date: Wed, 20 Jan 2021 21:56:40 +0100
Subject: [PATCH 12/44] Added option for server ui details page.
---
 TTS/server/server.py              |  18 ++++
 TTS/server/templates/details.html | 131 ++++++++++++++++++++++++++++++
 2 files changed, 149 insertions(+)
 create mode 100644 TTS/server/templates/details.html

diff --git a/TTS/server/server.py b/TTS/server/server.py
index bd23ea9c..f2412bb8 100644
--- a/TTS/server/server.py
+++ b/TTS/server/server.py
@@ -4,6 +4,7 @@ import os
 
 from flask import Flask, request, render_template, send_file
 from TTS.server.synthesizer import Synthesizer
+from TTS.utils.io import load_config
 
 
 def create_argparser():
@@ -23,6 +24,7 @@ def create_argparser():
     parser.add_argument('--port', type=int, default=5002, help='port to listen on.')
     parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.')
     parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.')
+    parser.add_argument('--show_details', type=convert_boolean, default=False, help='Generate model detail page.')
     return parser
 
 
@@ -69,6 +71,22 @@ app = Flask(__name__)
 def index():
     return render_template('index.html')
 
+@app.route('/details')
+def details():
+    if args.tts_config is not None and os.path.isfile(args.tts_config):
+        taco2_config = load_config(args.tts_config)
+    else:
+        taco2_config = None
+
+    if args.vocoder_config is not None and os.path.isfile(args.vocoder_config):
+        vocoder_config = load_config(args.vocoder_config)
+    else:
+        vocoder_config = None
+
+    return render_template('details.html',
+                           show_details=args.show_details,
+                           taco2_config=taco2_config,
+                           vocoder_config=vocoder_config,
+                           args=args.__dict__)
 
 @app.route('/api/tts', methods=['GET'])
 def tts():

diff --git a/TTS/server/templates/details.html b/TTS/server/templates/details.html
new file mode 100644
index 00000000..2db60657
--- /dev/null
+++ b/TTS/server/templates/details.html
@@ -0,0 +1,131 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>TTS engine</title>
+</head>
+
+<body>
+    <a href="https://github.com/mozilla/TTS">Fork me on GitHub</a>
+
+    {% if show_details == true %}
+
+        <div class="container">
+            <b>Model details</b>
+        </div>
+
+        <div class="container">
+            <b>CLI arguments:</b>
+            <table>
+                <tr>
+                    <th>CLI key</th>
+                    <th>Value</th>
+                </tr>
+                {% for key, value in args.items() %}
+                <tr>
+                    <td>{{ key }}</td>
+                    <td>{{ value }}</td>
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+
+        {% if taco2_config != None %}
+        <div class="container">
+            <b>Tacotron2 model config:</b>
+            <table>
+                <tr>
+                    <th>Key</th>
+                    <th>Value</th>
+                </tr>
+                {% for key, value in taco2_config.items() %}
+                <tr>
+                    <td>{{ key }}</td>
+                    <td>{{ value }}</td>
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+        {% endif %}
+
+        {% if vocoder_config != None %}
+        <div class="container">
+            <b>Vocoder model config:</b>
+            <table>
+                <tr>
+                    <th>Key</th>
+                    <th>Value</th>
+                </tr>
+                {% for key, value in vocoder_config.items() %}
+                <tr>
+                    <td>{{ key }}</td>
+                    <td>{{ value }}</td>
+                </tr>
+                {% endfor %}
+            </table>
+        </div>
+        {% endif %}
+
+    {% else %}
+        <div class="container">
+            Please start server with --show_details=true to see details.
+        </div>
+    {% endif %}
+
+</body>
+
+</html>
\ No newline at end of file

From 8cfed633981a8687838d5060513e5e37c54c36da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Thu, 21 Jan 2021 15:29:21 +0100
Subject: [PATCH 13/44] update install instructions

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0d5e6991..723568e1 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ You can also help us implement more models. Some TTS related work can be found [
 
 ## Install TTS
 TTS supports **python >= 3.6**.
-```python setup.py install``` or ```python setup.py develop``` to keep your installation in your working directory.
+```pip install .``` or ```pip install -e .``` to keep your installation in your working directory.
 
 ## Directory Structure
 ```

From 6b6e989fd2d11df5644d86141e558bb44b06e61c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Thu, 21 Jan 2021 15:29:46 +0100
Subject: [PATCH 14/44] update server readme

---
 TTS/bin/synthesize.py |  5 ++---
 TTS/server/README.md  | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py
index c0da6952..a4d70324 100755
--- a/TTS/bin/synthesize.py
+++ b/TTS/bin/synthesize.py
@@ -106,7 +106,6 @@ def tts(model,
         model_config.enable_eos_bos_chars,
         use_gl,
         speaker_embedding=speaker_embedding)
-
     # grab spectrogram (thx to the nice guys at mozilla discourse for codesnippet)
     if args.save_spectogram:
         spec_file_name = args.text.replace(" ", "_")[0:10]
@@ -157,7 +156,7 @@ if __name__ == "__main__":
        '''You can either use your trained model or choose a model from the provided list.\n'''
        '''
-Example runs:
+    Example runs:
 
     # list provided models
     ./TTS/bin/synthesize.py --list_models
 
     # run a model from the list
     ./TTS/bin/synthesize.py --text "Text for TTS" --model_name "<language>/<dataset>/<model_name>" --vocoder_name "<language>/<dataset>/<model_name>" --output_path
 
     # run your own TTS model (Using Griffin-Lim Vocoder)
     ./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
 
     # run your own TTS and Vocoder models
     ./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
         --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
 
-''',
+    ''',
        formatter_class=RawTextHelpFormatter)

diff --git a/TTS/server/README.md b/TTS/server/README.md
index 3c65c961..cd885839 100644
--- a/TTS/server/README.md
+++ b/TTS/server/README.md
@@ -9,6 +9,20 @@ Instructions below are based on a Ubuntu 18.04 machine, but it should be simple
 ##### Using server.py
 If you have the environment set already for TTS, then you can directly call ```server.py```.
 
+Example runs:
+
+List officially released models.
+```python TTS/server/server.py --list_models ```
+
+Run the server with the official models.
+```python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/mulitband-melgan```
+
+Run the server with the official models on a GPU.
+```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/mulitband-melgan --use_cuda True```
+
+Run the server with custom models.
+```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth.tar --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth.tar --vocoder_config /path/to/vocoder/config.json```
+
 ##### Using .whl
 1. apt-get install -y espeak libsndfile1 python3-venv
 2. python3 -m venv /tmp/venv
 
 You can now open http://localhost:5002 in a browser
 
 #### Running with nginx/uwsgi:
 
+**Note:** This method uses an old TTS model, so quality might be low.
+
 1.
apt-get install -y uwsgi uwsgi-plugin-python3 nginx espeak libsndfile1 python3-venv 2. python3 -m venv /tmp/venv 3. source /tmp/venv/bin/activate From 007a4d7139a378d752eadfcc284278c0dc65c5e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Thu, 21 Jan 2021 15:30:16 +0100 Subject: [PATCH 15/44] remove 3rd paty wavernn support from server.py and add ModelManager arguments --- TTS/server/conf.json | 4 ---- TTS/server/server.py | 40 +++++++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/TTS/server/conf.json b/TTS/server/conf.json index 00045365..32e475cf 100644 --- a/TTS/server/conf.json +++ b/TTS/server/conf.json @@ -5,10 +5,6 @@ "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding. "vocoder_config":null, "vocoder_file": null, - "wavernn_lib_path": null, // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis. - "wavernn_path":null, // wavernn model root path - "wavernn_file":null, // wavernn checkpoint file name - "wavernn_config": null, // wavernn config file "is_wavernn_batched":true, "port": 5002, "use_cuda": true, diff --git a/TTS/server/server.py b/TTS/server/server.py index bd23ea9c..ed98d35e 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -1,9 +1,12 @@ #!flask/bin/python import argparse import os +import sys +from pathlib import Path -from flask import Flask, request, render_template, send_file +from flask import Flask, render_template, request, send_file from TTS.server.synthesizer import Synthesizer +from TTS.utils.manage import ModelManager def create_argparser(): @@ -11,21 +14,19 @@ def create_argparser(): return x.lower() in ['true', '1', 'yes'] parser = argparse.ArgumentParser() - parser.add_argument('--tts_checkpoint', type=str, help='path to TTS checkpoint file') - parser.add_argument('--tts_config', type=str, help='path to TTS config.json file') + parser.add_argument('--list_models', type=convert_boolean, nargs='?', const=True, default=False, help='list available pre-trained tts and vocoder models.') + parser.add_argument('--model_name', type=str, help='name of one of the released tts models.') + parser.add_argument('--vocoder_name', type=str, help='name of one of the released vocoder models.') + parser.add_argument('--tts_checkpoint', type=str, help='path to custom tts checkpoint file') + parser.add_argument('--tts_config', type=str, help='path to custom tts config.json file') parser.add_argument('--tts_speakers', type=str, help='path to JSON file containing speaker ids, if speaker ids are used in the model') - parser.add_argument('--wavernn_lib_path', type=str, default=None, help='path to WaveRNN project folder to be imported. 
If this is not passed, model uses Griffin-Lim for synthesis.') - parser.add_argument('--wavernn_checkpoint', type=str, default=None, help='path to WaveRNN checkpoint file.') - parser.add_argument('--wavernn_config', type=str, default=None, help='path to WaveRNN config file.') - parser.add_argument('--is_wavernn_batched', type=convert_boolean, default=False, help='true to use batched WaveRNN.') - parser.add_argument('--vocoder_config', type=str, default=None, help='path to TTS.vocoder config file.') - parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to TTS.vocoder checkpoint file.') + parser.add_argument('--vocoder_config', type=str, default=None, help='path to vocoder config file.') + parser.add_argument('--vocoder_checkpoint', type=str, default=None, help='path to vocoder checkpoint file.') parser.add_argument('--port', type=int, default=5002, help='port to listen on.') parser.add_argument('--use_cuda', type=convert_boolean, default=False, help='true to use CUDA.') parser.add_argument('--debug', type=convert_boolean, default=False, help='true to enable Flask debug mode.') return parser - synthesizer = None embedded_models_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model') @@ -45,6 +46,20 @@ wavernn_config_file = os.path.join(embedded_wavernn_folder, 'config.json') args = create_argparser().parse_args() +path = Path(__file__).parent / "../../.models.json" +manager = ModelManager(path) + +if args.list_models: + manager.list_models() + sys.exit() + +# set models by the released models +if args.model_name is not None: + tts_checkpoint_file, tts_config_file = manager.download_model(args.model_name) + +if args.vocoder_name is not None: + vocoder_checkpoint_file, vocoder_config_file = manager.download_model(args.vocoder_name) + # If these were not specified in the CLI args, use default values with embedded model files if not args.tts_checkpoint and os.path.isfile(tts_checkpoint_file): args.tts_checkpoint = tts_checkpoint_file @@ -56,11 +71,6 @@ if not args.vocoder_checkpoint and os.path.isfile(vocoder_checkpoint_file): if not args.vocoder_config and os.path.isfile(vocoder_config_file): args.vocoder_config = vocoder_config_file -if not args.wavernn_checkpoint and os.path.isfile(wavernn_checkpoint_file): - args.wavernn_checkpoint = wavernn_checkpoint_file -if not args.wavernn_config and os.path.isfile(wavernn_config_file): - args.wavernn_config = wavernn_config_file - synthesizer = Synthesizer(args) app = Flask(__name__) From 50fee59a2c60ca711b7959baa1711d586aa49758 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Thu, 21 Jan 2021 15:30:49 +0100 Subject: [PATCH 16/44] update synthesizer.py for better interfacing to different models --- TTS/server/synthesizer.py | 171 +++++++++++++++----------------------- 1 file changed, 68 insertions(+), 103 deletions(-) diff --git a/TTS/server/synthesizer.py b/TTS/server/synthesizer.py index 9906291a..a76badd6 100644 --- a/TTS/server/synthesizer.py +++ b/TTS/server/synthesizer.py @@ -10,7 +10,7 @@ from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config from TTS.tts.utils.generic_utils import setup_model from TTS.tts.utils.speakers import load_speaker_mapping -from TTS.vocoder.utils.generic_utils import setup_generator +from TTS.vocoder.utils.generic_utils import setup_generator, interpolate_vocoder_input # pylint: disable=unused-wildcard-import # pylint: disable=wildcard-import from TTS.tts.utils.synthesis import * @@ -22,8 +22,9 @@ class Synthesizer(object): def 
__init__(self, config): self.wavernn = None self.vocoder_model = None + self.num_speakers = 0 + self.tts_speakers = None self.config = config - print(config) self.seg = self.get_segmenter("en") self.use_cuda = self.config.use_cuda if self.use_cuda: @@ -32,22 +33,36 @@ class Synthesizer(object): self.config.use_cuda) if self.config.vocoder_checkpoint: self.load_vocoder(self.config.vocoder_checkpoint, self.config.vocoder_config, self.config.use_cuda) - if self.config.wavernn_lib_path: - self.load_wavernn(self.config.wavernn_lib_path, self.config.wavernn_checkpoint, - self.config.wavernn_config, self.config.use_cuda) @staticmethod def get_segmenter(lang): return pysbd.Segmenter(language=lang, clean=True) + def load_speakers(self): + # load speakers + if self.model_config.use_speaker_embedding is not None: + self.tts_speakers = load_speaker_mapping(self.config.tts_speakers) + self.num_speakers = len(self.tts_speakers) + else: + self.num_speakers = 0 + # set external speaker embedding + if self.tts_config.use_external_speaker_embedding_file: + speaker_embedding = self.tts_speakers[list(self.tts_speakers.keys())[0]]['embedding'] + self.speaker_embedding_dim = len(speaker_embedding) + + def init_speaker(self, speaker_idx): + # load speakers + speaker_embedding = None + if hasattr(self, 'tts_speakers') and speaker_idx is not None: + assert speaker_idx < len(self.tts_speakers), f" [!] speaker_idx is out of the range. {speaker_idx} vs {len(self.tts_speakers)}" + if self.tts_config.use_external_speaker_embedding_file: + speaker_embedding = self.tts_speakers[speaker_idx]['embedding'] + return speaker_embedding + def load_tts(self, tts_checkpoint, tts_config, use_cuda): # pylint: disable=global-statement global symbols, phonemes - print(" > Loading TTS model ...") - print(" | > model config: ", tts_config) - print(" | > checkpoint file: ", tts_checkpoint) - self.tts_config = load_config(tts_config) self.use_phonemes = self.tts_config.use_phonemes self.ap = AudioProcessor(**self.tts_config.audio) @@ -59,127 +74,77 @@ class Synthesizer(object): self.input_size = len(phonemes) else: self.input_size = len(symbols) - # TODO: fix this for multi-speaker model - load speakers - if self.config.tts_speakers is not None: - self.tts_speakers = load_speaker_mapping(self.config.tts_speakers) - num_speakers = len(self.tts_speakers) - else: - num_speakers = 0 - self.tts_model = setup_model(self.input_size, num_speakers=num_speakers, c=self.tts_config) - # load model state - cp = torch.load(tts_checkpoint, map_location=torch.device('cpu')) - # load the model - self.tts_model.load_state_dict(cp['model']) + + self.tts_model = setup_model(self.input_size, num_speakers=self.num_speakers, c=self.tts_config) + self.tts_model.load_checkpoint(tts_config, tts_checkpoint, eval=True) if use_cuda: self.tts_model.cuda() - self.tts_model.eval() - self.tts_model.decoder.max_decoder_steps = 3000 - if 'r' in cp: - self.tts_model.decoder.set_r(cp['r']) - print(f" > model reduction factor: {cp['r']}") def load_vocoder(self, model_file, model_config, use_cuda): self.vocoder_config = load_config(model_config) + self.vocoder_ap = AudioProcessor(**self.vocoder_config['audio']) self.vocoder_model = setup_generator(self.vocoder_config) - self.vocoder_model.load_state_dict(torch.load(model_file, map_location="cpu")["model"]) - self.vocoder_model.remove_weight_norm() - self.vocoder_model.inference_padding = 0 - self.vocoder_config = load_config(model_config) - + self.vocoder_model.load_checkpoint(self.vocoder_config, model_file, eval=True) if 
use_cuda: self.vocoder_model.cuda() - self.vocoder_model.eval() - - def load_wavernn(self, lib_path, model_file, model_config, use_cuda): - # TODO: set a function in wavernn code base for model setup and call it here. - sys.path.append(lib_path) # set this if WaveRNN is not installed globally - #pylint: disable=import-outside-toplevel - from WaveRNN.models.wavernn import Model - print(" > Loading WaveRNN model ...") - print(" | > model config: ", model_config) - print(" | > model file: ", model_file) - self.wavernn_config = load_config(model_config) - # This is the default architecture we use for our models. - # You might need to update it - self.wavernn = Model( - rnn_dims=512, - fc_dims=512, - mode=self.wavernn_config.mode, - mulaw=self.wavernn_config.mulaw, - pad=self.wavernn_config.pad, - use_aux_net=self.wavernn_config.use_aux_net, - use_upsample_net=self.wavernn_config.use_upsample_net, - upsample_factors=self.wavernn_config.upsample_factors, - feat_dims=80, - compute_dims=128, - res_out_dims=128, - res_blocks=10, - hop_length=self.ap.hop_length, - sample_rate=self.ap.sample_rate, - ).cuda() - - check = torch.load(model_file, map_location="cpu") - self.wavernn.load_state_dict(check['model']) - if use_cuda: - self.wavernn.cuda() - self.wavernn.eval() def save_wav(self, wav, path): - # wav *= 32767 / max(1e-8, np.max(np.abs(wav))) wav = np.array(wav) self.ap.save_wav(wav, path) def split_into_sentences(self, text): return self.seg.segment(text) - def tts(self, text, speaker_id=None): + def tts(self, text, speaker_idx=None): start_time = time.time() wavs = [] sens = self.split_into_sentences(text) + print(" > Text splitted to sentences.") print(sens) - speaker_id = id_to_torch(speaker_id) - if speaker_id is not None and self.use_cuda: - speaker_id = speaker_id.cuda() + + speaker_embedding = self.init_speaker(speaker_idx) + use_gl = not hasattr(self, 'vocoder_model') for sen in sens: - # preprocess the given text - inputs = text_to_seqvec(sen, self.tts_config) - inputs = numpy_to_torch(inputs, torch.long, cuda=self.use_cuda) - inputs = inputs.unsqueeze(0) # synthesize voice - _, postnet_output, _, _ = run_model_torch(self.tts_model, inputs, self.tts_config, False, speaker_id, None) - if self.vocoder_model: - # use native vocoder model - vocoder_input = postnet_output[0].transpose(0, 1).unsqueeze(0) - wav = self.vocoder_model.inference(vocoder_input) - if self.use_cuda: - wav = wav.cpu().numpy() + waveform, _, _, mel_postnet_spec, _, _ = synthesis( + self.tts_model, + sen, + self.tts_config, + self.use_cuda, + self.ap, + speaker_idx, + None, + False, + self.tts_config.enable_eos_bos_chars, + use_gl, + speaker_embedding=speaker_embedding) + if not use_gl: + # denormalize tts output based on tts audio config + mel_postnet_spec = self.ap._denormalize(mel_postnet_spec.T).T + device_type = "cuda" if self.use_cuda else "cpu" + # renormalize spectrogram based on vocoder config + vocoder_input = self.vocoder_ap._normalize(mel_postnet_spec.T) + # compute scale factor for possible sample rate mismatch + scale_factor = [1, self.vocoder_config['audio']['sample_rate'] / self.ap.sample_rate] + if scale_factor[1] != 1: + print(" > interpolating tts model output.") + vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) else: - wav = wav.numpy() - wav = wav.flatten() - elif self.wavernn: - # use 3rd paty wavernn - vocoder_input = None - if self.tts_config.model == "Tacotron": - vocoder_input = torch.FloatTensor(self.ap.out_linear_to_mel(linear_spec=postnet_output.T).T).T.unsqueeze(0) - 
else: - vocoder_input = postnet_output[0].transpose(0, 1).unsqueeze(0) - if self.use_cuda: - vocoder_input.cuda() - wav = self.wavernn.generate(vocoder_input, batched=self.config.is_wavernn_batched, target=11000, overlap=550) - else: - # use GL - if self.use_cuda: - postnet_output = postnet_output[0].cpu() - else: - postnet_output = postnet_output[0] - postnet_output = postnet_output.numpy() - wav = inv_spectrogram(postnet_output, self.ap, self.tts_config) + vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) + # run vocoder model + # [1, T, C] + waveform = self.vocoder_model.inference(vocoder_input.to(device_type)) + if self.use_cuda and not use_gl: + waveform = waveform.cpu() + if not use_gl: + waveform = waveform.numpy() + waveform = waveform.squeeze() # trim silence - wav = trim_silence(wav, self.ap) + waveform = trim_silence(waveform, self.ap) - wavs += list(wav) + wavs += list(waveform) wavs += [0] * 10000 out = io.BytesIO() From 9addfabc430ba6956d4f796d08fc2bd6fd10eac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Thu, 21 Jan 2021 15:31:13 +0100 Subject: [PATCH 17/44] wavernn load_checkpoint function --- TTS/vocoder/models/wavernn.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py index 8aa84d34..bded4cd8 100644 --- a/TTS/vocoder/models/wavernn.py +++ b/TTS/vocoder/models/wavernn.py @@ -499,3 +499,10 @@ class WaveRNN(nn.Module): unfolded[start:end] += y[i] return unfolded + + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument + state = torch.load(checkpoint_path, map_location=torch.device('cpu')) + self.load_state_dict(state['model']) + if eval: + self.eval() + assert not self.training From 0ab2eb26648ed8f672051e6983ac3c32d20bb202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Thu, 21 Jan 2021 15:54:33 +0100 Subject: [PATCH 18/44] use synthesizer in both synthesize.py and server.pu --- TTS/bin/synthesize.py | 179 ++++----------------------- TTS/server/server.py | 12 +- TTS/{server => utils}/synthesizer.py | 38 +++--- 3 files changed, 59 insertions(+), 170 deletions(-) rename TTS/{server => utils}/synthesizer.py (83%) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index a4d70324..6cd28bc8 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -20,6 +20,7 @@ from TTS.tts.utils.io import load_checkpoint from TTS.utils.audio import AudioProcessor from TTS.utils.io import load_config from TTS.utils.manage import ModelManager +from TTS.utils.synthesizer import Synthesizer from TTS.vocoder.utils.generic_utils import setup_generator, interpolate_vocoder_input @@ -34,121 +35,6 @@ def str2bool(v): raise argparse.ArgumentTypeError('Boolean value expected.') -def load_tts_model(model_path, config_path, use_cuda, speakers_json=None, speaker_idx=None): - global phonemes - global symbols - - # load the config - model_config = load_config(config_path) - - # load the audio processor - ap = AudioProcessor(**model_config.audio) - - # if the vocabulary was passed, replace the default - if 'characters' in model_config.keys(): - symbols, phonemes = make_symbols(**model_config.characters) - - # load speakers - speaker_embedding = None - speaker_embedding_dim = None - num_speakers = 0 - if speakers_json is not None: - speaker_mapping = json.load(open(speakers_json, 'r')) - num_speakers = len(speaker_mapping) - if model_config.use_external_speaker_embedding_file: - if speaker_idx is not None: - speaker_embedding = 
speaker_mapping[speaker_idx]['embedding'] - else: # if speaker_idx is not specificated use the first sample in speakers.json - speaker_embedding = speaker_mapping[list(speaker_mapping.keys())[0]]['embedding'] - speaker_embedding_dim = len(speaker_embedding) - - # load tts model - num_chars = len(phonemes) if model_config.use_phonemes else len(symbols) - model = setup_model(num_chars, num_speakers, model_config, speaker_embedding_dim) - model.load_checkpoint(model_config, model_path, eval=True) - if use_cuda: - model.cuda() - return model, model_config, ap, speaker_embedding - - -def load_vocoder_model(model_path, config_path, use_cuda): - vocoder_config = load_config(vocoder_config_path) - vocoder_ap = AudioProcessor(**vocoder_config['audio']) - vocoder_model = setup_generator(vocoder_config) - vocoder_model.load_checkpoint(vocoder_config, model_path, eval=True) - if use_cuda: - vocoder_model.cuda() - return vocoder_model, vocoder_config, vocoder_ap - - -def tts(model, - vocoder_model, - text, - model_config, - vocoder_config, - use_cuda, - ap, - vocoder_ap, - use_gl, - speaker_fileid, - speaker_embedding=None, - gst_style=None): - t_1 = time.time() - waveform, _, _, mel_postnet_spec, _, _ = synthesis( - model, - text, - model_config, - use_cuda, - ap, - speaker_fileid, - gst_style, - False, - model_config.enable_eos_bos_chars, - use_gl, - speaker_embedding=speaker_embedding) - # grab spectrogram (thx to the nice guys at mozilla discourse for codesnippet) - if args.save_spectogram: - spec_file_name = args.text.replace(" ", "_")[0:10] - spec_file_name = spec_file_name.translate( - str.maketrans('', '', string.punctuation.replace('_', ''))) + '.npy' - spec_file_name = os.path.join(args.out_path, spec_file_name) - spectrogram = mel_postnet_spec.T - spectrogram = spectrogram[0] - np.save(spec_file_name, spectrogram) - print(" > Saving raw spectogram to " + spec_file_name) - # convert linear spectrogram to melspectrogram for tacotron - if model_config.model == "Tacotron" and not use_gl: - mel_postnet_spec = ap.out_linear_to_mel(mel_postnet_spec.T) - # run vocoder_model - if not use_gl: - # denormalize tts output based on tts audio config - mel_postnet_spec = ap._denormalize(mel_postnet_spec.T).T - device_type = "cuda" if use_cuda else "cpu" - # renormalize spectrogram based on vocoder config - vocoder_input = vocoder_ap._normalize(mel_postnet_spec.T) - # compute scale factor for possible sample rate mismatch - scale_factor = [1, vocoder_config['audio']['sample_rate'] / ap.sample_rate] - if scale_factor[1] != 1: - print(" > interpolating tts model output.") - vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) - else: - vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) - # run vocoder model - # [1, T, C] - waveform = vocoder_model.inference(vocoder_input.to(device_type)) - if use_cuda and not use_gl: - waveform = waveform.cpu() - if not use_gl: - waveform = waveform.numpy() - waveform = waveform.squeeze() - rtf = (time.time() - t_1) / (len(waveform) / ap.sample_rate) - tps = (time.time() - t_1) / len(waveform) - print(" > Run-time: {}".format(time.time() - t_1)) - print(" > Real-time factor: {}".format(rtf)) - print(" > Time per step: {}".format(tps)) - return waveform - - if __name__ == "__main__": parser = argparse.ArgumentParser(description='''Synthesize speech on command line.\n\n''' @@ -273,7 +159,9 @@ if __name__ == "__main__": manager = ModelManager(path) model_path = None + config_path = None vocoder_path = None + vocoder_config_path = None model = None 
vocoder_model = None vocoder_config = None @@ -302,49 +190,36 @@ if __name__ == "__main__": # RUN THE SYNTHESIS # load models - model, model_config, ap, speaker_embedding = load_tts_model(model_path, config_path, args.use_cuda, args.speaker_idx) - if vocoder_path is not None: - vocoder_model, vocoder_config, vocoder_ap = load_vocoder_model(vocoder_path, vocoder_config_path, use_cuda=args.use_cuda) + synthesizer = Synthesizer(model_path, config_path, vocoder_path, vocoder_config_path, args.use_cuda) use_griffin_lim = vocoder_path is None print(" > Text: {}".format(args.text)) - # handle multi-speaker setting - if not model_config.use_external_speaker_embedding_file and args.speaker_idx is not None: - if args.speaker_idx.isdigit(): - args.speaker_idx = int(args.speaker_idx) - else: - args.speaker_idx = None - else: - args.speaker_idx = None + # # handle multi-speaker setting + # if not model_config.use_external_speaker_embedding_file and args.speaker_idx is not None: + # if args.speaker_idx.isdigit(): + # args.speaker_idx = int(args.speaker_idx) + # else: + # args.speaker_idx = None + # else: + # args.speaker_idx = None - if args.gst_style is None: - if 'gst' in model_config.keys() and model_config.gst['gst_style_input'] is not None: - gst_style = model_config.gst['gst_style_input'] - else: - gst_style = None - else: - # check if gst_style string is a dict, if is dict convert else use string - try: - gst_style = json.loads(args.gst_style) - if max(map(int, gst_style.keys())) >= model_config.gst['gst_style_tokens']: - raise RuntimeError("The highest value of the gst_style dictionary key must be less than the number of GST Tokens, \n Highest dictionary key value: {} \n Number of GST tokens: {}".format(max(map(int, gst_style.keys())), model_config.gst['gst_style_tokens'])) - except ValueError: - gst_style = args.gst_style + # if args.gst_style is None: + # if 'gst' in model_config.keys() and model_config.gst['gst_style_input'] is not None: + # gst_style = model_config.gst['gst_style_input'] + # else: + # gst_style = None + # else: + # # check if gst_style string is a dict, if is dict convert else use string + # try: + # gst_style = json.loads(args.gst_style) + # if max(map(int, gst_style.keys())) >= model_config.gst['gst_style_tokens']: + # raise RuntimeError("The highest value of the gst_style dictionary key must be less than the number of GST Tokens, \n Highest dictionary key value: {} \n Number of GST tokens: {}".format(max(map(int, gst_style.keys())), model_config.gst['gst_style_tokens'])) + # except ValueError: + # gst_style = args.gst_style # kick it - wav = tts(model, - vocoder_model, - args.text, - model_config, - vocoder_config, - args.use_cuda, - ap, - vocoder_ap, - use_griffin_lim, - args.speaker_idx, - speaker_embedding=speaker_embedding, - gst_style=gst_style) + wav = synthesizer.tts(args.text) # save the results file_name = args.text.replace(" ", "_")[0:20] @@ -352,4 +227,4 @@ if __name__ == "__main__": str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav' out_path = os.path.join(args.out_path, file_name) print(" > Saving output to {}".format(out_path)) - ap.save_wav(wav, out_path) + synthesizer.save_wav(wav, out_path) diff --git a/TTS/server/server.py b/TTS/server/server.py index ed98d35e..a89f4021 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -2,10 +2,11 @@ import argparse import os import sys +import io from pathlib import Path from flask import Flask, render_template, request, send_file -from TTS.server.synthesizer import Synthesizer +from 
TTS.utils.synthesizer import Synthesizer from TTS.utils.manage import ModelManager @@ -71,10 +72,11 @@ if not args.vocoder_checkpoint and os.path.isfile(vocoder_checkpoint_file): if not args.vocoder_config and os.path.isfile(vocoder_config_file): args.vocoder_config = vocoder_config_file -synthesizer = Synthesizer(args) +synthesizer = Synthesizer(args.tts_checkpoint, args.tts_config, args.vocoder_checkpoint, args.vocoder_config, args.use_cuda) app = Flask(__name__) + @app.route('/') def index(): return render_template('index.html') @@ -84,8 +86,10 @@ def index(): def tts(): text = request.args.get('text') print(" > Model input: {}".format(text)) - data = synthesizer.tts(text) - return send_file(data, mimetype='audio/wav') + wavs = synthesizer.tts(text) + out = io.BytesIO() + synthesizer.save_wav(wavs, out) + return send_file(out, mimetype='audio/wav') def main(): diff --git a/TTS/server/synthesizer.py b/TTS/utils/synthesizer.py similarity index 83% rename from TTS/server/synthesizer.py rename to TTS/utils/synthesizer.py index a76badd6..f7ca5f44 100644 --- a/TTS/server/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -1,5 +1,3 @@ -import io -import sys import time import numpy as np @@ -19,20 +17,35 @@ from TTS.tts.utils.text import make_symbols, phonemes, symbols class Synthesizer(object): - def __init__(self, config): + def __init__(self, tts_checkpoint, tts_config, vocoder_checkpoint, vocoder_config, use_cuda): + """Encapsulation of tts and vocoder models for inference. + + TODO: handle multi-speaker and GST inference. + + Args: + tts_checkpoint (str): path to the tts model file. + tts_config (str): path to the tts config file. + vocoder_checkpoint (str): path to the vocoder model file. + vocoder_config (str): path to the vocoder config file. + use_cuda (bool): enable/disable cuda. + """ + self.tts_checkpoint = tts_checkpoint + self.tts_config = tts_config + self.vocoder_checkpoint = vocoder_checkpoint + self.vocoder_config = vocoder_config + self.use_cuda = use_cuda self.wavernn = None self.vocoder_model = None self.num_speakers = 0 self.tts_speakers = None - self.config = config self.seg = self.get_segmenter("en") - self.use_cuda = self.config.use_cuda + self.use_cuda = use_cuda if self.use_cuda: assert torch.cuda.is_available(), "CUDA is not availabe on this machine." 
- self.load_tts(self.config.tts_checkpoint, self.config.tts_config, - self.config.use_cuda) - if self.config.vocoder_checkpoint: - self.load_vocoder(self.config.vocoder_checkpoint, self.config.vocoder_config, self.config.use_cuda) + self.load_tts(tts_checkpoint, tts_config, + use_cuda) + if vocoder_checkpoint: + self.load_vocoder(vocoder_checkpoint, vocoder_config, use_cuda) @staticmethod def get_segmenter(lang): @@ -41,7 +54,7 @@ class Synthesizer(object): def load_speakers(self): # load speakers if self.model_config.use_speaker_embedding is not None: - self.tts_speakers = load_speaker_mapping(self.config.tts_speakers) + self.tts_speakers = load_speaker_mapping(self.tts_config.tts_speakers_json) self.num_speakers = len(self.tts_speakers) else: self.num_speakers = 0 @@ -147,12 +160,9 @@ class Synthesizer(object): wavs += list(waveform) wavs += [0] * 10000 - out = io.BytesIO() - self.save_wav(wavs, out) - # compute stats process_time = time.time() - start_time audio_time = len(wavs) / self.tts_config.audio['sample_rate'] print(f" > Processing time: {process_time}") print(f" > Real-time factor: {process_time / audio_time}") - return out + return wavs From 26540d507db03d29acd686fa64159d52c4070d0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Thu, 21 Jan 2021 15:57:49 +0100 Subject: [PATCH 19/44] add pyproject.toml --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..fc0aca47 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[build-system] +requires = ["setuptools", "wheel", "Cython", "numpy>=1.16.0"] \ No newline at end of file From f251dc8c0e6a0e6cdcacaca17c5590726d29b59a Mon Sep 17 00:00:00 2001 From: Alexander Korolev Date: Thu, 21 Jan 2021 21:16:30 +0100 Subject: [PATCH 20/44] Update train_tacotron.py When attempting to fine-tune a model with "prenet_type": "bn" that was originally trained with "prenet_type": "original", a RuntimeError is thrown that stops the training. By catching the RuntimeError, the required layers can be partially restored and the training will continue without any problems. --- TTS/bin/train_tacotron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/bin/train_tacotron.py b/TTS/bin/train_tacotron.py index 38017b92..ccb35a7c 100644 --- a/TTS/bin/train_tacotron.py +++ b/TTS/bin/train_tacotron.py @@ -549,7 +549,7 @@ def main(args): # pylint: disable=redefined-outer-name scaler.load_state_dict(checkpoint["scaler"]) if c.reinit_layers: raise RuntimeError - except KeyError: + except (KeyError, RuntimeError): print(" > Partial model initialization.") model_dict = model.state_dict() model_dict = set_init_dict(model_dict, checkpoint['model'], c) From 32d21545ace69def11b013a5332cad013508b6eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Fri, 22 Jan 2021 02:30:32 +0100 Subject: [PATCH 21/44] README update --- README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 723568e1..0738f2e9 100644 --- a/README.md +++ b/README.md @@ -36,8 +36,10 @@ Please use our dedicated channels for questions and discussion. 
Help is much more valuable if it's shared publicly, so that more people can benefit from it.

| Type | Links |
| ------------------------------- | --------------------------------------- |
| 👩🏾‍🏫 **Tutorials and Examples** | [TTS/Wiki](https://github.com/mozilla/TTS/wiki/TTS-Notebooks-and-Tutorials) |
-| 🤖 **Released Models** | [TTS/Wiki](https://github.com/mozilla/TTS/wiki/Released-Models)|
+| 🚀 **Released Models** | [TTS/Wiki](https://github.com/mozilla/TTS/wiki/Released-Models)|
| 💻 **Docker Image** | [Repository by @synesthesiam](https://github.com/synesthesiam/docker-mozillatts)|
+| 🖥️ **Demo Server** | [TTS/server](https://github.com/mozilla/TTS/tree/master/TTS/server)|
+| 🤖 **Running TTS on Terminal** | [TTS/README.md](https://github.com/mozilla/TTS#example-synthesizing-speech-on-terminal-using-the-released-models)|

## 🥇 TTS Performance

@@ -137,6 +139,23 @@ Some of the public datasets that we successfully applied TTS: - [LibriTTS](https://openslr.org/60/) - [Spanish](https://drive.google.com/file/d/1Sm_zyBo67XHkiFhcRSQ4YaHPYM0slO_e/view?usp=sharing) - thx! @carlfm01 +## Example: Synthesizing Speech on Terminal Using the Released Models. + +TTS provides a CLI interface for synthesizing speech using pre-trained models. You can either use your own model or the release models under the TTS project. + +Listing released TTS models. +```./TTS/bin/synthesize.py --list_models``` + +Run a tts and a vocoder model from the released model list. (Simply copy and paste the full model names from the list as arguments for the command below.) +```./TTS/bin/synthesize.py --text "Text for TTS" --model_name "///" --vocoder_name "///" --output_path``` + +Run your own TTS model (Using Griffin-Lim Vocoder) +```./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav``` + +Run your own TTS and Vocoder models +```./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json``` + + ## Example: Training and Fine-tuning LJ-Speech Dataset Here you can find a [CoLab](https://gist.github.com/erogol/97516ad65b44dbddb8cd694953187c5b) notebook for a hands-on example, training LJSpeech. Or you can manually follow the guideline below. From c990b3a59c4731cbde3b9fdd95e987ceb3051745 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Fri, 22 Jan 2021 02:32:35 +0100 Subject: [PATCH 22/44] linter fixes and test fixes --- TTS/bin/synthesize.py | 20 ++++--------------- TTS/tts/models/glow_tts.py | 2 +- TTS/tts/models/speedy_speech.py | 4 ++-- TTS/tts/models/tacotron_abstract.py | 2 +- TTS/tts/utils/visual.py | 2 +- TTS/utils/synthesizer.py | 19 +++++++++--------- TTS/vocoder/models/melgan_generator.py | 2 +- .../models/parallel_wavegan_generator.py | 2 +- TTS/vocoder/models/wavegrad.py | 2 +- TTS/vocoder/models/wavernn.py | 2 +- TTS/vocoder/utils/generic_utils.py | 2 +- tests/test_demo_server.py | 4 ++-- 12 files changed, 26 insertions(+), 37 deletions(-) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 6cd28bc8..64d3298b 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -2,26 +2,15 @@ # -*- coding: utf-8 -*- import argparse -import json import os import sys import string -import time from argparse import RawTextHelpFormatter # pylint: disable=redefined-outer-name, unused-argument from pathlib import Path -import numpy as np -import torch -from TTS.tts.utils.generic_utils import is_tacotron, setup_model -from TTS.tts.utils.synthesis import synthesis -from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols -from TTS.tts.utils.io import load_checkpoint -from TTS.utils.audio import AudioProcessor -from TTS.utils.io import load_config from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer -from TTS.vocoder.utils.generic_utils import setup_generator, interpolate_vocoder_input def str2bool(v): @@ -29,17 +18,16 @@ def str2bool(v): return v if v.lower() in ('yes', 'true', 't', 'y', '1'): return True - elif v.lower() in ('no', 'false', 'f', 'n', '0'): + if v.lower() in ('no', 'false', 'f', 'n', '0'): return False - else: - raise argparse.ArgumentTypeError('Boolean value expected.') + raise 
argparse.ArgumentTypeError('Boolean value expected.') if __name__ == "__main__": - + # pylint: disable=bad-continuation parser = argparse.ArgumentParser(description='''Synthesize speech on command line.\n\n''' - '''You can either use your trained model or choose a model from the provided list.\n''' + '''You can either use your trained model or choose a model from the provided list.\n'''\ ''' Example runs: diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index c978e4fa..2f9b6f9b 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -224,7 +224,7 @@ class GlowTts(nn.Module): def store_inverse(self): self.decoder.store_inverse() - def load_checkpoint(self, config, checkpoint_path, eval=False): + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin state = torch.load(checkpoint_path, map_location=torch.device('cpu')) self.load_state_dict(state['model']) if eval: diff --git a/TTS/tts/models/speedy_speech.py b/TTS/tts/models/speedy_speech.py index 7f5c660e..93496d59 100644 --- a/TTS/tts/models/speedy_speech.py +++ b/TTS/tts/models/speedy_speech.py @@ -188,10 +188,10 @@ class SpeedySpeech(nn.Module): o_dr_log = self.duration_predictor(o_en_dp.detach(), x_mask) o_dr = self.format_durations(o_dr_log, x_mask).squeeze(1) y_lengths = o_dr.sum(1) - o_de, attn= self._forward_decoder(o_en, o_en_dp, o_dr, x_mask, y_lengths, g=g) + o_de, attn = self._forward_decoder(o_en, o_en_dp, o_dr, x_mask, y_lengths, g=g) return o_de, attn - def load_checkpoint(self, config, checkpoint_path, eval=False): + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin state = torch.load(checkpoint_path, map_location=torch.device('cpu')) self.load_state_dict(state['model']) if eval: diff --git a/TTS/tts/models/tacotron_abstract.py b/TTS/tts/models/tacotron_abstract.py index 0a63b871..10953269 100644 --- a/TTS/tts/models/tacotron_abstract.py +++ b/TTS/tts/models/tacotron_abstract.py @@ -121,7 +121,7 @@ class TacotronAbstract(ABC, nn.Module): def inference(self): pass - def load_checkpoint(self, config, checkpoint_path, eval=False): + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin state = torch.load(checkpoint_path, map_location=torch.device('cpu')) self.load_state_dict(state['model']) self.decoder.set_r(state['r']) diff --git a/TTS/tts/utils/visual.py b/TTS/tts/utils/visual.py index 17cba648..e5bb5891 100644 --- a/TTS/tts/utils/visual.py +++ b/TTS/tts/utils/visual.py @@ -50,7 +50,7 @@ def plot_spectrogram(spectrogram, spectrogram_ = spectrogram_.astype( np.float32) if spectrogram_.dtype == np.float16 else spectrogram_ if ap is not None: - spectrogram_ = ap._denormalize(spectrogram_) # pylint: disable=protected-access + spectrogram_ = ap.denormalize(spectrogram_) # pylint: disable=protected-access fig = plt.figure(figsize=fig_size) plt.imshow(spectrogram_, aspect="auto", origin="lower") plt.colorbar() diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index f7ca5f44..615e0d1d 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -17,7 +17,7 @@ from TTS.tts.utils.text import make_symbols, phonemes, symbols class Synthesizer(object): - def __init__(self, tts_checkpoint, tts_config, vocoder_checkpoint, vocoder_config, use_cuda): + def __init__(self, tts_checkpoint, tts_config, vocoder_checkpoint=None, vocoder_config=None, use_cuda=False): """Encapsulation of tts and 
vocoder models for inference. TODO: handle multi-speaker and GST inference. @@ -25,9 +25,9 @@ class Synthesizer(object): Args: tts_checkpoint (str): path to the tts model file. tts_config (str): path to the tts config file. - vocoder_checkpoint (str): path to the vocoder model file. - vocoder_config (str): path to the vocoder config file. - use_cuda (bool): enable/disable cuda. + vocoder_checkpoint (str, optional): path to the vocoder model file. Defaults to None. + vocoder_config (str, optional): path to the vocoder config file. Defaults to None. + use_cuda (bool, optional): enable/disable cuda. Defaults to False. """ self.tts_checkpoint = tts_checkpoint self.tts_config = tts_config @@ -38,6 +38,7 @@ class Synthesizer(object): self.vocoder_model = None self.num_speakers = 0 self.tts_speakers = None + self.speaker_embedding_dim = None self.seg = self.get_segmenter("en") self.use_cuda = use_cuda if self.use_cuda: @@ -116,7 +117,7 @@ class Synthesizer(object): print(sens) speaker_embedding = self.init_speaker(speaker_idx) - use_gl = not hasattr(self, 'vocoder_model') + use_gl = self.vocoder_model is None for sen in sens: # synthesize voice @@ -134,17 +135,17 @@ class Synthesizer(object): speaker_embedding=speaker_embedding) if not use_gl: # denormalize tts output based on tts audio config - mel_postnet_spec = self.ap._denormalize(mel_postnet_spec.T).T + mel_postnet_spec = self.ap.denormalize(mel_postnet_spec.T).T device_type = "cuda" if self.use_cuda else "cpu" # renormalize spectrogram based on vocoder config - vocoder_input = self.vocoder_ap._normalize(mel_postnet_spec.T) + vocoder_input = self.vocoder_ap.normalize(mel_postnet_spec.T) # compute scale factor for possible sample rate mismatch - scale_factor = [1, self.vocoder_config['audio']['sample_rate'] / self.ap.sample_rate] + scale_factor = [1, self.vocoder_config['audio']['sample_rate'] / self.ap.sample_rate] if scale_factor[1] != 1: print(" > interpolating tts model output.") vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) else: - vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) + vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) # pylint: disable=not-callable # run vocoder model # [1, T, C] waveform = self.vocoder_model.inference(vocoder_input.to(device_type)) diff --git a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py index e5fd46eb..3070eac7 100644 --- a/TTS/vocoder/models/melgan_generator.py +++ b/TTS/vocoder/models/melgan_generator.py @@ -96,7 +96,7 @@ class MelganGenerator(nn.Module): except ValueError: layer.remove_weight_norm() - def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin state = torch.load(checkpoint_path, map_location=torch.device('cpu')) self.load_state_dict(state['model']) if eval: diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py index f5ed7712..1d1bcdcb 100644 --- a/TTS/vocoder/models/parallel_wavegan_generator.py +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -158,7 +158,7 @@ class ParallelWaveganGenerator(torch.nn.Module): return self._get_receptive_field_size(self.layers, self.stacks, self.kernel_size) - def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, 
redefined-builtin state = torch.load(checkpoint_path, map_location=torch.device('cpu')) self.load_state_dict(state['model']) if eval: diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py index bb9d04b8..f4a5faa3 100644 --- a/TTS/vocoder/models/wavegrad.py +++ b/TTS/vocoder/models/wavegrad.py @@ -177,7 +177,7 @@ class Wavegrad(nn.Module): self.y_conv = weight_norm(self.y_conv) - def load_checkpoint(self, config, checkpoint_path, eval=False): + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin state = torch.load(checkpoint_path, map_location=torch.device('cpu')) self.load_state_dict(state['model']) if eval: diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py index bded4cd8..cb03deb3 100644 --- a/TTS/vocoder/models/wavernn.py +++ b/TTS/vocoder/models/wavernn.py @@ -500,7 +500,7 @@ class WaveRNN(nn.Module): return unfolded - def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument + def load_checkpoint(self, config, checkpoint_path, eval=False): # pylint: disable=unused-argument, redefined-builtin state = torch.load(checkpoint_path, map_location=torch.device('cpu')) self.load_state_dict(state['model']) if eval: diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py index 478d9c00..fb943a37 100644 --- a/TTS/vocoder/utils/generic_utils.py +++ b/TTS/vocoder/utils/generic_utils.py @@ -20,7 +20,7 @@ def interpolate_vocoder_input(scale_factor, spec): torch.tensor: interpolated spectrogram. """ print(" > before interpolation :", spec.shape) - spec = torch.tensor(spec).unsqueeze(0).unsqueeze(0) + spec = torch.tensor(spec).unsqueeze(0).unsqueeze(0) # pylint: disable=not-callable spec = torch.nn.functional.interpolate(spec, scale_factor=scale_factor, recompute_scale_factor=True, diff --git a/tests/test_demo_server.py b/tests/test_demo_server.py index 0576430c..bccff55d 100644 --- a/tests/test_demo_server.py +++ b/tests/test_demo_server.py @@ -2,7 +2,7 @@ import os import unittest from tests import get_tests_input_path, get_tests_output_path -from TTS.server.synthesizer import Synthesizer +from TTS.utils.synthesizer import Synthesizer from TTS.tts.utils.generic_utils import setup_model from TTS.tts.utils.io import save_checkpoint from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols @@ -29,7 +29,7 @@ class DemoServerTest(unittest.TestCase): tts_root_path = get_tests_output_path() config['tts_checkpoint'] = os.path.join(tts_root_path, config['tts_checkpoint']) config['tts_config'] = os.path.join(tts_root_path, config['tts_config']) - synthesizer = Synthesizer(config) + synthesizer = Synthesizer(config['tts_checkpoint'], config['tts_config'], None, None) synthesizer.tts("Better this test works!!") def test_split_into_sentences(self): From ca8ad9c21eafa39bfbba5aaf3540c841f67a5583 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Fri, 22 Jan 2021 02:33:19 +0100 Subject: [PATCH 23/44] rename audio._normalize to audio.normalize --- TTS/utils/audio.py | 16 ++++++------ ...DDC_TTS_and_MultiBand_MelGAN_Example.ipynb | 2 +- .../DDC_TTS_and_ParallelWaveGAN_Example.ipynb | 2 +- .../dataset_analysis/CheckSpectrograms.ipynb | 6 ++--- setup.py | 14 +++++----- tests/test_audio.py | 26 +++++++++---------- 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py index 786f57b0..93a5880f 100644 --- a/TTS/utils/audio.py +++ b/TTS/utils/audio.py @@ -109,7 +109,7 
@@ class AudioProcessor(object): return hop_length, win_length ### normalization ### - def _normalize(self, S): + def normalize(self, S): """Put values in [0, self.max_norm] or [-self.max_norm, self.max_norm]""" #pylint: disable=no-else-return S = S.copy() @@ -138,7 +138,7 @@ class AudioProcessor(object): else: return S - def _denormalize(self, S): + def denormalize(self, S): """denormalize values""" #pylint: disable=no-else-return S_denorm = S.copy() @@ -223,7 +223,7 @@ class AudioProcessor(object): else: D = self._stft(y) S = self._amp_to_db(np.abs(D)) - return self._normalize(S) + return self.normalize(S) def melspectrogram(self, y): if self.preemphasis != 0: @@ -231,11 +231,11 @@ class AudioProcessor(object): else: D = self._stft(y) S = self._amp_to_db(self._linear_to_mel(np.abs(D))) - return self._normalize(S) + return self.normalize(S) def inv_spectrogram(self, spectrogram): """Converts spectrogram to waveform using librosa""" - S = self._denormalize(spectrogram) + S = self.denormalize(spectrogram) S = self._db_to_amp(S) # Reconstruct phase if self.preemphasis != 0: @@ -244,7 +244,7 @@ class AudioProcessor(object): def inv_melspectrogram(self, mel_spectrogram): '''Converts melspectrogram to waveform using librosa''' - D = self._denormalize(mel_spectrogram) + D = self.denormalize(mel_spectrogram) S = self._db_to_amp(D) S = self._mel_to_linear(S) # Convert back to linear if self.preemphasis != 0: @@ -252,11 +252,11 @@ class AudioProcessor(object): return self._griffin_lim(S**self.power) def out_linear_to_mel(self, linear_spec): - S = self._denormalize(linear_spec) + S = self.denormalize(linear_spec) S = self._db_to_amp(S) S = self._linear_to_mel(np.abs(S)) S = self._amp_to_db(S) - mel = self._normalize(S) + mel = self.normalize(S) return mel ### STFT and ISTFT ### diff --git a/notebooks/DDC_TTS_and_MultiBand_MelGAN_Example.ipynb b/notebooks/DDC_TTS_and_MultiBand_MelGAN_Example.ipynb index dc582830..17403771 100644 --- a/notebooks/DDC_TTS_and_MultiBand_MelGAN_Example.ipynb +++ b/notebooks/DDC_TTS_and_MultiBand_MelGAN_Example.ipynb @@ -112,7 +112,7 @@ " t_1 = time.time()\n", " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars)\n", - " # mel_postnet_spec = ap._denormalize(mel_postnet_spec.T)\n", + " # mel_postnet_spec = ap.denormalize(mel_postnet_spec.T)\n", " if not use_gl:\n", " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", " waveform = waveform.flatten()\n", diff --git a/notebooks/DDC_TTS_and_ParallelWaveGAN_Example.ipynb b/notebooks/DDC_TTS_and_ParallelWaveGAN_Example.ipynb index 00de8bbd..35a257e0 100644 --- a/notebooks/DDC_TTS_and_ParallelWaveGAN_Example.ipynb +++ b/notebooks/DDC_TTS_and_ParallelWaveGAN_Example.ipynb @@ -112,7 +112,7 @@ " t_1 = time.time()\n", " waveform, alignment, mel_spec, mel_postnet_spec, stop_tokens, inputs = synthesis(model, text, CONFIG, use_cuda, ap, speaker_id, style_wav=None,\n", " truncated=False, enable_eos_bos_chars=CONFIG.enable_eos_bos_chars)\n", - " # mel_postnet_spec = ap._denormalize(mel_postnet_spec.T)\n", + " # mel_postnet_spec = ap.denormalize(mel_postnet_spec.T)\n", " if not use_gl:\n", " waveform = vocoder_model.inference(torch.FloatTensor(mel_postnet_spec.T).unsqueeze(0))\n", " waveform = waveform.flatten()\n", diff --git a/notebooks/dataset_analysis/CheckSpectrograms.ipynb b/notebooks/dataset_analysis/CheckSpectrograms.ipynb index 
a1f2fab8..4d4ba57a 100644 --- a/notebooks/dataset_analysis/CheckSpectrograms.ipynb +++ b/notebooks/dataset_analysis/CheckSpectrograms.ipynb @@ -230,8 +230,8 @@ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mspec\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAP\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mwav\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Max:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Min:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Mean:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mplot_spectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspec\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mAP\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m;\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Projects/TTS/tts/utils/audio.py\u001b[0m in \u001b[0;36mspectrogram\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0mD\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stft\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_amp_to_db\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mD\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_normalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmelspectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Projects/TTS/tts/utils/audio.py\u001b[0m in \u001b[0;36m_normalize\u001b[0;34m(self, S)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear_scaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' [!] Mean-Var stats does not match the given feature dimensions.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;31m# range normalization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m-=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mref_level_db\u001b[0m \u001b[0;31m# discard certain range of DB assuming it is air noise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/TTS/tts/utils/audio.py\u001b[0m in \u001b[0;36mspectrogram\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0mD\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stft\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 219\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_amp_to_db\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mD\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 220\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnormalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 221\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 222\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmelspectrogram\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Projects/TTS/tts/utils/audio.py\u001b[0m in \u001b[0;36mnormalize\u001b[0;34m(self, S)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear_scaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m' [!] 
Mean-Var stats does not match the given feature dimensions.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;31m# range normalization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0mS\u001b[0m \u001b[0;34m-=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mref_level_db\u001b[0m \u001b[0;31m# discard certain range of DB assuming it is air noise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mRuntimeError\u001b[0m: [!] Mean-Var stats does not match the given feature dimensions." ] } @@ -314,7 +314,7 @@ " exec(set_val_cmd)\n", " wav = AP.load_wav(file)\n", " spec = AP.spectrogram(wav)\n", - " spec_norm = AP._denormalize(spec.T)\n", + " spec_norm = AP.denormalize(spec.T)\n", " plt.subplot(len(values), 2, 2*idx + 1)\n", " plt.imshow(spec_norm.T, aspect=\"auto\", origin=\"lower\")\n", " # plt.colorbar()\n", diff --git a/setup.py b/setup.py index d55b2c12..7653b6bd 100644 --- a/setup.py +++ b/setup.py @@ -106,10 +106,10 @@ def pip_install(package_name): reqs_from_file = open('requirements.txt').readlines() -reqs_without_tf = [r for r in reqs_from_file if not r.startswith('tensorflow')] -tf_req = [r for r in reqs_from_file if r.startswith('tensorflow')] +# reqs_without_tf = [r for r in reqs_from_file if not r.startswith('tensorflow')] +# tf_req = [r for r in reqs_from_file if r.startswith('tensorflow')] -requirements = {'install_requires': reqs_without_tf, 'pip_install': tf_req} +# requirements = {'install_requires': reqs_without_tf, 'pip_install': tf_req} setup( name='TTS', @@ -132,7 +132,7 @@ setup( 'build_py': build_py, 'develop': develop, }, - install_requires=requirements['install_requires'], + install_requires=reqs_from_file, python_requires='>=3.6.0', classifiers=[ "Programming Language :: Python", @@ -149,6 +149,6 @@ setup( # for some reason having tensorflow in 'install_requires' # breaks some of the dependencies. -if 'bdist_wheel' not in unknown_args: - for module in requirements['pip_install']: - pip_install(module) +# if 'bdist_wheel' not in unknown_args: +# for module in requirements['pip_install']: +# pip_install(module) diff --git a/tests/test_audio.py b/tests/test_audio.py index dcc511e2..c00cd8f8 100644 --- a/tests/test_audio.py +++ b/tests/test_audio.py @@ -67,21 +67,21 @@ class TestAudio(unittest.TestCase): self.ap.symmetric_norm = False self.ap.clip_norm = False self.ap.max_norm = 4.0 - x_norm = self.ap._normalize(x) + x_norm = self.ap.normalize(x) print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") assert (x_old - x).sum() == 0 # check value range assert x_norm.max() <= self.ap.max_norm + 1, x_norm.max() assert x_norm.min() >= 0 - 1, x_norm.min() # check denorm. 
- x_ = self.ap._denormalize(x_norm) + x_ = self.ap.denormalize(x_norm) assert (x - x_).sum() < 1e-3, (x - x_).mean() self.ap.signal_norm = True self.ap.symmetric_norm = False self.ap.clip_norm = True self.ap.max_norm = 4.0 - x_norm = self.ap._normalize(x) + x_norm = self.ap.normalize(x) print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") @@ -90,14 +90,14 @@ class TestAudio(unittest.TestCase): assert x_norm.max() <= self.ap.max_norm, x_norm.max() assert x_norm.min() >= 0, x_norm.min() # check denorm. - x_ = self.ap._denormalize(x_norm) + x_ = self.ap.denormalize(x_norm) assert (x - x_).sum() < 1e-3, (x - x_).mean() self.ap.signal_norm = True self.ap.symmetric_norm = True self.ap.clip_norm = False self.ap.max_norm = 4.0 - x_norm = self.ap._normalize(x) + x_norm = self.ap.normalize(x) print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") @@ -107,14 +107,14 @@ class TestAudio(unittest.TestCase): assert x_norm.min() >= -self.ap.max_norm - 2, x_norm.min() #pylint: disable=invalid-unary-operand-type assert x_norm.min() <= 0, x_norm.min() # check denorm. - x_ = self.ap._denormalize(x_norm) + x_ = self.ap.denormalize(x_norm) assert (x - x_).sum() < 1e-3, (x - x_).mean() self.ap.signal_norm = True self.ap.symmetric_norm = True self.ap.clip_norm = True self.ap.max_norm = 4.0 - x_norm = self.ap._normalize(x) + x_norm = self.ap.normalize(x) print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") @@ -124,26 +124,26 @@ class TestAudio(unittest.TestCase): assert x_norm.min() >= -self.ap.max_norm, x_norm.min() #pylint: disable=invalid-unary-operand-type assert x_norm.min() <= 0, x_norm.min() # check denorm. 
- x_ = self.ap._denormalize(x_norm) + x_ = self.ap.denormalize(x_norm) assert (x - x_).sum() < 1e-3, (x - x_).mean() self.ap.signal_norm = True self.ap.symmetric_norm = False self.ap.max_norm = 1.0 - x_norm = self.ap._normalize(x) + x_norm = self.ap.normalize(x) print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") assert (x_old - x).sum() == 0 assert x_norm.max() <= self.ap.max_norm, x_norm.max() assert x_norm.min() >= 0, x_norm.min() - x_ = self.ap._denormalize(x_norm) + x_ = self.ap.denormalize(x_norm) assert (x - x_).sum() < 1e-3 self.ap.signal_norm = True self.ap.symmetric_norm = True self.ap.max_norm = 1.0 - x_norm = self.ap._normalize(x) + x_norm = self.ap.normalize(x) print(f" > MaxNorm: {self.ap.max_norm}, ClipNorm:{self.ap.clip_norm}, SymmetricNorm:{self.ap.symmetric_norm}, SignalNorm:{self.ap.signal_norm} Range-> {x_norm.max()} -- {x_norm.min()}") @@ -151,7 +151,7 @@ class TestAudio(unittest.TestCase): assert x_norm.max() <= self.ap.max_norm, x_norm.max() assert x_norm.min() >= -self.ap.max_norm, x_norm.min() #pylint: disable=invalid-unary-operand-type assert x_norm.min() < 0, x_norm.min() - x_ = self.ap._denormalize(x_norm) + x_ = self.ap.denormalize(x_norm) assert (x - x_).sum() < 1e-3 def test_scaler(self): @@ -172,5 +172,5 @@ class TestAudio(unittest.TestCase): wav = self.ap.load_wav(WAV_FILE) mel_reference = self.ap.melspectrogram(wav) mel_norm = ap.melspectrogram(wav) - mel_denorm = ap._denormalize(mel_norm) + mel_denorm = ap.denormalize(mel_norm) assert abs(mel_reference - mel_denorm).max() < 1e-4 From ca647cf222b071483145e2e2504350780a3e9173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Fri, 22 Jan 2021 02:35:43 +0100 Subject: [PATCH 24/44] Model Manager to download released models --- TTS/utils/manage.py | 103 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 TTS/utils/manage.py diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py new file mode 100644 index 00000000..68bc92d2 --- /dev/null +++ b/TTS/utils/manage.py @@ -0,0 +1,103 @@ +import json +import gdown +from pathlib import Path +import os + +from TTS.utils.io import load_config + + +class ModelManager(object): + """Manage TTS models defined in .models.json. + It provides an interface to list and download + models defines in '.model.json' + + Models are downloaded under '.TTS' folder in the user's + home path. + + Args: + models_file (str): path to .model.json + """ + def __init__(self, models_file): + super().__init__() + self.output_prefix = os.path.join(str(Path.home()), '.tts') + self.url_prefix = "https://drive.google.com/uc?id=" + self.models_dict = None + self.read_models_file(models_file) + + def read_models_file(self, file_path): + """Read .models.json as a dict + + Args: + file_path (str): path to .models.json. 
+ """ + with open(file_path) as json_file: + self.models_dict = json.load(json_file) + + def list_langs(self): + print(" Name format: type/language") + for model_type in self.models_dict: + for lang in self.models_dict[model_type]: + print(f" >: {model_type}/{lang} ") + + def list_datasets(self): + print(" Name format: type/language/dataset") + for model_type in self.models_dict: + for lang in self.models_dict[model_type]: + for dataset in self.models_dict[model_type][lang]: + print(f" >: {model_type}/{lang}/{dataset}") + + def list_models(self): + print(" Name format: type/language/dataset/model") + for model_type in self.models_dict: + for lang in self.models_dict[model_type]: + for dataset in self.models_dict[model_type][lang]: + for model in self.models_dict[model_type][lang][dataset]: + print(f" >: {model_type}/{lang}/{dataset}/{model} ") + + def download_model(self, model_name): + """Download model files given the full model name. + Model name is in the format + 'type/language/dataset/model' + e.g. 'tts_model/en/ljspeech/tacotron' + + Args: + model_name (str): model name as explained above. + + TODO: support multi-speaker models + """ + # fetch model info from the dict + type, lang, dataset, model = model_name.split("/") + model_full_name = f"{type}--{lang}--{dataset}--{model}" + model_item = self.models_dict[type][lang][dataset][model] + # set the model specific output path + output_path = os.path.join(self.output_prefix, model_full_name) + output_model_path = os.path.join(output_path, "model_file.pth.tar") + output_config_path = os.path.join(output_path, "config.json") + if os.path.exists(output_path): + print(f" > {model_name} is already downloaded.") + else: + os.makedirs(output_path, exist_ok=True) + print(f" > Downloading model to {output_path}") + output_stats_path = None + # download files to the output path + self._download_file(model_item['model_file'], output_model_path) + self._download_file(model_item['config_file'], output_config_path) + if model_item['stats_file'] is not None and len(model_item['stats_file']) > 1: + output_stats_path = os.path.join(output_path, 'scale_stats.npy') + self._download_file(model_item['stats_file'], output_stats_path) + # set scale stats path in config.json + config_path = output_config_path + config = load_config(config_path) + config["audio"]['stats_path'] = output_stats_path + with open(config_path, "w") as jf: + json.dump(config, jf) + return output_model_path, output_config_path + + def _download_file(self, id, output): + gdown.download(f"{self.url_prefix}{id}", output=output) + + + + + + From 5fb611ef40256e1cc441a1a2c7969fae0755cb33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Fri, 22 Jan 2021 03:01:53 +0100 Subject: [PATCH 25/44] static image for server index.html --- TTS/server/static/TTS_circle.png | Bin 0 -> 25491 bytes TTS/server/templates/index.html | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 TTS/server/static/TTS_circle.png diff --git a/TTS/server/static/TTS_circle.png b/TTS/server/static/TTS_circle.png new file mode 100644 index 0000000000000000000000000000000000000000..34755811aa0459e7d4cf3ad5de26527d555a93dc GIT binary patch literal 25491 zcmXt91yogA*CnJ|kWMMD(%m3PmxOe8m%yb%QfWatR7$$L8>G8iK)R&s-}imve{bmU zT`%YCz1LcE%{kY8pWiDgbDA<2?{v*d> z(Ox-S_AG(($XPy7j*0MOvn_!k-T(d}wCyzC_4ALZQu}Jc8#O5s4$q&b7nVY8YD}ZY zv-0~7Ob^#y*FF*+LqglLWWKGGlYcI54-u($WcK@JgIZVdd|ChbZJKr*SQ2I)9P%w%dA`9jS6vIF7$*>$+l1Q2OS+^^3`cgY{hZ%o}C? 
[... remaining base64 lines of the GIT binary patch for TTS/server/static/TTS_circle.png (literal 25491 bytes) omitted: binary image data, not human-readable ...]
- + +
From 9c1b3226f5745e52bb9883dade23f36f3b562882 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Fri, 22 Jan 2021 03:26:25 +0100
Subject: [PATCH 26/44] update version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7653b6bd..e07dea09 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ args, unknown_args = parser.parse_known_args()
 # Remove our arguments from argv so that setuptools doesn't see them
 sys.argv = [sys.argv[0]] + unknown_args
 
-version = '0.0.8'
+version = '0.0.9'
 
 # Adapted from https://github.com/pytorch/pytorch
 cwd = os.path.dirname(os.path.abspath(__file__))

From 1d3de15b163a27a11c19f8115de04e9143b61f82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Fri, 22 Jan 2021 13:57:36 +0100
Subject: [PATCH 27/44] circleCI update

---
 .circleci/config.yml | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5b340e32..e0d0e87c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -27,13 +27,9 @@ jobs:
           sudo apt update
           sudo apt install espeak git
           # so we can take advantage of pyproject.toml build-dependency support
-      - run: python3 -m pip install --upgrade pip
-      - run: python3 -m pip install numpy Cython
-      - run: |
-          python3 setup.py egg_info
-          python3 -m pip install -e .
-      - run: |
-          python3 -m pip install --quiet --upgrade cardboardlint pylint
+      - run: sudo pip install --upgrade pip
+      - run: sudo pip install -e .
+      - run: sudo pip install --quiet --upgrade cardboardlint pylint
           cardboardlinter --refspec ${CIRCLE_BRANCH} -n auto
       - run: nosetests tests --nocapture
       - run: |

From d6f6d24fa6211c86713fd41fd498b728af010c67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Fri, 22 Jan 2021 14:24:50 +0100
Subject: [PATCH 28/44] config fix

---
 .circleci/config.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index e0d0e87c..2d2a0dfc 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -29,7 +29,8 @@ jobs:
           # so we can take advantage of pyproject.toml build-dependency support
       - run: sudo pip install --upgrade pip
       - run: sudo pip install -e .
-      - run: sudo pip install --quiet --upgrade cardboardlint pylint
+      - run: |
+          sudo pip install --quiet --upgrade cardboardlint pylint
           cardboardlinter --refspec ${CIRCLE_BRANCH} -n auto
       - run: nosetests tests --nocapture
       - run: |

From 58f6b34f8bbec7e3b976bd634eee3c6d9109aa7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Fri, 22 Jan 2021 14:50:20 +0100
Subject: [PATCH 29/44] config update

---
 .circleci/config.yml | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2d2a0dfc..8aba5c75 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -26,7 +26,6 @@ jobs:
       - run: |
          sudo apt update
          sudo apt install espeak git
-         # so we can take advantage of pyproject.toml build-dependency support
       - run: sudo pip install --upgrade pip
       - run: sudo pip install -e .
       - run: |
@@ -34,14 +33,14 @@
          cardboardlinter --refspec ${CIRCLE_BRANCH} -n auto
       - run: nosetests tests --nocapture
       - run: |
-          ./tests/test_server_package.sh
-          ./tests/test_glow-tts_train.sh
-          ./tests/test_server_package.sh
-          ./tests/test_tacotron_train.sh
-          ./tests/test_vocoder_gan_train.sh
-          ./tests/test_vocoder_wavegrad_train.sh
-          ./tests/test_vocoder_wavernn_train.sh
-          ./tests/test_speedy_speech_train.sh
+          sudo ./tests/test_server_package.sh
+          sudo ./tests/test_glow-tts_train.sh
+          sudo ./tests/test_server_package.sh
+          sudo ./tests/test_tacotron_train.sh
+          sudo ./tests/test_vocoder_gan_train.sh
+          sudo ./tests/test_vocoder_wavegrad_train.sh
+          sudo ./tests/test_vocoder_wavernn_train.sh
+          sudo ./tests/test_speedy_speech_train.sh
 
   test-3.7:
     <<: *test-template

From afb7db2a1dec6d7a135e94a808e73b04f6135e7e Mon Sep 17 00:00:00 2001
From: Thorsten Mueller
Date: Fri, 22 Jan 2021 16:22:50 +0100
Subject: [PATCH 30/44] Removed unneeded check and removed specific taco2
 model name.

---
 TTS/server/server.py              | 6 ++----
 TTS/server/templates/details.html | 6 +++---
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/TTS/server/server.py b/TTS/server/server.py
index f2412bb8..99d3a92e 100644
--- a/TTS/server/server.py
+++ b/TTS/server/server.py
@@ -73,9 +73,7 @@ def index():
 
 @app.route('/details')
 def details():
-    if args.tts_config is not None and os.path.isfile(args.tts_config):
-        taco2_config = load_config(args.tts_config)
-
+    model_config = load_config(args.tts_config)
     if args.vocoder_config is not None and os.path.isfile(args.vocoder_config):
         vocoder_config = load_config(args.vocoder_config)
     else:
@@ -83,7 +81,7 @@ def details():
 
     return render_template('details.html',
                            show_details=args.show_details
-                           , taco2_config=taco2_config
+                           , model_config=model_config
                            , vocoder_config=vocoder_config
                            , args=args.__dict__
                            )

diff --git a/TTS/server/templates/details.html b/TTS/server/templates/details.html
index 2db60657..51c9ed85 100644
--- a/TTS/server/templates/details.html
+++ b/TTS/server/templates/details.html
@@ -62,10 +62,10 @@
-            {% if taco2_config != None %}
+            {% if model_config != None %}
 
-                Tacotron2 model config:
+                Model config:
 
@@ -74,7 +74,7 @@
 
-                {% for key, value in taco2_config.items() %}
+                {% for key, value in model_config.items() %}
 

From 60c1bb93d9a424ab4881de9f2afc4edb848d4fc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Mon, 25 Jan 2021 11:16:20 +0100
Subject: [PATCH 31/44] fixes before first PyPI release

---
 README.md             | 11 ++++++-----
 TTS/bin/synthesize.py |  6 +++++-
 TTS/server/README.md  |  2 ++
 setup.py              | 39 ++++++++++++++++++++-------------------
 4 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 0738f2e9..76d0ab49 100644
--- a/README.md
+++ b/README.md
@@ -141,20 +141,21 @@ Some of the public datasets that we successfully applied TTS:
 
 ## Example: Synthesizing Speech on Terminal Using the Released Models.
 
-TTS provides a CLI interface for synthesizing speech using pre-trained models. You can either use your own model or the release models under the TTS project.
+After the installation, TTS provides a CLI interface for synthesizing speech using pre-trained models. You can either use your own model or the release models under the TTS project.
 
 Listing released TTS models.
 
-```./TTS/bin/synthesize.py --list_models```
+```tts --list_models```
 
 Run a tts and a vocoder model from the released model list. (Simply copy and paste the full model names from the list as arguments for the command below.)
 
-```./TTS/bin/synthesize.py --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" --output_path```
+```tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" --output_path```
 
 Run your own TTS model (Using Griffin-Lim Vocoder)
 
-```./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav```
+```tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav```
 
 Run your own TTS and Vocoder models
 
-```./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json```
+```tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json```
 
+**Note:** You can use ```./TTS/bin/synthesize.py``` if you prefer running ```tts``` from the TTS project folder.
 
 ## Example: Training and Fine-tuning LJ-Speech Dataset
 Here you can find a [CoLab](https://gist.github.com/erogol/97516ad65b44dbddb8cd694953187c5b) notebook for a hands-on example, training LJSpeech. Or you can manually follow the guideline below.
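Before the rest of this patch continues below, a rough sketch of the Python flow that the new `tts` entry point wraps, using the `ModelManager` introduced in this series. The models-file path and the example model name are illustrative only, not fixed by the patch:

```python
# Sketch of the released-models workflow behind `tts --list_models` and
# `tts --model_name ...`, assuming a source checkout where .models.json
# still sits at the repo root (a later patch moves it into the package).
from TTS.utils.manage import ModelManager

manager = ModelManager(".models.json")

# Prints every entry as <model_type>/<language>/<dataset>/<model_name>.
manager.list_models()

# Downloads a model plus its config; the name below is only an example.
model_path, config_path = manager.download_model("tts_models/en/ljspeech/glow-tts")
print(model_path, config_path)
```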
diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py
index 64d3298b..18bd6ac1 100755
--- a/TTS/bin/synthesize.py
+++ b/TTS/bin/synthesize.py
@@ -23,7 +23,7 @@ def str2bool(v):
     raise argparse.ArgumentTypeError('Boolean value expected.')
 
 
-if __name__ == "__main__":
+def main():
     # pylint: disable=bad-continuation
     parser = argparse.ArgumentParser(description='''Synthesize speech on command line.\n\n'''
@@ -216,3 +216,7 @@ if __name__ == "__main__":
     out_path = os.path.join(args.out_path, file_name)
     print(" > Saving output to {}".format(out_path))
     synthesizer.save_wav(wav, out_path)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

diff --git a/TTS/server/README.md b/TTS/server/README.md
index cd885839..a8d8635a 100644
--- a/TTS/server/README.md
+++ b/TTS/server/README.md
@@ -9,6 +9,8 @@ Instructions below are based on a Ubuntu 18.04 machine, but it should be simple
 ##### Using server.py
 If you have the environment set already for TTS, then you can directly call ```server.py```.
 
+**Note:** After installing TTS as a package you can use ```tts-server``` to call the commands below.
+
 Examples runs:
 
 List officially released models.

diff --git a/setup.py b/setup.py
index e07dea09..a7804ad6 100644
--- a/setup.py
+++ b/setup.py
@@ -33,22 +33,8 @@ args, unknown_args = parser.parse_known_args()
 # Remove our arguments from argv so that setuptools doesn't see them
 sys.argv = [sys.argv[0]] + unknown_args
 
-version = '0.0.9'
-
-# Adapted from https://github.com/pytorch/pytorch
+version = '0.0.9a0'
 cwd = os.path.dirname(os.path.abspath(__file__))
-if os.getenv('TTS_PYTORCH_BUILD_VERSION'):
-    version = os.getenv('TTS_PYTORCH_BUILD_VERSION')
-else:
-    try:
-        sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
-                                      cwd=cwd).decode('ascii').strip()
-        version += '+' + sha[:7]
-    except subprocess.CalledProcessError:
-        pass
-    except IOError:  # FileNotFoundError for python 3
-        pass
-
 
 # Handle Cython code
 def find_pyx(path='.'):
@@ -108,9 +94,11 @@ def pip_install(package_name):
 
 reqs_from_file = open('requirements.txt').readlines()
 # reqs_without_tf = [r for r in reqs_from_file if not r.startswith('tensorflow')]
 # tf_req = [r for r in reqs_from_file if r.startswith('tensorflow')]
-# requirements = {'install_requires': reqs_without_tf, 'pip_install': tf_req}
+
+with open('README.md', "r", encoding="utf-8") as readme_file:
+    README = readme_file.read()
+
 setup(
     name='TTS',
     version=version,
@@ -118,8 +106,8 @@ setup(
     author='Eren Gölge',
     author_email='egolge@mozilla.com',
     description='Text to Speech with Deep Learning',
+    # long_description=README,
     license='MPL-2.0',
-    entry_points={'console_scripts': ['tts-server = TTS.server.server:main']},
     ext_modules=find_cython_extensions(),
     packages=find_packages(include=['TTS*']),
     project_urls={
@@ -134,17 +122,30 @@ setup(
     },
     install_requires=reqs_from_file,
     python_requires='>=3.6.0',
+    entry_points={
+        'console_scripts': [
+            'tts=TTS.bin.synthesize:main',
+            'tts-server = TTS.server.server:main'
+        ]
+    },
     classifiers=[
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
         'Development Status :: 3 - Alpha',
-        "Intended Audience :: Science/Research :: Developers",
+        "Intended Audience :: Science/Research",
+        "Intended Audience :: Developers",
         "Operating System :: POSIX :: Linux",
         'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
-        "Topic :: Software Development :: Libraries :: Python Modules :: Speech :: Sound/Audio :: Multimedia :: Artificial Intelligence",
+        "Topic :: Software Development",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+        "Topic :: Multimedia :: Sound/Audio :: Speech",
+        "Topic :: Multimedia :: Sound/Audio",
+        "Topic :: Multimedia",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence"
     ])

From 57b668fd869c15395f0172c2dca972e3703afc69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Mon, 25 Jan 2021 13:06:12 +0100
Subject: [PATCH 32/44] fixing some pypi issues

---
 MANIFEST.in                      |  4 ++++
 README.md                        |  2 +-
 .models.json => TTS/.models.json |  0
 TTS/bin/__init__.py              |  0
 setup.py                         | 23 +++++++----------------
 5 files changed, 12 insertions(+), 17 deletions(-)
 create mode 100644 MANIFEST.in
 rename .models.json => TTS/.models.json (100%)
 create mode 100644 TTS/bin/__init__.py

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..416867eb
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,4 @@
+include README.md
+include LICENSE.txt
+include requirements.txt
+include TTS/.models.json
\ No newline at end of file

diff --git a/README.md b/README.md
index 76d0ab49..7dd17e43 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ Please use our dedicated channels for questions and discussion. Help is much mor
 You can also help us implement more models. Some TTS related work can be found [here](https://github.com/erogol/TTS-papers).
 
 ## Install TTS
-TTS supports **python >= 3.6**.
+TTS supports **python >= 3.6, <3.9**.
 
 ```pip install .``` or ```pip install -e .``` to keep your installation in your working directory.

diff --git a/.models.json b/TTS/.models.json
similarity index 100%
rename from .models.json
rename to TTS/.models.json

diff --git a/TTS/bin/__init__.py b/TTS/bin/__init__.py
new file mode 100644
index 00000000..e69de29b

diff --git a/setup.py b/setup.py
index a7804ad6..573230de 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ import subprocess
 import sys
 
 import numpy
-from setuptools import setup, find_packages, Extension
+from setuptools import setup, find_packages
 import setuptools.command.develop
 import setuptools.command.build_py
 
@@ -33,7 +33,7 @@ args, unknown_args = parser.parse_known_args()
 # Remove our arguments from argv so that setuptools doesn't see them
 sys.argv = [sys.argv[0]] + unknown_args
 
-version = '0.0.9a0'
+version = '0.0.9a4'
 cwd = os.path.dirname(os.path.abspath(__file__))
 
 # Handle Cython code
@@ -91,11 +91,7 @@ def pip_install(package_name):
     subprocess.call([sys.executable, '-m', 'pip', 'install', package_name])
 
 
-reqs_from_file = open('requirements.txt').readlines()
-# reqs_without_tf = [r for r in reqs_from_file if not r.startswith('tensorflow')]
-# tf_req = [r for r in reqs_from_file if r.startswith('tensorflow')]
-
+requirements = open(os.path.join(cwd, 'requirements.txt'), 'r').readlines()
 with open('README.md', "r", encoding="utf-8") as readme_file:
     README = readme_file.read()
 
@@ -106,7 +102,8 @@ setup(
     author='Eren Gölge',
     author_email='egolge@mozilla.com',
     description='Text to Speech with Deep Learning',
-    # long_description=README,
+    long_description=README,
+    long_description_content_type="text/markdown",
     license='MPL-2.0',
     ext_modules=find_cython_extensions(),
     packages=find_packages(include=['TTS*']),
@@ -120,8 +117,8 @@ setup(
         'build_py': build_py,
         'develop': develop,
     },
-    install_requires=reqs_from_file,
-    python_requires='>=3.6.0',
+    install_requires=requirements,
+    python_requires='>=3.6.0, <3.9',
     entry_points={
         'console_scripts': [
             'tts=TTS.bin.synthesize:main',
             'tts-server = TTS.server.server:main'
         ]
     },
     classifiers=[
         "Programming Language :: Python",
         "Programming Language :: Python :: 3",
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         'Development Status :: 3 - Alpha',
         "Intended Audience :: Science/Research",
         "Intended Audience :: Developers",
         "Operating System :: POSIX :: Linux",
         'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
         "Topic :: Software Development",
         "Topic :: Software Development :: Libraries :: Python Modules",
         "Topic :: Multimedia :: Sound/Audio :: Speech",
         "Topic :: Multimedia :: Sound/Audio",
         "Topic :: Multimedia",
         "Topic :: Scientific/Engineering :: Artificial Intelligence"
     ])
-
-# for some reason having tensorflow in 'install_requires'
-# breaks some of the dependencies.
-# if 'bdist_wheel' not in unknown_args:
-#     for module in requirements['pip_install']:
-#         pip_install(module)

From 82e029529e2a514ebfef249b6e0aee87d825eff1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Mon, 25 Jan 2021 13:27:54 +0100
Subject: [PATCH 33/44] fix manifest file

---
 Dockerfile            | 20 --------------------
 MANIFEST.in           |  8 +++++++-
 TTS/bin/synthesize.py |  2 +-
 TTS/server/server.py  |  2 +-
 setup.py              |  3 ++-
 5 files changed, 11 insertions(+), 24 deletions(-)
 delete mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index 43f2e9e9..00000000
--- a/Dockerfile
+++ /dev/null
@@ -1,20 +0,0 @@
-FROM pytorch/pytorch:1.0.1-cuda10.0-cudnn7-runtime
-
-WORKDIR /srv/app
-
-RUN apt-get update && \
-    apt-get install -y libsndfile1 espeak && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-
-# Copy Source later to enable dependency caching
-COPY requirements.txt /srv/app/
-RUN pip install -r requirements.txt
-
-COPY . /srv/app
-
-# http://bugs.python.org/issue19846
-# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
-ENV LANG C.UTF-8
-
-CMD python3.6 server/server.py -c server/conf.json

diff --git a/MANIFEST.in b/MANIFEST.in
index 416867eb..346c1436 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,10 @@
 include README.md
 include LICENSE.txt
 include requirements.txt
-include TTS/.models.json
\ No newline at end of file
+recursive-include TTS *.json
+recursive-include TTS *.html
+recursive-include TTS *.png
+recursive-include TTS *.md
+recursive-include notebooks *.ipynb
+recursive-include images *.png
+

diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py
index 18bd6ac1..bb9339d5 100755
--- a/TTS/bin/synthesize.py
+++ b/TTS/bin/synthesize.py
@@ -143,7 +143,7 @@ def main():
     args = parser.parse_args()
 
     # load model manager
-    path = Path(__file__).parent / "../../.models.json"
+    path = Path(__file__).parent / "../.models.json"
     manager = ModelManager(path)
 
     model_path = None

diff --git a/TTS/server/server.py b/TTS/server/server.py
index a89f4021..c579ff67 100644
--- a/TTS/server/server.py
+++ b/TTS/server/server.py
@@ -47,7 +47,7 @@ wavernn_config_file = os.path.join(embedded_wavernn_folder, 'config.json')
 
 args = create_argparser().parse_args()
 
-path = Path(__file__).parent / "../../.models.json"
+path = Path(__file__).parent / "../.models.json"
 manager = ModelManager(path)
 
 if args.list_models:

diff --git a/setup.py b/setup.py
index 573230de..5504007c 100644
--- a/setup.py
+++ b/setup.py
@@ -33,7 +33,7 @@ args, unknown_args = parser.parse_known_args()
 # Remove our arguments from argv so that setuptools doesn't see them
 sys.argv = [sys.argv[0]] + unknown_args
 
-version = '0.0.9a4'
+version = '0.0.9a9'
 cwd = os.path.dirname(os.path.abspath(__file__))
 
 # Handle Cython code
@@ -105,6 +105,7 @@ setup(
     long_description=README,
     long_description_content_type="text/markdown",
     license='MPL-2.0',
+    include_package_data = True,
     ext_modules=find_cython_extensions(),
     packages=find_packages(include=['TTS*']),
     project_urls={

From 877f0bbfba9c376a380cbfc38f8682d63ece8453 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 02:56:55 +0100
Subject: [PATCH 34/44] manifest.in update

---
 MANIFEST.in           | 3 ++-
 TTS/bin/synthesize.py | 4 ----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 346c1436..3b6f33ce 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,6 +5,7 @@ recursive-include TTS *.json
 recursive-include TTS *.html
 recursive-include TTS *.png
 recursive-include TTS *.md
-recursive-include notebooks *.ipynb
+recursive-include TTS *.py
+recursive-include TTS *.pyx
 recursive-include images *.png
 

diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py
index bb9339d5..b7ccf850 100755
--- a/TTS/bin/synthesize.py
+++ b/TTS/bin/synthesize.py
@@ -150,10 +150,6 @@ def main():
     config_path = None
     vocoder_path = None
     vocoder_config_path = None
-    model = None
-    vocoder_model = None
-    vocoder_config = None
-    vocoder_ap = None
 
     # CASE1: list pre-trained TTS models
     if args.list_models:

From 660d61aeebb0a28e89b2e485a61f8949407e2e88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 02:57:07 +0100
Subject: [PATCH 35/44] maximum_path_numpy and CYTHON adaptable import

---
 .../glow_tts/monotonic_align/__init__.py | 56 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/TTS/tts/layers/glow_tts/monotonic_align/__init__.py b/TTS/tts/layers/glow_tts/monotonic_align/__init__.py
index 267fb7f4..a2912a98 100644
--- a/TTS/tts/layers/glow_tts/monotonic_align/__init__.py
+++ b/TTS/tts/layers/glow_tts/monotonic_align/__init__.py
@@ -2,7 +2,13 @@ import numpy as np
 import torch
 from torch.nn import functional as F
 from TTS.tts.utils.generic_utils import sequence_mask
-from TTS.tts.layers.glow_tts.monotonic_align.core import maximum_path_c
+
+try:
+    # TODO: fix pypi cython installation problem.
+    from TTS.tts.layers.glow_tts.monotonic_align.core import maximum_path_c
+    CYTHON = True
+except ModuleNotFoundError:
+    CYTHON = False
 
 
 def convert_pad_shape(pad_shape):
@@ -32,6 +38,12 @@ def generate_path(duration, mask):
 
 
 def maximum_path(value, mask):
+    if CYTHON:
+        return maximum_path_cython(value, mask)
+    return maximum_path_numpy(value, mask)
+
+
+def maximum_path_cython(value, mask):
     """ Cython optimised version.
     value: [b, t_x, t_y]
     mask: [b, t_x, t_y]
@@ -47,3 +59,45 @@ def maximum_path(value, mask):
     t_y_max = mask.sum(2)[:, 0].astype(np.int32)
     maximum_path_c(path, value, t_x_max, t_y_max)
     return torch.from_numpy(path).to(device=device, dtype=dtype)
+
+
+def maximum_path_numpy(value, mask, max_neg_val=None):
+    """
+    Monotonic alignment search algorithm
+    Numpy-friendly version. It's about 4 times faster than torch version.
+    value: [b, t_x, t_y]
+    mask: [b, t_x, t_y]
+    """
+    if max_neg_val is None:
+        max_neg_val = -np.inf  # Patch for Sphinx complaint
+    value = value * mask
+
+    device = value.device
+    dtype = value.dtype
+    value = value.cpu().detach().numpy()
+    mask = mask.cpu().detach().numpy().astype(np.bool)
+
+    b, t_x, t_y = value.shape
+    direction = np.zeros(value.shape, dtype=np.int64)
+    v = np.zeros((b, t_x), dtype=np.float32)
+    x_range = np.arange(t_x, dtype=np.float32).reshape(1, -1)
+    for j in range(t_y):
+        v0 = np.pad(v, [[0, 0], [1, 0]], mode="constant", constant_values=max_neg_val)[:, :-1]
+        v1 = v
+        max_mask = v1 >= v0
+        v_max = np.where(max_mask, v1, v0)
+        direction[:, :, j] = max_mask
+
+        index_mask = x_range <= j
+        v = np.where(index_mask, v_max + value[:, :, j], max_neg_val)
+    direction = np.where(mask, direction, 1)
+
+    path = np.zeros(value.shape, dtype=np.float32)
+    index = mask[:, :, 0].sum(1).astype(np.int64) - 1
+    index_range = np.arange(b)
+    for j in reversed(range(t_y)):
+        path[index_range, index, j] = 1
+        index = index + direction[index_range, index, j] - 1
+    path = path * mask.astype(np.float32)
+    path = torch.from_numpy(path).to(device=device, dtype=dtype)
+    return path

From b464cab9b8a6a6d8fa7d320affc323f4736fbb44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 02:57:50 +0100
Subject: [PATCH 36/44] setup.py update and pylint fixes

---
 .../layers/glow_tts/monotonic_align/setup.py | 12 ++--
 TTS/utils/manage.py                          | 12 ++--
 setup.py                                     | 59 ++++++++++---------
 3 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/TTS/tts/layers/glow_tts/monotonic_align/setup.py b/TTS/tts/layers/glow_tts/monotonic_align/setup.py
index 1d669ea0..f22bc6a3 100644
--- a/TTS/tts/layers/glow_tts/monotonic_align/setup.py
+++ b/TTS/tts/layers/glow_tts/monotonic_align/setup.py
@@ -1,7 +1,7 @@
-from distutils.core import setup
-from Cython.Build import cythonize
-import numpy
+# from distutils.core import setup
+# from Cython.Build import cythonize
+# import numpy
 
-setup(name='monotonic_align',
-      ext_modules=cythonize("core.pyx"),
-      include_dirs=[numpy.get_include()])
+# setup(name='monotonic_align',
+#       ext_modules=cythonize("core.pyx"),
+#       include_dirs=[numpy.get_include()])

diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
index 68bc92d2..c90d3931 100644
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@@ -51,7 +51,7 @@ class ModelManager(object):
         for model_type in self.models_dict:
             for lang in self.models_dict[model_type]:
                 for dataset in self.models_dict[model_type][lang]:
-                    for model in self.models_dict[model_type][lang][dataset]: 
+                    for model in self.models_dict[model_type][lang][dataset]:
                         print(f" >: {model_type}/{lang}/{dataset}/{model} ")
 
     def download_model(self, model_name):
@@ -66,9 +66,9 @@ class ModelManager(object):
         TODO: support multi-speaker models
         """
         # fetch model info from the dict
-        type, lang, dataset, model = model_name.split("/")
-        model_full_name = f"{type}--{lang}--{dataset}--{model}"
-        model_item = self.models_dict[type][lang][dataset][model]
+        model_type, lang, dataset, model = model_name.split("/")
+        model_full_name = f"{model_type}--{lang}--{dataset}--{model}"
+        model_item = self.models_dict[model_type][lang][dataset][model]
         # set the model specific output path
         output_path = os.path.join(self.output_prefix, model_full_name)
         output_model_path = os.path.join(output_path, "model_file.pth.tar")
@@ -93,8 +93,8 @@ class ModelManager(object):
                 json.dump(config, jf)
         return output_model_path, output_config_path
 
-    def _download_file(self, id, output):
-        gdown.download(f"{self.url_prefix}{id}", output=output)
+    def _download_file(self, idx, output):
+        gdown.download(f"{self.url_prefix}{idx}", output=output)

diff --git a/setup.py b/setup.py
index 5504007c..7e070b6c 100644
--- a/setup.py
+++ b/setup.py
@@ -5,22 +5,16 @@ import os
 import shutil
 import subprocess
 import sys
+
 import numpy
-
-from setuptools import setup, find_packages
-import setuptools.command.develop
 import setuptools.command.build_py
+import setuptools.command.develop
 
-# handle import if cython is not already installed.
-try:
-    from Cython.Build import cythonize
-except ImportError:
-    # create closure for deferred import
-    def cythonize(*args, **kwargs): #pylint: disable=redefined-outer-name
-        from Cython.Build import cythonize #pylint: disable=redefined-outer-name, import-outside-toplevel
-        return cythonize(*args, **kwargs)
-
+from setuptools import find_packages, setup
+from distutils.extension import Extension
+from Cython.Build import cythonize
 
+# parameters for wheeling server.
 parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)
 parser.add_argument('--checkpoint',
                     type=str,
@@ -33,24 +27,24 @@ args, unknown_args = parser.parse_known_args()
 # Remove our arguments from argv so that setuptools doesn't see them
 sys.argv = [sys.argv[0]] + unknown_args
 
-version = '0.0.9a9'
+version = '0.0.9a10'
 cwd = os.path.dirname(os.path.abspath(__file__))
 
 # Handle Cython code
-def find_pyx(path='.'):
-    pyx_files = []
-    for root, _, filenames in os.walk(path):
-        for fname in filenames:
-            if fname.endswith('.pyx'):
-                pyx_files.append(os.path.join(root, fname))
-    return pyx_files
+# def find_pyx(path='.'):
+#     pyx_files = []
+#     for root, _, filenames in os.walk(path):
+#         for fname in filenames:
+#             if fname.endswith('.pyx'):
+#                 pyx_files.append(os.path.join(root, fname))
+#     return pyx_files
 
 
-def find_cython_extensions(path="."):
-    exts = cythonize(find_pyx(path), language_level=3)
-    for ext in exts:
-        ext.include_dirs = [numpy.get_include()]
-    return exts
+# def find_cython_extensions(path="."):
+#     exts = cythonize(find_pyx(path), language_level=3)
+#     for ext in exts:
+#         ext.include_dirs = [numpy.get_include()]
+#     return exts
 
 
 class build_py(setuptools.command.build_py.build_py):  # pylint: disable=too-many-ancestors
@@ -95,6 +89,8 @@ requirements = open(os.path.join(cwd, 'requirements.txt'), 'r').readlines()
 with open('README.md', "r", encoding="utf-8") as readme_file:
     README = readme_file.read()
 
+exts = [Extension(name='TTS.tts.layers.glow_tts.monotonic_align.core',
+                  sources=["TTS/tts/layers/glow_tts/monotonic_align/core.pyx"])]
 setup(
     name='TTS',
     version=version,
@@ -105,8 +101,12 @@ setup(
     long_description=README,
     long_description_content_type="text/markdown",
     license='MPL-2.0',
-    include_package_data = True,
-    ext_modules=find_cython_extensions(),
+    # cython
+    include_dirs=numpy.get_include(),
+    ext_modules=cythonize(exts, language_level=3),
+    # ext_modules=find_cython_extensions(),
+    # package
+    include_package_data=True,
     packages=find_packages(include=['TTS*']),
     project_urls={
         'Documentation': 'https://github.com/mozilla/TTS/wiki',
@@ -117,6 +117,7 @@ setup(
     cmdclass={
         'build_py': build_py,
         'develop': develop,
+        # 'build_ext': build_ext
     },
     install_requires=requirements,
     python_requires='>=3.6.0, <3.9',
@@ -144,4 +145,6 @@ setup(
         "Topic :: Multimedia",
         "Topic :: Scientific/Engineering :: Artificial Intelligence"
-    ])
+    ],
+    zip_safe=False
+)

From 1358e1a3686a5b14bf289c478e8d44c1ccf55fb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 03:06:58 +0100
Subject: [PATCH 37/44] README.md update

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index 7dd17e43..87863001 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,13 @@ You can also help us implement more models. Some TTS related work can be found [
 ## Install TTS
 TTS supports **python >= 3.6, <3.9**.
 
+If you are only interested in [synthesizing speech](https://github.com/mozilla/TTS/tree/dev#example-synthesizing-speech-on-terminal-using-the-released-models) with the released TTS models, installing from PyPI is the easiest option.
+
+```pip install TTS```
+
+If you plan to code or train models, clone TTS and install it locally.
+
+```git clone https://github.com/mozilla/TTS```
 ```pip install .``` or ```pip install -e .``` to keep your installation in your working directory.

From 9a865c8f281c25375393152bd1c580b3a973a5ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 03:08:45 +0100
Subject: [PATCH 38/44] Update README.md

---
 README.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 87863001..9e24d2c5 100644
--- a/README.md
+++ b/README.md
@@ -95,12 +95,16 @@ TTS supports **python >= 3.6, <3.9**.
 
 If you are only interested in [synthesizing speech](https://github.com/mozilla/TTS/tree/dev#example-synthesizing-speech-on-terminal-using-the-released-models) with the released TTS models, installing from PyPI is the easiest option.
 
-```pip install TTS```
+```
+pip install TTS
+```
 
 If you plan to code or train models, clone TTS and install it locally.
 
-```git clone https://github.com/mozilla/TTS```
-```pip install .``` or ```pip install -e .``` to keep your installation in your working directory.
+```
+git clone https://github.com/mozilla/TTS
+pip install -e .
+```
 
 ## Directory Structure
 ```

From 5c730aca9f116a64c2cd4558eebe52e32e742f35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 03:33:12 +0100
Subject: [PATCH 39/44] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 9e24d2c5..5c2542b7 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,7 @@ Please use our dedicated channels for questions and discussion. Help is much mor
 ## 🔗 Links and Resources
 | Type                            | Links                                    |
 | ------------------------------- | --------------------------------------- |
+| 💾 **Installation**             | [TTS/README.md](https://github.com/mozilla/TTS/tree/dev#install-tts)|
 | 👩🏾‍🏫 **Tutorials and Examples** | [TTS/Wiki](https://github.com/mozilla/TTS/wiki/TTS-Notebooks-and-Tutorials) |
 | 🚀 **Released Models**          | [TTS/Wiki](https://github.com/mozilla/TTS/wiki/Released-Models)|
 | 💻 **Docker Image**             | [Repository by @synesthesiam](https://github.com/synesthesiam/docker-mozillatts)|

From e80b1556df8a32a879e6464870d50544cefcabf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 11:59:11 +0100
Subject: [PATCH 40/44] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5c2542b7..4fac67a0 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 
 # TTS: Text-to-Speech for all.
 
-TTS is a library for advanced Text-to-Speech generation. It's built on the latest research, was designed to be achive the best trade-off among ease-of-training, speed and quality.
+TTS is a library for advanced Text-to-Speech generation. It's built on the latest research, was designed to achieve the best trade-off among ease-of-training, speed and quality.
 TTS comes with [pretrained models](https://github.com/mozilla/TTS/wiki/Released-Models), tools for measuring dataset quality and already used in **20+ languages** for products and research projects.
 
 [![CircleCI]()]()

From c3f7bf2b954542c85bf97ffb77b89672ce6eff07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 12:07:18 +0100
Subject: [PATCH 41/44] remove python 3.9

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7e070b6c..7ab6a91d 100644
--- a/setup.py
+++ b/setup.py
@@ -44,6 +44,7 @@ cwd = os.path.dirname(os.path.abspath(__file__))
 # exts = cythonize(find_pyx(path), language_level=3)
 # for ext in exts:
 #     ext.include_dirs = [numpy.get_include()]
+
 #     return exts
 
 
@@ -134,7 +134,6 @@ setup(
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
         'Development Status :: 3 - Alpha',
         "Intended Audience :: Science/Research",
         "Intended Audience :: Developers",

From 0117c811a950480035fd8e2974cd8dae3fd42184 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 12:33:27 +0100
Subject: [PATCH 42/44] add a button to index.html to see the model details

---
 TTS/server/server.py            | 2 +-
 TTS/server/templates/index.html | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/TTS/server/server.py b/TTS/server/server.py
index e15e8880..1f7357af 100644
--- a/TTS/server/server.py
+++ b/TTS/server/server.py
@@ -81,7 +81,7 @@ app = Flask(__name__)
 
 @app.route('/')
 def index():
-    return render_template('index.html')
+    return render_template('index.html', show_details=args.show_details)
 
 @app.route('/details')
 def details():

diff --git a/TTS/server/templates/index.html b/TTS/server/templates/index.html
index 76a60825..8c3c631d 100644
--- a/TTS/server/templates/index.html
+++ b/TTS/server/templates/index.html
@@ -62,6 +62,9 @@

+          {%if show_details%}
+          <button id="details-button" onclick="location.href = 'details'" name="model-details">Model Details</button><br/><br/>
+          {%endif%}
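To see how the `show_details` flag travels from the CLI into this template, here is a stand-alone toy — only `show_details`, the `details` route, and the Jinja guard come from the patch; the rest of the names are assumed for illustration:

```python
# Minimal Flask sketch mimicking the pattern above: the route hands the flag
# to Jinja, and the {%if show_details%} guard decides whether the
# "Model Details" button is rendered. Illustration only, not TTS's server.
from flask import Flask, render_template_string

app = Flask(__name__)
SHOW_DETAILS = True  # stands in for args.show_details

PAGE = """
{%if show_details%}
<button onclick="location.href = 'details'">Model Details</button>
{%endif%}
"""

@app.route('/')
def index():
    return render_template_string(PAGE, show_details=SHOW_DETAILS)

if __name__ == '__main__':
    app.run()
```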

From 4f32e77006ecee8920aaa679f962e40f7857fe4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 17:32:43 +0100
Subject: [PATCH 43/44] platform indep. way to fetch user data folder

---
 TTS/utils/generic_utils.py | 19 +++++++++++++++++++
 TTS/utils/manage.py        |  4 ++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py
index 7d7911b0..5890f04d 100644
--- a/TTS/utils/generic_utils.py
+++ b/TTS/utils/generic_utils.py
@@ -3,6 +3,8 @@ import glob
 import os
 import shutil
 import subprocess
+import sys
+from pathlib import Path
 
 import torch
 
@@ -67,6 +69,22 @@ def count_parameters(model):
     return sum(p.numel() for p in model.parameters() if p.requires_grad)
 
 
+def get_user_data_dir(appname):
+    if sys.platform == "win32":
+        import winreg  # pylint: disable=import-outside-toplevel
+        key = winreg.OpenKey(
+            winreg.HKEY_CURRENT_USER,
+            r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
+        )
+        dir_, _ = winreg.QueryValueEx(key, "Local AppData")
+        ans = Path(dir_).resolve(strict=False)
+    elif sys.platform == 'darwin':
+        ans = Path('~/Library/Application Support/').expanduser()
+    else:
+        ans = Path.home().joinpath('.local/share')
+    return ans.joinpath(appname)
+
+
 def set_init_dict(model_dict, checkpoint_state, c):
     # Partial initialization: if there is a mismatch with new and old layer, it is skipped.
     for k, v in checkpoint_state.items():
@@ -97,6 +115,7 @@ def set_init_dict(model_dict, checkpoint_state, c):
                                        len(model_dict)))
     return model_dict
 
+
 class KeepAverage():
     def __init__(self):
        self.avg_values = {}

diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
index c90d3931..25b3d797 100644
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@@ -4,7 +4,7 @@ from pathlib import Path
 import os
 
 from TTS.utils.io import load_config
-
+from TTS.utils.generic_utils import get_user_data_dir
 
 class ModelManager(object):
     """Manage TTS models defined in .models.json.
@@ -19,7 +19,7 @@ class ModelManager(object):
     """
     def __init__(self, models_file):
         super().__init__()
-        self.output_prefix = os.path.join(str(Path.home()), '.tts')
+        self.output_prefix = get_user_data_dir('tts')
         self.url_prefix = "https://drive.google.com/uc?id="
         self.models_dict = None
         self.read_models_file(models_file)

From 6c8d943ea87315ad600a5a6f58dd12d5cbcd8c6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Tue, 26 Jan 2021 17:38:11 +0100
Subject: [PATCH 44/44] version update

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7ab6a91d..6cc06f89 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,7 @@ args, unknown_args = parser.parse_known_args()
 # Remove our arguments from argv so that setuptools doesn't see them
 sys.argv = [sys.argv[0]] + unknown_args
 
-version = '0.0.9a10'
+version = '0.0.9'
 cwd = os.path.dirname(os.path.abspath(__file__))
 
 # Handle Cython code
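With `get_user_data_dir` in place, downloaded models move from `~/.tts` into the platform's per-user data directory. A quick illustration of where the branches above resolve — the exact paths depend on the local environment:

```python
# Where ModelManager's output_prefix now points, following the branches of
# get_user_data_dir in patch 43 above.
from TTS.utils.generic_utils import get_user_data_dir

print(get_user_data_dir('tts'))
# Linux:   ~/.local/share/tts
# macOS:   ~/Library/Application Support/tts
# Windows: the "Local AppData" value from the Shell Folders registry key, plus \tts
```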
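Circling back to patch 35: a small smoke test of the pure-numpy monotonic-alignment fallback, runnable when the Cython `core` module is absent. The shapes and values are toy, and the sketch assumes a NumPy of this era, since `maximum_path_numpy` uses the since-removed `np.bool` alias:

```python
# Exercise maximum_path, which dispatches to maximum_path_numpy when CYTHON
# is False; the result is a 0/1 alignment path over the [b, t_x, t_y] lattice.
import torch
from TTS.tts.layers.glow_tts.monotonic_align import maximum_path

b, t_x, t_y = 1, 3, 5
value = torch.randn(b, t_x, t_y)  # alignment scores over text x decoder steps
mask = torch.ones(b, t_x, t_y)    # no padding in this toy batch

path = maximum_path(value, mask)
print(path.shape)        # torch.Size([1, 3, 5])
print(path.sum(dim=1))   # exactly one active text index per decoder step
```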