mirror of https://github.com/coqui-ai/TTS.git
Make lint
This commit is contained in:
parent
30cfafce56
commit
c9972e6f14
|
@ -111,8 +111,8 @@ def load_tts_samples(
|
||||||
meta_data_eval_all += meta_data_eval
|
meta_data_eval_all += meta_data_eval
|
||||||
meta_data_train_all += meta_data_train
|
meta_data_train_all += meta_data_train
|
||||||
# load attention masks for the duration predictor training
|
# load attention masks for the duration predictor training
|
||||||
if dataset.meta_file_attn_mask:
|
if d.meta_file_attn_mask:
|
||||||
meta_data = dict(load_attention_mask_meta_data(dataset["meta_file_attn_mask"]))
|
meta_data = dict(load_attention_mask_meta_data(d["meta_file_attn_mask"]))
|
||||||
for idx, ins in enumerate(meta_data_train_all):
|
for idx, ins in enumerate(meta_data_train_all):
|
||||||
attn_file = meta_data[ins["audio_file"]].strip()
|
attn_file = meta_data[ins["audio_file"]].strip()
|
||||||
meta_data_train_all[idx].update({"alignment_file": attn_file})
|
meta_data_train_all[idx].update({"alignment_file": attn_file})
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import collections
|
import collections
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
from multiprocessing import Pool
|
|
||||||
from typing import Dict, List, Union
|
from typing import Dict, List, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -10,7 +9,6 @@ import tqdm
|
||||||
from torch.utils.data import Dataset
|
from torch.utils.data import Dataset
|
||||||
|
|
||||||
from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor
|
from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor
|
||||||
from TTS.tts.utils.text import TTSTokenizer
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
|
||||||
|
|
||||||
|
@ -183,7 +181,7 @@ class TTSDataset(Dataset):
|
||||||
def get_phonemes(self, idx, text):
|
def get_phonemes(self, idx, text):
|
||||||
out_dict = self.phoneme_dataset[idx]
|
out_dict = self.phoneme_dataset[idx]
|
||||||
assert text == out_dict["text"], f"{text} != {out_dict['text']}"
|
assert text == out_dict["text"], f"{text} != {out_dict['text']}"
|
||||||
assert out_dict["token_ids"].size > 0
|
assert len(out_dict["token_ids"]) > 0
|
||||||
return out_dict
|
return out_dict
|
||||||
|
|
||||||
def get_f0(self, idx):
|
def get_f0(self, idx):
|
||||||
|
@ -192,7 +190,8 @@ class TTSDataset(Dataset):
|
||||||
assert wav_file == out_dict["audio_file"]
|
assert wav_file == out_dict["audio_file"]
|
||||||
return out_dict
|
return out_dict
|
||||||
|
|
||||||
def get_attn_maks(self, attn_file):
|
@staticmethod
|
||||||
|
def get_attn_mask(attn_file):
|
||||||
return np.load(attn_file)
|
return np.load(attn_file)
|
||||||
|
|
||||||
def get_token_ids(self, idx, text):
|
def get_token_ids(self, idx, text):
|
||||||
|
@ -207,7 +206,7 @@ class TTSDataset(Dataset):
|
||||||
|
|
||||||
raw_text = item["text"]
|
raw_text = item["text"]
|
||||||
|
|
||||||
wav = np.asarray(self.load_wav(item[]), dtype=np.float32)
|
wav = np.asarray(self.load_wav(item["audio_file"]), dtype=np.float32)
|
||||||
|
|
||||||
# apply noise for augmentation
|
# apply noise for augmentation
|
||||||
if self.use_noise_augment:
|
if self.use_noise_augment:
|
||||||
|
@ -262,7 +261,7 @@ class TTSDataset(Dataset):
|
||||||
idxs = np.argsort(lengths) # ascending order
|
idxs = np.argsort(lengths) # ascending order
|
||||||
ignore_idx = []
|
ignore_idx = []
|
||||||
keep_idx = []
|
keep_idx = []
|
||||||
for i, idx in enumerate(idxs):
|
for idx in idxs:
|
||||||
length = lengths[idx]
|
length = lengths[idx]
|
||||||
if length < min_len or length > max_len:
|
if length < min_len or length > max_len:
|
||||||
ignore_idx.append(idx)
|
ignore_idx.append(idx)
|
||||||
|
@ -277,6 +276,7 @@ class TTSDataset(Dataset):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create_buckets(samples, batch_group_size: int):
|
def create_buckets(samples, batch_group_size: int):
|
||||||
|
assert batch_group_size > 0
|
||||||
for i in range(len(samples) // batch_group_size):
|
for i in range(len(samples) // batch_group_size):
|
||||||
offset = i * batch_group_size
|
offset = i * batch_group_size
|
||||||
end_offset = offset + batch_group_size
|
end_offset = offset + batch_group_size
|
||||||
|
@ -319,6 +319,7 @@ class TTSDataset(Dataset):
|
||||||
# shuffle batch groups
|
# shuffle batch groups
|
||||||
# create batches with similar length items
|
# create batches with similar length items
|
||||||
# the larger the `batch_group_size`, the higher the length variety in a batch.
|
# the larger the `batch_group_size`, the higher the length variety in a batch.
|
||||||
|
if self.batch_group_size > 0:
|
||||||
samples = self.create_buckets(samples, self.batch_group_size)
|
samples = self.create_buckets(samples, self.batch_group_size)
|
||||||
|
|
||||||
# update items to the new sorted items
|
# update items to the new sorted items
|
||||||
|
@ -571,6 +572,7 @@ class PhonemeDataset(Dataset):
|
||||||
|
|
||||||
We use pytorch dataloader because we are lazy.
|
We use pytorch dataloader because we are lazy.
|
||||||
"""
|
"""
|
||||||
|
print("[*] Pre-computing phonemes...")
|
||||||
with tqdm.tqdm(total=len(self)) as pbar:
|
with tqdm.tqdm(total=len(self)) as pbar:
|
||||||
batch_size = num_workers if num_workers > 0 else 1
|
batch_size = num_workers if num_workers > 0 else 1
|
||||||
dataloder = torch.utils.data.DataLoader(
|
dataloder = torch.utils.data.DataLoader(
|
||||||
|
@ -658,16 +660,21 @@ class F0Dataset:
|
||||||
return len(self.samples)
|
return len(self.samples)
|
||||||
|
|
||||||
def precompute(self, num_workers=0):
|
def precompute(self, num_workers=0):
|
||||||
|
print("[*] Pre-computing F0s...")
|
||||||
with tqdm.tqdm(total=len(self)) as pbar:
|
with tqdm.tqdm(total=len(self)) as pbar:
|
||||||
batch_size = num_workers if num_workers > 0 else 1
|
batch_size = num_workers if num_workers > 0 else 1
|
||||||
|
# we do not normalize at preproessing
|
||||||
|
normalize_f0 = self.normalize_f0
|
||||||
|
self.normalize_f0 = False
|
||||||
dataloder = torch.utils.data.DataLoader(
|
dataloder = torch.utils.data.DataLoader(
|
||||||
batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn
|
batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn
|
||||||
)
|
)
|
||||||
computed_data = []
|
computed_data = []
|
||||||
for batch in dataloder:
|
for batch in dataloder:
|
||||||
f0 = batch["f0"]
|
f0 = batch["f0"]
|
||||||
computed_data.append([f for f in f0])
|
computed_data.append(f for f in f0)
|
||||||
pbar.update(batch_size)
|
pbar.update(batch_size)
|
||||||
|
self.normalize_f0 = normalize_f0
|
||||||
|
|
||||||
if self.normalize_f0:
|
if self.normalize_f0:
|
||||||
computed_data = [tensor for batch in computed_data for tensor in batch] # flatten
|
computed_data = [tensor for batch in computed_data for tensor in batch] # flatten
|
||||||
|
@ -746,80 +753,80 @@ class F0Dataset:
|
||||||
print(f"{indent}| > Number of instances : {len(self.samples)}")
|
print(f"{indent}| > Number of instances : {len(self.samples)}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
from torch.utils.data import DataLoader
|
# from torch.utils.data import DataLoader
|
||||||
|
|
||||||
from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
|
# from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
|
||||||
from TTS.tts.datasets import load_tts_samples
|
# from TTS.tts.datasets import load_tts_samples
|
||||||
from TTS.tts.utils.text.characters import IPAPhonemes
|
# from TTS.tts.utils.text.characters import IPAPhonemes
|
||||||
from TTS.tts.utils.text.phonemizers import ESpeak
|
# from TTS.tts.utils.text.phonemizers import ESpeak
|
||||||
|
|
||||||
dataset_config = BaseDatasetConfig(
|
# dataset_config = BaseDatasetConfig(
|
||||||
name="ljspeech",
|
# name="ljspeech",
|
||||||
meta_file_train="metadata.csv",
|
# meta_file_train="metadata.csv",
|
||||||
path="/Users/erengolge/Projects/TTS/recipes/ljspeech/LJSpeech-1.1",
|
# path="/Users/erengolge/Projects/TTS/recipes/ljspeech/LJSpeech-1.1",
|
||||||
)
|
# )
|
||||||
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)
|
# train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)
|
||||||
samples = train_samples + eval_samples
|
# samples = train_samples + eval_samples
|
||||||
|
|
||||||
phonemizer = ESpeak(language="en-us")
|
# phonemizer = ESpeak(language="en-us")
|
||||||
tokenizer = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=phonemizer)
|
# tokenizer = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=phonemizer)
|
||||||
# ph_dataset = PhonemeDataset(samples, tokenizer, phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests")
|
# # ph_dataset = PhonemeDataset(samples, tokenizer, phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests")
|
||||||
# ph_dataset.precompute(num_workers=4)
|
# # ph_dataset.precompute(num_workers=4)
|
||||||
|
|
||||||
# dataloader = DataLoader(ph_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=ph_dataset.collate_fn)
|
# # dataloader = DataLoader(ph_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=ph_dataset.collate_fn)
|
||||||
|
# # for batch in dataloader:
|
||||||
|
# # print(batch)
|
||||||
|
# # break
|
||||||
|
|
||||||
|
# audio_config = BaseAudioConfig(
|
||||||
|
# sample_rate=22050,
|
||||||
|
# win_length=1024,
|
||||||
|
# hop_length=256,
|
||||||
|
# num_mels=80,
|
||||||
|
# preemphasis=0.0,
|
||||||
|
# ref_level_db=20,
|
||||||
|
# log_func="np.log",
|
||||||
|
# do_trim_silence=True,
|
||||||
|
# trim_db=45,
|
||||||
|
# mel_fmin=0,
|
||||||
|
# mel_fmax=8000,
|
||||||
|
# spec_gain=1.0,
|
||||||
|
# signal_norm=False,
|
||||||
|
# do_amp_to_db_linear=False,
|
||||||
|
# )
|
||||||
|
|
||||||
|
# ap = AudioProcessor.init_from_config(audio_config)
|
||||||
|
|
||||||
|
# # f0_dataset = F0Dataset(samples, ap, cache_path="/Users/erengolge/Projects/TTS/f0_tests", verbose=False, precompute_num_workers=4)
|
||||||
|
|
||||||
|
# # dataloader = DataLoader(f0_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=f0_dataset.collate_fn)
|
||||||
|
# # for batch in dataloader:
|
||||||
|
# # print(batch)
|
||||||
|
# # breakpoint()
|
||||||
|
# # break
|
||||||
|
|
||||||
|
# dataset = TTSDataset(
|
||||||
|
# outputs_per_step=1,
|
||||||
|
# compute_linear_spec=False,
|
||||||
|
# samples=samples,
|
||||||
|
# ap=ap,
|
||||||
|
# return_wav=False,
|
||||||
|
# batch_group_size=0,
|
||||||
|
# min_seq_len=0,
|
||||||
|
# max_seq_len=500,
|
||||||
|
# use_noise_augment=False,
|
||||||
|
# verbose=True,
|
||||||
|
# speaker_id_mapping=None,
|
||||||
|
# d_vector_mapping=None,
|
||||||
|
# compute_f0=True,
|
||||||
|
# f0_cache_path="/Users/erengolge/Projects/TTS/f0_tests",
|
||||||
|
# tokenizer=tokenizer,
|
||||||
|
# phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests",
|
||||||
|
# precompute_num_workers=4,
|
||||||
|
# )
|
||||||
|
|
||||||
|
# dataloader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn)
|
||||||
# for batch in dataloader:
|
# for batch in dataloader:
|
||||||
# print(batch)
|
# print(batch)
|
||||||
# break
|
# break
|
||||||
|
|
||||||
audio_config = BaseAudioConfig(
|
|
||||||
sample_rate=22050,
|
|
||||||
win_length=1024,
|
|
||||||
hop_length=256,
|
|
||||||
num_mels=80,
|
|
||||||
preemphasis=0.0,
|
|
||||||
ref_level_db=20,
|
|
||||||
log_func="np.log",
|
|
||||||
do_trim_silence=True,
|
|
||||||
trim_db=45,
|
|
||||||
mel_fmin=0,
|
|
||||||
mel_fmax=8000,
|
|
||||||
spec_gain=1.0,
|
|
||||||
signal_norm=False,
|
|
||||||
do_amp_to_db_linear=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
ap = AudioProcessor.init_from_config(audio_config)
|
|
||||||
|
|
||||||
# f0_dataset = F0Dataset(samples, ap, cache_path="/Users/erengolge/Projects/TTS/f0_tests", verbose=False, precompute_num_workers=4)
|
|
||||||
|
|
||||||
# dataloader = DataLoader(f0_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=f0_dataset.collate_fn)
|
|
||||||
# for batch in dataloader:
|
|
||||||
# print(batch)
|
|
||||||
# breakpoint()
|
|
||||||
# break
|
|
||||||
|
|
||||||
dataset = TTSDataset(
|
|
||||||
outputs_per_step=1,
|
|
||||||
compute_linear_spec=False,
|
|
||||||
samples=samples,
|
|
||||||
ap=ap,
|
|
||||||
return_wav=False,
|
|
||||||
batch_group_size=0,
|
|
||||||
min_seq_len=0,
|
|
||||||
max_seq_len=500,
|
|
||||||
use_noise_augment=False,
|
|
||||||
verbose=True,
|
|
||||||
speaker_id_mapping=None,
|
|
||||||
d_vector_mapping=None,
|
|
||||||
compute_f0=True,
|
|
||||||
f0_cache_path="/Users/erengolge/Projects/TTS/f0_tests",
|
|
||||||
tokenizer=tokenizer,
|
|
||||||
phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests",
|
|
||||||
precompute_num_workers=4,
|
|
||||||
)
|
|
||||||
|
|
||||||
dataloader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn)
|
|
||||||
for batch in dataloader:
|
|
||||||
print(batch)
|
|
||||||
break
|
|
||||||
|
|
|
@ -199,10 +199,10 @@ def synthesis(
|
||||||
wav = model_outputs.squeeze(0)
|
wav = model_outputs.squeeze(0)
|
||||||
else:
|
else:
|
||||||
if use_griffin_lim:
|
if use_griffin_lim:
|
||||||
wav = inv_spectrogram(model_outputs, ap, CONFIG)
|
wav = inv_spectrogram(model_outputs, model.ap, CONFIG)
|
||||||
# trim silence
|
# trim silence
|
||||||
if do_trim_silence:
|
if do_trim_silence:
|
||||||
wav = trim_silence(wav, ap)
|
wav = trim_silence(wav, model.ap)
|
||||||
return_dict = {
|
return_dict = {
|
||||||
"wav": wav,
|
"wav": wav,
|
||||||
"alignments": alignments,
|
"alignments": alignments,
|
||||||
|
|
|
@ -1,3 +1,8 @@
|
||||||
|
from dataclasses import replace
|
||||||
|
|
||||||
|
from TTS.tts.configs.shared_configs import CharactersConfig
|
||||||
|
|
||||||
|
|
||||||
def parse_symbols():
|
def parse_symbols():
|
||||||
return {
|
return {
|
||||||
"pad": _pad,
|
"pad": _pad,
|
||||||
|
@ -29,46 +34,49 @@ _diacrilics = "ɚ˞ɫ"
|
||||||
_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
|
_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
|
||||||
|
|
||||||
|
|
||||||
def create_graphemes(
|
# def create_graphemes(
|
||||||
characters=_characters,
|
# characters=_characters,
|
||||||
punctuations=_punctuations,
|
# punctuations=_punctuations,
|
||||||
pad=_pad,
|
# pad=_pad,
|
||||||
eos=_eos,
|
# eos=_eos,
|
||||||
bos=_bos,
|
# bos=_bos,
|
||||||
blank=_blank,
|
# blank=_blank,
|
||||||
unique=True,
|
# unique=True,
|
||||||
): # pylint: disable=redefined-outer-name
|
# ): # pylint: disable=redefined-outer-name
|
||||||
"""Function to create default characters and phonemes"""
|
# """Function to create default characters and phonemes"""
|
||||||
# create graphemes
|
# # create graphemes
|
||||||
_graphemes = list(characters)
|
# = (
|
||||||
_graphemes = [bos] + _graphemes if len(bos) > 0 and bos is not None else _graphemes
|
# sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
|
||||||
_graphemes = [eos] + _graphemes if len(bos) > 0 and eos is not None else _graphemes
|
# ) # this is to keep previous models compatible.
|
||||||
_graphemes = [pad] + _graphemes if len(bos) > 0 and pad is not None else _graphemes
|
# _graphemes = list(characters)
|
||||||
_graphemes = [blank] + _graphemes if len(bos) > 0 and blank is not None else _graphemes
|
# _graphemes = [bos] + _graphemes if len(bos) > 0 and bos is not None else _graphemes
|
||||||
_graphemes = _graphemes + list(punctuations)
|
# _graphemes = [eos] + _graphemes if len(bos) > 0 and eos is not None else _graphemes
|
||||||
return _graphemes, _phonemes
|
# _graphemes = [pad] + _graphemes if len(bos) > 0 and pad is not None else _graphemes
|
||||||
|
# _graphemes = [blank] + _graphemes if len(bos) > 0 and blank is not None else _graphemes
|
||||||
|
# _graphemes = _graphemes + list(punctuations)
|
||||||
|
# return _graphemes, _phonemes
|
||||||
|
|
||||||
|
|
||||||
def create_phonemes(
|
# def create_phonemes(
|
||||||
phonemes=_phonemes, punctuations=_punctuations, pad=_pad, eos=_eos, bos=_bos, blank=_blank, unique=True
|
# phonemes=_phonemes, punctuations=_punctuations, pad=_pad, eos=_eos, bos=_bos, blank=_blank, unique=True
|
||||||
):
|
# ):
|
||||||
# create phonemes
|
# # create phonemes
|
||||||
_phonemes = None
|
# _phonemes = None
|
||||||
_phonemes_sorted = (
|
# _phonemes_sorted = (
|
||||||
sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
|
# sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
|
||||||
) # this is to keep previous models compatible.
|
# ) # this is to keep previous models compatible.
|
||||||
_phonemes = list(_phonemes_sorted)
|
# _phonemes = list(_phonemes_sorted)
|
||||||
_phonemes = [bos] + _phonemes if len(bos) > 0 and bos is not None else _phonemes
|
# _phonemes = [bos] + _phonemes if len(bos) > 0 and bos is not None else _phonemes
|
||||||
_phonemes = [eos] + _phonemes if len(bos) > 0 and eos is not None else _phonemes
|
# _phonemes = [eos] + _phonemes if len(bos) > 0 and eos is not None else _phonemes
|
||||||
_phonemes = [pad] + _phonemes if len(bos) > 0 and pad is not None else _phonemes
|
# _phonemes = [pad] + _phonemes if len(bos) > 0 and pad is not None else _phonemes
|
||||||
_phonemes = [blank] + _phonemes if len(bos) > 0 and blank is not None else _phonemes
|
# _phonemes = [blank] + _phonemes if len(bos) > 0 and blank is not None else _phonemes
|
||||||
_phonemes = _phonemes + list(punctuations)
|
# _phonemes = _phonemes + list(punctuations)
|
||||||
_phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
|
# _phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
|
||||||
return _phonemes
|
# return _phonemes
|
||||||
|
|
||||||
|
|
||||||
graphemes = create_graphemes(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
|
# DEF_GRAPHEMES = create_graphemes(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
|
||||||
phonemes = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
|
# DEF_PHONEMES = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
|
||||||
|
|
||||||
|
|
||||||
class BaseCharacters:
|
class BaseCharacters:
|
||||||
|
@ -114,7 +122,7 @@ class BaseCharacters:
|
||||||
eos: str,
|
eos: str,
|
||||||
bos: str,
|
bos: str,
|
||||||
blank: str,
|
blank: str,
|
||||||
is_unique: bool = True,
|
is_unique: bool = False,
|
||||||
is_sorted: bool = True,
|
is_sorted: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._characters = characters
|
self._characters = characters
|
||||||
|
@ -202,14 +210,20 @@ class BaseCharacters:
|
||||||
_vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
|
_vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
|
||||||
self._vocab = _vocab + list(self._punctuations)
|
self._vocab = _vocab + list(self._punctuations)
|
||||||
self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
|
self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
|
||||||
self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)}
|
self._id_to_char = {
|
||||||
|
idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension
|
||||||
|
}
|
||||||
if self.is_unique:
|
if self.is_unique:
|
||||||
|
duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
|
||||||
assert (
|
assert (
|
||||||
len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
|
len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
|
||||||
), f" [!] There are duplicate characters in the character set. {set([x for x in self.vocab if self.vocab.count(x) > 1])}"
|
), f" [!] There are duplicate characters in the character set. {duplicates}"
|
||||||
|
|
||||||
def char_to_id(self, char: str) -> int:
|
def char_to_id(self, char: str) -> int:
|
||||||
|
try:
|
||||||
return self._char_to_id[char]
|
return self._char_to_id[char]
|
||||||
|
except KeyError as e:
|
||||||
|
raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e
|
||||||
|
|
||||||
def id_to_char(self, idx: int) -> str:
|
def id_to_char(self, idx: int) -> str:
|
||||||
return self._id_to_char[idx]
|
return self._id_to_char[idx]
|
||||||
|
@ -229,9 +243,23 @@ class BaseCharacters:
|
||||||
print(f"{indent}| > Num chars: {self.num_chars}")
|
print(f"{indent}| > Num chars: {self.num_chars}")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def init_from_config(config: "Coqpit"):
|
def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument
|
||||||
return BaseCharacters(
|
"""Init your character class from a config.
|
||||||
**config.characters if config.characters is not None else {},
|
|
||||||
|
Implement this method for your subclass.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
def to_config(self) -> "CharactersConfig":
|
||||||
|
return CharactersConfig(
|
||||||
|
characters=self._characters,
|
||||||
|
punctuations=self._punctuations,
|
||||||
|
pad=self._pad,
|
||||||
|
eos=self._eos,
|
||||||
|
bos=self._bos,
|
||||||
|
blank=self._blank,
|
||||||
|
is_unique=self.is_unique,
|
||||||
|
is_sorted=self.is_sorted,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -275,18 +303,24 @@ class IPAPhonemes(BaseCharacters):
|
||||||
eos: str = _eos,
|
eos: str = _eos,
|
||||||
bos: str = _bos,
|
bos: str = _bos,
|
||||||
blank: str = _blank,
|
blank: str = _blank,
|
||||||
is_unique: bool = True,
|
is_unique: bool = False,
|
||||||
is_sorted: bool = True,
|
is_sorted: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
|
super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def init_from_config(config: "Coqpit"):
|
def init_from_config(config: "Coqpit"):
|
||||||
|
"""Init a IPAPhonemes object from a model config
|
||||||
|
|
||||||
|
If characters are not defined in the config, it will be set to the default characters and the config
|
||||||
|
will be updated.
|
||||||
|
"""
|
||||||
# band-aid for compatibility with old models
|
# band-aid for compatibility with old models
|
||||||
if "characters" in config and config.characters is not None:
|
if "characters" in config and config.characters is not None:
|
||||||
if "phonemes" in config.characters and config.characters.phonemes is not None:
|
if "phonemes" in config.characters and config.characters.phonemes is not None:
|
||||||
config.characters["characters"] = config.characters["phonemes"]
|
config.characters["characters"] = config.characters["phonemes"]
|
||||||
return IPAPhonemes(
|
return (
|
||||||
|
IPAPhonemes(
|
||||||
characters=config.characters["characters"],
|
characters=config.characters["characters"],
|
||||||
punctuations=config.characters["punctuations"],
|
punctuations=config.characters["punctuations"],
|
||||||
pad=config.characters["pad"],
|
pad=config.characters["pad"],
|
||||||
|
@ -295,11 +329,16 @@ class IPAPhonemes(BaseCharacters):
|
||||||
blank=config.characters["blank"],
|
blank=config.characters["blank"],
|
||||||
is_unique=config.characters["is_unique"],
|
is_unique=config.characters["is_unique"],
|
||||||
is_sorted=config.characters["is_sorted"],
|
is_sorted=config.characters["is_sorted"],
|
||||||
|
),
|
||||||
|
config,
|
||||||
)
|
)
|
||||||
else:
|
# use character set from config
|
||||||
return IPAPhonemes(
|
if config.characters is not None:
|
||||||
**config.characters if config.characters is not None else {},
|
return IPAPhonemes(**config.characters), config
|
||||||
)
|
# return default character set
|
||||||
|
characters = IPAPhonemes()
|
||||||
|
new_config = replace(config, characters=characters.to_config())
|
||||||
|
return characters, new_config
|
||||||
|
|
||||||
|
|
||||||
class Graphemes(BaseCharacters):
|
class Graphemes(BaseCharacters):
|
||||||
|
@ -339,24 +378,42 @@ class Graphemes(BaseCharacters):
|
||||||
eos: str = _eos,
|
eos: str = _eos,
|
||||||
bos: str = _bos,
|
bos: str = _bos,
|
||||||
blank: str = _blank,
|
blank: str = _blank,
|
||||||
is_unique: bool = True,
|
is_unique: bool = False,
|
||||||
is_sorted: bool = True,
|
is_sorted: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
|
super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def init_from_config(config: "Coqpit"):
|
def init_from_config(config: "Coqpit"):
|
||||||
return Graphemes(
|
"""Init a Graphemes object from a model config
|
||||||
**config.characters if config.characters is not None else {},
|
|
||||||
|
If characters are not defined in the config, it will be set to the default characters and the config
|
||||||
|
will be updated.
|
||||||
|
"""
|
||||||
|
if config.characters is not None:
|
||||||
|
# band-aid for compatibility with old models
|
||||||
|
if "phonemes" in config.characters:
|
||||||
|
return (
|
||||||
|
Graphemes(
|
||||||
|
characters=config.characters["characters"],
|
||||||
|
punctuations=config.characters["punctuations"],
|
||||||
|
pad=config.characters["pad"],
|
||||||
|
eos=config.characters["eos"],
|
||||||
|
bos=config.characters["bos"],
|
||||||
|
blank=config.characters["blank"],
|
||||||
|
is_unique=config.characters["is_unique"],
|
||||||
|
is_sorted=config.characters["is_sorted"],
|
||||||
|
),
|
||||||
|
config,
|
||||||
)
|
)
|
||||||
|
return Graphemes(**config.characters), config
|
||||||
|
characters = Graphemes()
|
||||||
|
new_config = replace(config, characters=characters.to_config())
|
||||||
|
return characters, new_config
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
gr = Graphemes()
|
gr = Graphemes()
|
||||||
ph = IPAPhonemes()
|
ph = IPAPhonemes()
|
||||||
|
gr.print_log()
|
||||||
print(gr.vocab)
|
ph.print_log()
|
||||||
print(ph.vocab)
|
|
||||||
|
|
||||||
print(gr.num_chars)
|
|
||||||
assert "a" == gr.id_to_char(gr.char_to_id("a"))
|
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
import abc
|
import abc
|
||||||
import itertools
|
from typing import List, Tuple
|
||||||
from typing import List, Tuple, Union
|
|
||||||
|
|
||||||
from TTS.tts.utils.text.punctuation import Punctuation
|
from TTS.tts.utils.text.punctuation import Punctuation
|
||||||
|
|
||||||
|
@ -8,6 +7,19 @@ from TTS.tts.utils.text.punctuation import Punctuation
|
||||||
class BasePhonemizer(abc.ABC):
|
class BasePhonemizer(abc.ABC):
|
||||||
"""Base phonemizer class
|
"""Base phonemizer class
|
||||||
|
|
||||||
|
Phonemization follows the following steps:
|
||||||
|
1. Preprocessing:
|
||||||
|
- remove empty lines
|
||||||
|
- remove punctuation
|
||||||
|
- keep track of punctuation marks
|
||||||
|
|
||||||
|
2. Phonemization:
|
||||||
|
- convert text to phonemes
|
||||||
|
|
||||||
|
3. Postprocessing:
|
||||||
|
- join phonemes
|
||||||
|
- restore punctuation marks
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
language (str):
|
language (str):
|
||||||
Language used by the phonemizer.
|
Language used by the phonemizer.
|
||||||
|
@ -51,40 +63,30 @@ class BasePhonemizer(abc.ABC):
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def name():
|
def name():
|
||||||
"""The name of the backend"""
|
"""The name of the backend"""
|
||||||
|
...
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def is_available(cls):
|
def is_available(cls):
|
||||||
"""Returns True if the backend is installed, False otherwise"""
|
"""Returns True if the backend is installed, False otherwise"""
|
||||||
|
...
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def version(cls):
|
def version(cls):
|
||||||
"""Return the backend version as a tuple (major, minor, patch)"""
|
"""Return the backend version as a tuple (major, minor, patch)"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def supported_languages():
|
def supported_languages():
|
||||||
"""Return a dict of language codes -> name supported by the backend"""
|
"""Return a dict of language codes -> name supported by the backend"""
|
||||||
|
...
|
||||||
|
|
||||||
def is_supported_language(self, language):
|
def is_supported_language(self, language):
|
||||||
"""Returns True if `language` is supported by the backend"""
|
"""Returns True if `language` is supported by the backend"""
|
||||||
return language in self.supported_languages()
|
return language in self.supported_languages()
|
||||||
|
|
||||||
fr"""
|
|
||||||
Phonemization follows the following steps:
|
|
||||||
1. Preprocessing:
|
|
||||||
- remove empty lines
|
|
||||||
- remove punctuation
|
|
||||||
- keep track of punctuation marks
|
|
||||||
|
|
||||||
2. Phonemization:
|
|
||||||
- convert text to phonemes
|
|
||||||
|
|
||||||
3. Postprocessing:
|
|
||||||
- join phonemes
|
|
||||||
- restore punctuation marks
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def _phonemize(self, text, separator):
|
def _phonemize(self, text, separator):
|
||||||
"""The main phonemization method"""
|
"""The main phonemization method"""
|
||||||
|
|
|
@ -28,12 +28,13 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
|
||||||
"1", # UTF8 text encoding
|
"1", # UTF8 text encoding
|
||||||
]
|
]
|
||||||
cmd.extend(args)
|
cmd.extend(args)
|
||||||
logging.debug("espeakng: executing %s" % repr(cmd))
|
logging.debug("espeakng: executing %s", repr(cmd))
|
||||||
p = subprocess.Popen(
|
|
||||||
|
with subprocess.Popen(
|
||||||
cmd,
|
cmd,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.STDOUT,
|
stderr=subprocess.STDOUT,
|
||||||
)
|
) as p:
|
||||||
res = iter(p.stdout.readline, b"")
|
res = iter(p.stdout.readline, b"")
|
||||||
if not sync:
|
if not sync:
|
||||||
p.stdout.close()
|
p.stdout.close()
|
||||||
|
@ -85,7 +86,24 @@ class ESpeak(BasePhonemizer):
|
||||||
def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
|
def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
|
||||||
if self._ESPEAK_LIB is None:
|
if self._ESPEAK_LIB is None:
|
||||||
raise Exception("Unknown backend: %s" % backend)
|
raise Exception("Unknown backend: %s" % backend)
|
||||||
|
|
||||||
|
# band-aid for backwards compatibility
|
||||||
|
if language == "en":
|
||||||
|
language = "en-us"
|
||||||
|
|
||||||
super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
|
super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
|
||||||
|
if backend is not None:
|
||||||
|
self.backend = backend
|
||||||
|
|
||||||
|
@property
|
||||||
|
def backend(self):
|
||||||
|
return self._ESPEAK_LIB
|
||||||
|
|
||||||
|
@backend.setter
|
||||||
|
def backend(self, backend):
|
||||||
|
if backend not in ["espeak", "espeak-ng"]:
|
||||||
|
raise Exception("Unknown backend: %s" % backend)
|
||||||
|
self._ESPEAK_LIB = backend
|
||||||
|
|
||||||
def auto_set_espeak_lib(self) -> None:
|
def auto_set_espeak_lib(self) -> None:
|
||||||
if is_tool("espeak-ng"):
|
if is_tool("espeak-ng"):
|
||||||
|
@ -115,24 +133,25 @@ class ESpeak(BasePhonemizer):
|
||||||
# espeak and espeak-ng parses `ipa` differently
|
# espeak and espeak-ng parses `ipa` differently
|
||||||
if tie:
|
if tie:
|
||||||
# use '͡' between phonemes
|
# use '͡' between phonemes
|
||||||
if _DEF_ESPEAK_LIB == "espeak":
|
if self.backend == "espeak":
|
||||||
args.append("--ipa=1")
|
args.append("--ipa=1")
|
||||||
else:
|
else:
|
||||||
args.append("--ipa=3")
|
args.append("--ipa=3")
|
||||||
else:
|
else:
|
||||||
# split with '_'
|
# split with '_'
|
||||||
if _DEF_ESPEAK_LIB == "espeak":
|
if self.backend == "espeak":
|
||||||
args.append("--ipa=3")
|
args.append("--ipa=3")
|
||||||
else:
|
else:
|
||||||
args.append("--ipa=1")
|
args.append("--ipa=1")
|
||||||
if tie:
|
if tie:
|
||||||
args.append("--tie=%s" % tie)
|
args.append("--tie=%s" % tie)
|
||||||
|
|
||||||
args.append('"' + text + '"')
|
args.append('"' + text + '"')
|
||||||
# compute phonemes
|
# compute phonemes
|
||||||
phonemes = ""
|
phonemes = ""
|
||||||
for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
|
for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
|
||||||
logging.debug("line: %s" % repr(line))
|
logging.debug("line: %s", repr(line))
|
||||||
phonemes += line.decode("utf8").strip()
|
phonemes += line.decode("utf8").strip()[2:] # skip two redundant characters
|
||||||
return phonemes.replace("_", separator)
|
return phonemes.replace("_", separator)
|
||||||
|
|
||||||
def _phonemize(self, text, separator=None):
|
def _phonemize(self, text, separator=None):
|
||||||
|
@ -146,7 +165,7 @@ class ESpeak(BasePhonemizer):
|
||||||
Dict: Dictionary of language codes.
|
Dict: Dictionary of language codes.
|
||||||
"""
|
"""
|
||||||
if _DEF_ESPEAK_LIB is None:
|
if _DEF_ESPEAK_LIB is None:
|
||||||
raise {}
|
return {}
|
||||||
args = ["--voices"]
|
args = ["--voices"]
|
||||||
langs = {}
|
langs = {}
|
||||||
count = 0
|
count = 0
|
||||||
|
@ -157,7 +176,7 @@ class ESpeak(BasePhonemizer):
|
||||||
lang_code = cols[1]
|
lang_code = cols[1]
|
||||||
lang_name = cols[3]
|
lang_name = cols[3]
|
||||||
langs[lang_code] = lang_name
|
langs[lang_code] = lang_name
|
||||||
logging.debug("line: %s" % repr(line))
|
logging.debug("line: %s", repr(line))
|
||||||
count += 1
|
count += 1
|
||||||
return langs
|
return langs
|
||||||
|
|
||||||
|
@ -168,9 +187,9 @@ class ESpeak(BasePhonemizer):
|
||||||
str: Version of the used backend.
|
str: Version of the used backend.
|
||||||
"""
|
"""
|
||||||
args = ["--version"]
|
args = ["--version"]
|
||||||
for line in _espeak_exe(_DEF_ESPEAK_LIB, args, sync=True):
|
for line in _espeak_exe(self.backend, args, sync=True):
|
||||||
version = line.decode("utf8").strip().split()[2]
|
version = line.decode("utf8").strip().split()[2]
|
||||||
logging.debug("line: %s" % repr(line))
|
logging.debug("line: %s", repr(line))
|
||||||
return version
|
return version
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import importlib
|
import importlib
|
||||||
from os import stat
|
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import gruut
|
import gruut
|
||||||
|
@ -55,7 +54,7 @@ class Gruut(BasePhonemizer):
|
||||||
def name():
|
def name():
|
||||||
return "gruut"
|
return "gruut"
|
||||||
|
|
||||||
def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str:
|
def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str: # pylint: disable=unused-argument
|
||||||
"""Convert input text to phonemes.
|
"""Convert input text to phonemes.
|
||||||
|
|
||||||
Gruut phonemizes the given `str` by seperating each phoneme character with `separator`, even for characters
|
Gruut phonemizes the given `str` by seperating each phoneme character with `separator`, even for characters
|
||||||
|
|
|
@ -30,7 +30,7 @@ class JA_JP_Phonemizer(BasePhonemizer):
|
||||||
|
|
||||||
language = "ja-jp"
|
language = "ja-jp"
|
||||||
|
|
||||||
def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs):
|
def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs): # pylint: disable=unused-argument
|
||||||
super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
|
super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -61,12 +61,12 @@ class JA_JP_Phonemizer(BasePhonemizer):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
text = "これは、電話をかけるための私の日本語の例のテキストです。"
|
# text = "これは、電話をかけるための私の日本語の例のテキストです。"
|
||||||
e = JA_JP_Phonemizer()
|
# e = JA_JP_Phonemizer()
|
||||||
print(e.supported_languages())
|
# print(e.supported_languages())
|
||||||
print(e.version())
|
# print(e.version())
|
||||||
print(e.language)
|
# print(e.language)
|
||||||
print(e.name())
|
# print(e.name())
|
||||||
print(e.is_available())
|
# print(e.is_available())
|
||||||
print("`" + e.phonemize(text) + "`")
|
# print("`" + e.phonemize(text) + "`")
|
||||||
|
|
|
@ -17,7 +17,7 @@ class MultiPhonemizer:
|
||||||
lang_to_phonemizer_name = DEF_LANG_TO_PHONEMIZER
|
lang_to_phonemizer_name = DEF_LANG_TO_PHONEMIZER
|
||||||
language = "multi-lingual"
|
language = "multi-lingual"
|
||||||
|
|
||||||
def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None:
|
def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None: # pylint: disable=dangerous-default-value
|
||||||
self.lang_to_phonemizer_name.update(custom_lang_to_phonemizer)
|
self.lang_to_phonemizer_name.update(custom_lang_to_phonemizer)
|
||||||
self.lang_to_phonemizer = self.init_phonemizers(self.lang_to_phonemizer_name)
|
self.lang_to_phonemizer = self.init_phonemizers(self.lang_to_phonemizer_name)
|
||||||
|
|
||||||
|
@ -40,16 +40,16 @@ class MultiPhonemizer:
|
||||||
return list(self.lang_to_phonemizer_name.keys())
|
return list(self.lang_to_phonemizer_name.keys())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
texts = {
|
# texts = {
|
||||||
"tr": "Merhaba, bu Türkçe bit örnek!",
|
# "tr": "Merhaba, bu Türkçe bit örnek!",
|
||||||
"en-us": "Hello, this is English example!",
|
# "en-us": "Hello, this is English example!",
|
||||||
"de": "Hallo, das ist ein Deutches Beipiel!",
|
# "de": "Hallo, das ist ein Deutches Beipiel!",
|
||||||
"zh-cn": "这是中国的例子",
|
# "zh-cn": "这是中国的例子",
|
||||||
}
|
# }
|
||||||
phonemes = {}
|
# phonemes = {}
|
||||||
ph = MultiPhonemizer()
|
# ph = MultiPhonemizer()
|
||||||
for lang, text in texts.items():
|
# for lang, text in texts.items():
|
||||||
phoneme = ph.phonemize(text, lang)
|
# phoneme = ph.phonemize(text, lang)
|
||||||
phonemes[lang] = phoneme
|
# phonemes[lang] = phoneme
|
||||||
print(phonemes)
|
# print(phonemes)
|
||||||
|
|
|
@ -25,14 +25,15 @@ class ZH_CN_Phonemizer(BasePhonemizer):
|
||||||
|
|
||||||
language = "zh-cn"
|
language = "zh-cn"
|
||||||
|
|
||||||
def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs):
|
def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs): # pylint: disable=unused-argument
|
||||||
super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
|
super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def name():
|
def name():
|
||||||
return "zh_cn_phonemizer"
|
return "zh_cn_phonemizer"
|
||||||
|
|
||||||
def phonemize_zh_cn(self, text: str, separator: str = "|") -> str:
|
@staticmethod
|
||||||
|
def phonemize_zh_cn(text: str, separator: str = "|") -> str:
|
||||||
ph = chinese_text_to_phonemes(text, separator)
|
ph = chinese_text_to_phonemes(text, separator)
|
||||||
return ph
|
return ph
|
||||||
|
|
||||||
|
@ -50,12 +51,12 @@ class ZH_CN_Phonemizer(BasePhonemizer):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
text = "这是,样本中文。"
|
# text = "这是,样本中文。"
|
||||||
e = ZH_CN_Phonemizer()
|
# e = ZH_CN_Phonemizer()
|
||||||
print(e.supported_languages())
|
# print(e.supported_languages())
|
||||||
print(e.version())
|
# print(e.version())
|
||||||
print(e.language)
|
# print(e.language)
|
||||||
print(e.name())
|
# print(e.name())
|
||||||
print(e.is_available())
|
# print(e.is_available())
|
||||||
print("`" + e.phonemize(text) + "`")
|
# print("`" + e.phonemize(text) + "`")
|
||||||
|
|
|
@ -130,7 +130,7 @@ class Punctuation:
|
||||||
return cls._restore(text, puncs, 0)
|
return cls._restore(text, puncs, 0)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _restore(cls, text, puncs, num):
|
def _restore(cls, text, puncs, num): # pylint: disable=too-many-return-statements
|
||||||
"""Auxiliary method for Punctuation.restore()"""
|
"""Auxiliary method for Punctuation.restore()"""
|
||||||
if not puncs:
|
if not puncs:
|
||||||
return text
|
return text
|
||||||
|
@ -159,14 +159,14 @@ class Punctuation:
|
||||||
return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:], num)
|
return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:], num)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# if __name__ == "__main__":
|
||||||
punc = Punctuation()
|
# punc = Punctuation()
|
||||||
text = "This is. This is, example!"
|
# text = "This is. This is, example!"
|
||||||
|
|
||||||
print(punc.strip(text))
|
# print(punc.strip(text))
|
||||||
|
|
||||||
split_text, puncs = punc.strip_to_restore(text)
|
# split_text, puncs = punc.strip_to_restore(text)
|
||||||
print(split_text, " ---- ", puncs)
|
# print(split_text, " ---- ", puncs)
|
||||||
|
|
||||||
restored_text = punc.restore(split_text, puncs)
|
# restored_text = punc.restore(split_text, puncs)
|
||||||
print(restored_text)
|
# print(restored_text)
|
||||||
|
|
|
@ -383,7 +383,6 @@ class AudioProcessor(object):
|
||||||
def init_from_config(config: "Coqpit"):
|
def init_from_config(config: "Coqpit"):
|
||||||
if "audio" in config:
|
if "audio" in config:
|
||||||
return AudioProcessor(**config.audio)
|
return AudioProcessor(**config.audio)
|
||||||
else:
|
|
||||||
return AudioProcessor(**config)
|
return AudioProcessor(**config)
|
||||||
|
|
||||||
### setting up the parameters ###
|
### setting up the parameters ###
|
||||||
|
|
|
@ -13,7 +13,6 @@ from TTS.tts.utils.speakers import SpeakerManager
|
||||||
# pylint: disable=unused-wildcard-import
|
# pylint: disable=unused-wildcard-import
|
||||||
# pylint: disable=wildcard-import
|
# pylint: disable=wildcard-import
|
||||||
from TTS.tts.utils.synthesis import synthesis, trim_silence
|
from TTS.tts.utils.synthesis import synthesis, trim_silence
|
||||||
from TTS.tts.utils.text import TTSTokenizer
|
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.vocoder.models import setup_model as setup_vocoder_model
|
from TTS.vocoder.models import setup_model as setup_vocoder_model
|
||||||
from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input
|
from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input
|
||||||
|
|
|
@ -314,7 +314,7 @@ class GAN(BaseVocoder):
|
||||||
data_items: List,
|
data_items: List,
|
||||||
verbose: bool,
|
verbose: bool,
|
||||||
num_gpus: int,
|
num_gpus: int,
|
||||||
rank: int = 0, # pylint: disable=unused-argument
|
rank: int = None, # pylint: disable=unused-argument
|
||||||
):
|
):
|
||||||
"""Initiate and return the GAN dataloader.
|
"""Initiate and return the GAN dataloader.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue