Make lint

This commit is contained in:
Eren Gölge 2021-12-07 12:51:58 +00:00
parent 30cfafce56
commit c9972e6f14
14 changed files with 319 additions and 236 deletions

View File

@ -111,8 +111,8 @@ def load_tts_samples(
meta_data_eval_all += meta_data_eval
meta_data_train_all += meta_data_train
# load attention masks for the duration predictor training
if dataset.meta_file_attn_mask:
meta_data = dict(load_attention_mask_meta_data(dataset["meta_file_attn_mask"]))
if d.meta_file_attn_mask:
meta_data = dict(load_attention_mask_meta_data(d["meta_file_attn_mask"]))
for idx, ins in enumerate(meta_data_train_all):
attn_file = meta_data[ins["audio_file"]].strip()
meta_data_train_all[idx].update({"alignment_file": attn_file})

View File

@ -1,7 +1,6 @@
import collections
import os
import random
from multiprocessing import Pool
from typing import Dict, List, Union
import numpy as np
@ -10,7 +9,6 @@ import tqdm
from torch.utils.data import Dataset
from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor
from TTS.tts.utils.text import TTSTokenizer
from TTS.utils.audio import AudioProcessor
@ -183,7 +181,7 @@ class TTSDataset(Dataset):
def get_phonemes(self, idx, text):
out_dict = self.phoneme_dataset[idx]
assert text == out_dict["text"], f"{text} != {out_dict['text']}"
assert out_dict["token_ids"].size > 0
assert len(out_dict["token_ids"]) > 0
return out_dict
def get_f0(self, idx):
@ -192,7 +190,8 @@ class TTSDataset(Dataset):
assert wav_file == out_dict["audio_file"]
return out_dict
def get_attn_maks(self, attn_file):
@staticmethod
def get_attn_mask(attn_file):
return np.load(attn_file)
def get_token_ids(self, idx, text):
@ -207,7 +206,7 @@ class TTSDataset(Dataset):
raw_text = item["text"]
wav = np.asarray(self.load_wav(item[]), dtype=np.float32)
wav = np.asarray(self.load_wav(item["audio_file"]), dtype=np.float32)
# apply noise for augmentation
if self.use_noise_augment:
@ -262,7 +261,7 @@ class TTSDataset(Dataset):
idxs = np.argsort(lengths) # ascending order
ignore_idx = []
keep_idx = []
for i, idx in enumerate(idxs):
for idx in idxs:
length = lengths[idx]
if length < min_len or length > max_len:
ignore_idx.append(idx)
@ -277,6 +276,7 @@ class TTSDataset(Dataset):
@staticmethod
def create_buckets(samples, batch_group_size: int):
assert batch_group_size > 0
for i in range(len(samples) // batch_group_size):
offset = i * batch_group_size
end_offset = offset + batch_group_size
@ -319,7 +319,8 @@ class TTSDataset(Dataset):
# shuffle batch groups
# create batches with similar length items
# the larger the `batch_group_size`, the higher the length variety in a batch.
samples = self.create_buckets(samples, self.batch_group_size)
if self.batch_group_size > 0:
samples = self.create_buckets(samples, self.batch_group_size)
# update items to the new sorted items
self.samples = samples
@ -571,6 +572,7 @@ class PhonemeDataset(Dataset):
We use pytorch dataloader because we are lazy.
"""
print("[*] Pre-computing phonemes...")
with tqdm.tqdm(total=len(self)) as pbar:
batch_size = num_workers if num_workers > 0 else 1
dataloder = torch.utils.data.DataLoader(
@ -658,16 +660,21 @@ class F0Dataset:
return len(self.samples)
def precompute(self, num_workers=0):
print("[*] Pre-computing F0s...")
with tqdm.tqdm(total=len(self)) as pbar:
batch_size = num_workers if num_workers > 0 else 1
# we do not normalize at preprocessing
normalize_f0 = self.normalize_f0
self.normalize_f0 = False
dataloder = torch.utils.data.DataLoader(
batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn
)
computed_data = []
for batch in dataloder:
f0 = batch["f0"]
computed_data.append([f for f in f0])
computed_data.append(f for f in f0)
pbar.update(batch_size)
self.normalize_f0 = normalize_f0
if self.normalize_f0:
computed_data = [tensor for batch in computed_data for tensor in batch] # flatten
@ -746,80 +753,80 @@ class F0Dataset:
print(f"{indent}| > Number of instances : {len(self.samples)}")
if __name__ == "__main__":
from torch.utils.data import DataLoader
# if __name__ == "__main__":
# from torch.utils.data import DataLoader
from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.text.characters import IPAPhonemes
from TTS.tts.utils.text.phonemizers import ESpeak
# from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
# from TTS.tts.datasets import load_tts_samples
# from TTS.tts.utils.text.characters import IPAPhonemes
# from TTS.tts.utils.text.phonemizers import ESpeak
dataset_config = BaseDatasetConfig(
name="ljspeech",
meta_file_train="metadata.csv",
path="/Users/erengolge/Projects/TTS/recipes/ljspeech/LJSpeech-1.1",
)
train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)
samples = train_samples + eval_samples
# dataset_config = BaseDatasetConfig(
# name="ljspeech",
# meta_file_train="metadata.csv",
# path="/Users/erengolge/Projects/TTS/recipes/ljspeech/LJSpeech-1.1",
# )
# train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)
# samples = train_samples + eval_samples
phonemizer = ESpeak(language="en-us")
tokenizer = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=phonemizer)
# ph_dataset = PhonemeDataset(samples, tokenizer, phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests")
# ph_dataset.precompute(num_workers=4)
# phonemizer = ESpeak(language="en-us")
# tokenizer = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=phonemizer)
# # ph_dataset = PhonemeDataset(samples, tokenizer, phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests")
# # ph_dataset.precompute(num_workers=4)
# dataloader = DataLoader(ph_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=ph_dataset.collate_fn)
# for batch in dataloader:
# print(batch)
# break
# # dataloader = DataLoader(ph_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=ph_dataset.collate_fn)
# # for batch in dataloader:
# # print(batch)
# # break
audio_config = BaseAudioConfig(
sample_rate=22050,
win_length=1024,
hop_length=256,
num_mels=80,
preemphasis=0.0,
ref_level_db=20,
log_func="np.log",
do_trim_silence=True,
trim_db=45,
mel_fmin=0,
mel_fmax=8000,
spec_gain=1.0,
signal_norm=False,
do_amp_to_db_linear=False,
)
# audio_config = BaseAudioConfig(
# sample_rate=22050,
# win_length=1024,
# hop_length=256,
# num_mels=80,
# preemphasis=0.0,
# ref_level_db=20,
# log_func="np.log",
# do_trim_silence=True,
# trim_db=45,
# mel_fmin=0,
# mel_fmax=8000,
# spec_gain=1.0,
# signal_norm=False,
# do_amp_to_db_linear=False,
# )
ap = AudioProcessor.init_from_config(audio_config)
# ap = AudioProcessor.init_from_config(audio_config)
# f0_dataset = F0Dataset(samples, ap, cache_path="/Users/erengolge/Projects/TTS/f0_tests", verbose=False, precompute_num_workers=4)
# # f0_dataset = F0Dataset(samples, ap, cache_path="/Users/erengolge/Projects/TTS/f0_tests", verbose=False, precompute_num_workers=4)
# dataloader = DataLoader(f0_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=f0_dataset.collate_fn)
# for batch in dataloader:
# print(batch)
# breakpoint()
# break
# # dataloader = DataLoader(f0_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=f0_dataset.collate_fn)
# # for batch in dataloader:
# # print(batch)
# # breakpoint()
# # break
dataset = TTSDataset(
outputs_per_step=1,
compute_linear_spec=False,
samples=samples,
ap=ap,
return_wav=False,
batch_group_size=0,
min_seq_len=0,
max_seq_len=500,
use_noise_augment=False,
verbose=True,
speaker_id_mapping=None,
d_vector_mapping=None,
compute_f0=True,
f0_cache_path="/Users/erengolge/Projects/TTS/f0_tests",
tokenizer=tokenizer,
phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests",
precompute_num_workers=4,
)
# dataset = TTSDataset(
# outputs_per_step=1,
# compute_linear_spec=False,
# samples=samples,
# ap=ap,
# return_wav=False,
# batch_group_size=0,
# min_seq_len=0,
# max_seq_len=500,
# use_noise_augment=False,
# verbose=True,
# speaker_id_mapping=None,
# d_vector_mapping=None,
# compute_f0=True,
# f0_cache_path="/Users/erengolge/Projects/TTS/f0_tests",
# tokenizer=tokenizer,
# phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests",
# precompute_num_workers=4,
# )
dataloader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn)
for batch in dataloader:
print(batch)
break
# dataloader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn)
# for batch in dataloader:
# print(batch)
# break

View File

@ -199,10 +199,10 @@ def synthesis(
wav = model_outputs.squeeze(0)
else:
if use_griffin_lim:
wav = inv_spectrogram(model_outputs, ap, CONFIG)
wav = inv_spectrogram(model_outputs, model.ap, CONFIG)
# trim silence
if do_trim_silence:
wav = trim_silence(wav, ap)
wav = trim_silence(wav, model.ap)
return_dict = {
"wav": wav,
"alignments": alignments,

View File

@ -1,3 +1,8 @@
from dataclasses import replace
from TTS.tts.configs.shared_configs import CharactersConfig
def parse_symbols():
return {
"pad": _pad,
@ -29,46 +34,49 @@ _diacrilics = "ɚ˞ɫ"
_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
def create_graphemes(
characters=_characters,
punctuations=_punctuations,
pad=_pad,
eos=_eos,
bos=_bos,
blank=_blank,
unique=True,
): # pylint: disable=redefined-outer-name
"""Function to create default characters and phonemes"""
# create graphemes
_graphemes = list(characters)
_graphemes = [bos] + _graphemes if len(bos) > 0 and bos is not None else _graphemes
_graphemes = [eos] + _graphemes if len(bos) > 0 and eos is not None else _graphemes
_graphemes = [pad] + _graphemes if len(bos) > 0 and pad is not None else _graphemes
_graphemes = [blank] + _graphemes if len(bos) > 0 and blank is not None else _graphemes
_graphemes = _graphemes + list(punctuations)
return _graphemes, _phonemes
# def create_graphemes(
# characters=_characters,
# punctuations=_punctuations,
# pad=_pad,
# eos=_eos,
# bos=_bos,
# blank=_blank,
# unique=True,
# ): # pylint: disable=redefined-outer-name
# """Function to create default characters and phonemes"""
# # create graphemes
# = (
# sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
# ) # this is to keep previous models compatible.
# _graphemes = list(characters)
# _graphemes = [bos] + _graphemes if len(bos) > 0 and bos is not None else _graphemes
# _graphemes = [eos] + _graphemes if len(bos) > 0 and eos is not None else _graphemes
# _graphemes = [pad] + _graphemes if len(bos) > 0 and pad is not None else _graphemes
# _graphemes = [blank] + _graphemes if len(bos) > 0 and blank is not None else _graphemes
# _graphemes = _graphemes + list(punctuations)
# return _graphemes, _phonemes
def create_phonemes(
phonemes=_phonemes, punctuations=_punctuations, pad=_pad, eos=_eos, bos=_bos, blank=_blank, unique=True
):
# create phonemes
_phonemes = None
_phonemes_sorted = (
sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
) # this is to keep previous models compatible.
_phonemes = list(_phonemes_sorted)
_phonemes = [bos] + _phonemes if len(bos) > 0 and bos is not None else _phonemes
_phonemes = [eos] + _phonemes if len(bos) > 0 and eos is not None else _phonemes
_phonemes = [pad] + _phonemes if len(bos) > 0 and pad is not None else _phonemes
_phonemes = [blank] + _phonemes if len(bos) > 0 and blank is not None else _phonemes
_phonemes = _phonemes + list(punctuations)
_phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
return _phonemes
# def create_phonemes(
# phonemes=_phonemes, punctuations=_punctuations, pad=_pad, eos=_eos, bos=_bos, blank=_blank, unique=True
# ):
# # create phonemes
# _phonemes = None
# _phonemes_sorted = (
# sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
# ) # this is to keep previous models compatible.
# _phonemes = list(_phonemes_sorted)
# _phonemes = [bos] + _phonemes if len(bos) > 0 and bos is not None else _phonemes
# _phonemes = [eos] + _phonemes if len(bos) > 0 and eos is not None else _phonemes
# _phonemes = [pad] + _phonemes if len(bos) > 0 and pad is not None else _phonemes
# _phonemes = [blank] + _phonemes if len(bos) > 0 and blank is not None else _phonemes
# _phonemes = _phonemes + list(punctuations)
# _phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
# return _phonemes
graphemes = create_graphemes(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
phonemes = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
# DEF_GRAPHEMES = create_graphemes(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
# DEF_PHONEMES = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
class BaseCharacters:
@ -114,7 +122,7 @@ class BaseCharacters:
eos: str,
bos: str,
blank: str,
is_unique: bool = True,
is_unique: bool = False,
is_sorted: bool = True,
) -> None:
self._characters = characters
@ -202,14 +210,20 @@ class BaseCharacters:
_vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
self._vocab = _vocab + list(self._punctuations)
self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)}
self._id_to_char = {
idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension
}
if self.is_unique:
duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
assert (
len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
), f" [!] There are duplicate characters in the character set. {set([x for x in self.vocab if self.vocab.count(x) > 1])}"
), f" [!] There are duplicate characters in the character set. {duplicates}"
def char_to_id(self, char: str) -> int:
return self._char_to_id[char]
try:
return self._char_to_id[char]
except KeyError as e:
raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e
def id_to_char(self, idx: int) -> str:
return self._id_to_char[idx]
@ -229,9 +243,23 @@ class BaseCharacters:
print(f"{indent}| > Num chars: {self.num_chars}")
@staticmethod
def init_from_config(config: "Coqpit"):
return BaseCharacters(
**config.characters if config.characters is not None else {},
def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument
"""Init your character class from a config.
Implement this method for your subclass.
"""
...
def to_config(self) -> "CharactersConfig":
return CharactersConfig(
characters=self._characters,
punctuations=self._punctuations,
pad=self._pad,
eos=self._eos,
bos=self._bos,
blank=self._blank,
is_unique=self.is_unique,
is_sorted=self.is_sorted,
)
@ -275,31 +303,42 @@ class IPAPhonemes(BaseCharacters):
eos: str = _eos,
bos: str = _bos,
blank: str = _blank,
is_unique: bool = True,
is_unique: bool = False,
is_sorted: bool = True,
) -> None:
super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
@staticmethod
def init_from_config(config: "Coqpit"):
"""Init an IPAPhonemes object from a model config
If characters are not defined in the config, it will be set to the default characters and the config
will be updated.
"""
# band-aid for compatibility with old models
if "characters" in config and config.characters is not None:
if "phonemes" in config.characters and config.characters.phonemes is not None:
config.characters["characters"] = config.characters["phonemes"]
return IPAPhonemes(
characters=config.characters["characters"],
punctuations=config.characters["punctuations"],
pad=config.characters["pad"],
eos=config.characters["eos"],
bos=config.characters["bos"],
blank=config.characters["blank"],
is_unique=config.characters["is_unique"],
is_sorted=config.characters["is_sorted"],
)
else:
return IPAPhonemes(
**config.characters if config.characters is not None else {},
return (
IPAPhonemes(
characters=config.characters["characters"],
punctuations=config.characters["punctuations"],
pad=config.characters["pad"],
eos=config.characters["eos"],
bos=config.characters["bos"],
blank=config.characters["blank"],
is_unique=config.characters["is_unique"],
is_sorted=config.characters["is_sorted"],
),
config,
)
# use character set from config
if config.characters is not None:
return IPAPhonemes(**config.characters), config
# return default character set
characters = IPAPhonemes()
new_config = replace(config, characters=characters.to_config())
return characters, new_config
class Graphemes(BaseCharacters):
@ -339,24 +378,42 @@ class Graphemes(BaseCharacters):
eos: str = _eos,
bos: str = _bos,
blank: str = _blank,
is_unique: bool = True,
is_unique: bool = False,
is_sorted: bool = True,
) -> None:
super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
@staticmethod
def init_from_config(config: "Coqpit"):
return Graphemes(
**config.characters if config.characters is not None else {},
)
"""Init a Graphemes object from a model config
If characters are not defined in the config, it will be set to the default characters and the config
will be updated.
"""
if config.characters is not None:
# band-aid for compatibility with old models
if "phonemes" in config.characters:
return (
Graphemes(
characters=config.characters["characters"],
punctuations=config.characters["punctuations"],
pad=config.characters["pad"],
eos=config.characters["eos"],
bos=config.characters["bos"],
blank=config.characters["blank"],
is_unique=config.characters["is_unique"],
is_sorted=config.characters["is_sorted"],
),
config,
)
return Graphemes(**config.characters), config
characters = Graphemes()
new_config = replace(config, characters=characters.to_config())
return characters, new_config
if __name__ == "__main__":
gr = Graphemes()
ph = IPAPhonemes()
print(gr.vocab)
print(ph.vocab)
print(gr.num_chars)
assert "a" == gr.id_to_char(gr.char_to_id("a"))
gr.print_log()
ph.print_log()

View File

@ -1,6 +1,5 @@
import abc
import itertools
from typing import List, Tuple, Union
from typing import List, Tuple
from TTS.tts.utils.text.punctuation import Punctuation
@ -8,6 +7,19 @@ from TTS.tts.utils.text.punctuation import Punctuation
class BasePhonemizer(abc.ABC):
"""Base phonemizer class
Phonemization follows the following steps:
1. Preprocessing:
- remove empty lines
- remove punctuation
- keep track of punctuation marks
2. Phonemization:
- convert text to phonemes
3. Postprocessing:
- join phonemes
- restore punctuation marks
Args:
language (str):
Language used by the phonemizer.
@ -51,40 +63,30 @@ class BasePhonemizer(abc.ABC):
@abc.abstractmethod
def name():
"""The name of the backend"""
...
@classmethod
@abc.abstractmethod
def is_available(cls):
"""Returns True if the backend is installed, False otherwise"""
...
@classmethod
@abc.abstractmethod
def version(cls):
"""Return the backend version as a tuple (major, minor, patch)"""
...
@staticmethod
@abc.abstractmethod
def supported_languages():
"""Return a dict of language codes -> name supported by the backend"""
...
def is_supported_language(self, language):
"""Returns True if `language` is supported by the backend"""
return language in self.supported_languages()
fr"""
Phonemization follows the following steps:
1. Preprocessing:
- remove empty lines
- remove punctuation
- keep track of punctuation marks
2. Phonemization:
- convert text to phonemes
3. Postprocessing:
- join phonemes
- restore punctuation marks
"""
@abc.abstractmethod
def _phonemize(self, text, separator):
"""The main phonemization method"""

View File

@ -28,29 +28,30 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
"1", # UTF8 text encoding
]
cmd.extend(args)
logging.debug("espeakng: executing %s" % repr(cmd))
p = subprocess.Popen(
logging.debug("espeakng: executing %s", repr(cmd))
with subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
res = iter(p.stdout.readline, b"")
if not sync:
) as p:
res = iter(p.stdout.readline, b"")
if not sync:
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
return res
res2 = []
for line in res:
res2.append(line)
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
return res
res2 = []
for line in res:
res2.append(line)
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
p.wait()
p.wait()
return res2
@ -85,7 +86,24 @@ class ESpeak(BasePhonemizer):
def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
if self._ESPEAK_LIB is None:
raise Exception("Unknown backend: %s" % backend)
# band-aid for backwards compatibility
if language == "en":
language = "en-us"
super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
if backend is not None:
self.backend = backend
@property
def backend(self):
return self._ESPEAK_LIB
@backend.setter
def backend(self, backend):
if backend not in ["espeak", "espeak-ng"]:
raise Exception("Unknown backend: %s" % backend)
self._ESPEAK_LIB = backend
def auto_set_espeak_lib(self) -> None:
if is_tool("espeak-ng"):
@ -115,24 +133,25 @@ class ESpeak(BasePhonemizer):
# espeak and espeak-ng parse `ipa` differently
if tie:
# use '͡' between phonemes
if _DEF_ESPEAK_LIB == "espeak":
if self.backend == "espeak":
args.append("--ipa=1")
else:
args.append("--ipa=3")
else:
# split with '_'
if _DEF_ESPEAK_LIB == "espeak":
if self.backend == "espeak":
args.append("--ipa=3")
else:
args.append("--ipa=1")
if tie:
args.append("--tie=%s" % tie)
args.append('"' + text + '"')
# compute phonemes
phonemes = ""
for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
logging.debug("line: %s" % repr(line))
phonemes += line.decode("utf8").strip()
logging.debug("line: %s", repr(line))
phonemes += line.decode("utf8").strip()[2:] # skip two redundant characters
return phonemes.replace("_", separator)
def _phonemize(self, text, separator=None):
@ -146,7 +165,7 @@ class ESpeak(BasePhonemizer):
Dict: Dictionary of language codes.
"""
if _DEF_ESPEAK_LIB is None:
raise {}
return {}
args = ["--voices"]
langs = {}
count = 0
@ -157,7 +176,7 @@ class ESpeak(BasePhonemizer):
lang_code = cols[1]
lang_name = cols[3]
langs[lang_code] = lang_name
logging.debug("line: %s" % repr(line))
logging.debug("line: %s", repr(line))
count += 1
return langs
@ -168,9 +187,9 @@ class ESpeak(BasePhonemizer):
str: Version of the used backend.
"""
args = ["--version"]
for line in _espeak_exe(_DEF_ESPEAK_LIB, args, sync=True):
for line in _espeak_exe(self.backend, args, sync=True):
version = line.decode("utf8").strip().split()[2]
logging.debug("line: %s" % repr(line))
logging.debug("line: %s", repr(line))
return version
@classmethod

View File

@ -1,5 +1,4 @@
import importlib
from os import stat
from typing import List
import gruut
@ -55,7 +54,7 @@ class Gruut(BasePhonemizer):
def name():
return "gruut"
def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str:
def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str: # pylint: disable=unused-argument
"""Convert input text to phonemes.
Gruut phonemizes the given `str` by separating each phoneme character with `separator`, even for characters

View File

@ -30,7 +30,7 @@ class JA_JP_Phonemizer(BasePhonemizer):
language = "ja-jp"
def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs):
def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs): # pylint: disable=unused-argument
super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
@staticmethod
@ -61,12 +61,12 @@ class JA_JP_Phonemizer(BasePhonemizer):
return True
if __name__ == "__main__":
text = "これは、電話をかけるための私の日本語の例のテキストです。"
e = JA_JP_Phonemizer()
print(e.supported_languages())
print(e.version())
print(e.language)
print(e.name())
print(e.is_available())
print("`" + e.phonemize(text) + "`")
# if __name__ == "__main__":
# text = "これは、電話をかけるための私の日本語の例のテキストです。"
# e = JA_JP_Phonemizer()
# print(e.supported_languages())
# print(e.version())
# print(e.language)
# print(e.name())
# print(e.is_available())
# print("`" + e.phonemize(text) + "`")

View File

@ -17,7 +17,7 @@ class MultiPhonemizer:
lang_to_phonemizer_name = DEF_LANG_TO_PHONEMIZER
language = "multi-lingual"
def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None:
def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None: # pylint: disable=dangerous-default-value
self.lang_to_phonemizer_name.update(custom_lang_to_phonemizer)
self.lang_to_phonemizer = self.init_phonemizers(self.lang_to_phonemizer_name)
@ -40,16 +40,16 @@ class MultiPhonemizer:
return list(self.lang_to_phonemizer_name.keys())
if __name__ == "__main__":
texts = {
"tr": "Merhaba, bu Türkçe bit örnek!",
"en-us": "Hello, this is English example!",
"de": "Hallo, das ist ein Deutches Beipiel!",
"zh-cn": "这是中国的例子",
}
phonemes = {}
ph = MultiPhonemizer()
for lang, text in texts.items():
phoneme = ph.phonemize(text, lang)
phonemes[lang] = phoneme
print(phonemes)
# if __name__ == "__main__":
# texts = {
# "tr": "Merhaba, bu Türkçe bit örnek!",
# "en-us": "Hello, this is English example!",
# "de": "Hallo, das ist ein Deutches Beipiel!",
# "zh-cn": "这是中国的例子",
# }
# phonemes = {}
# ph = MultiPhonemizer()
# for lang, text in texts.items():
# phoneme = ph.phonemize(text, lang)
# phonemes[lang] = phoneme
# print(phonemes)

View File

@ -25,14 +25,15 @@ class ZH_CN_Phonemizer(BasePhonemizer):
language = "zh-cn"
def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs):
def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs): # pylint: disable=unused-argument
super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
@staticmethod
def name():
return "zh_cn_phonemizer"
def phonemize_zh_cn(self, text: str, separator: str = "|") -> str:
@staticmethod
def phonemize_zh_cn(text: str, separator: str = "|") -> str:
ph = chinese_text_to_phonemes(text, separator)
return ph
@ -50,12 +51,12 @@ class ZH_CN_Phonemizer(BasePhonemizer):
return True
if __name__ == "__main__":
text = "这是,样本中文。"
e = ZH_CN_Phonemizer()
print(e.supported_languages())
print(e.version())
print(e.language)
print(e.name())
print(e.is_available())
print("`" + e.phonemize(text) + "`")
# if __name__ == "__main__":
# text = "这是,样本中文。"
# e = ZH_CN_Phonemizer()
# print(e.supported_languages())
# print(e.version())
# print(e.language)
# print(e.name())
# print(e.is_available())
# print("`" + e.phonemize(text) + "`")

View File

@ -130,7 +130,7 @@ class Punctuation:
return cls._restore(text, puncs, 0)
@classmethod
def _restore(cls, text, puncs, num):
def _restore(cls, text, puncs, num): # pylint: disable=too-many-return-statements
"""Auxiliary method for Punctuation.restore()"""
if not puncs:
return text
@ -159,14 +159,14 @@ class Punctuation:
return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:], num)
if __name__ == "__main__":
punc = Punctuation()
text = "This is. This is, example!"
# if __name__ == "__main__":
# punc = Punctuation()
# text = "This is. This is, example!"
print(punc.strip(text))
# print(punc.strip(text))
split_text, puncs = punc.strip_to_restore(text)
print(split_text, " ---- ", puncs)
# split_text, puncs = punc.strip_to_restore(text)
# print(split_text, " ---- ", puncs)
restored_text = punc.restore(split_text, puncs)
print(restored_text)
# restored_text = punc.restore(split_text, puncs)
# print(restored_text)

View File

@ -383,8 +383,7 @@ class AudioProcessor(object):
def init_from_config(config: "Coqpit"):
if "audio" in config:
return AudioProcessor(**config.audio)
else:
return AudioProcessor(**config)
return AudioProcessor(**config)
### setting up the parameters ###
def _build_mel_basis(

View File

@ -13,7 +13,6 @@ from TTS.tts.utils.speakers import SpeakerManager
# pylint: disable=unused-wildcard-import
# pylint: disable=wildcard-import
from TTS.tts.utils.synthesis import synthesis, trim_silence
from TTS.tts.utils.text import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.models import setup_model as setup_vocoder_model
from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input

View File

@ -314,7 +314,7 @@ class GAN(BaseVocoder):
data_items: List,
verbose: bool,
num_gpus: int,
rank: int = 0, # pylint: disable=unused-argument
rank: int = None, # pylint: disable=unused-argument
):
"""Initiate and return the GAN dataloader.