mirror of https://github.com/coqui-ai/TTS.git
replace unidecode with anyascii
This commit is contained in:
parent
e08c58db3b
commit
925c08cf95
|
@ -1,18 +1,6 @@
|
||||||
"""
|
|
||||||
Cleaners are transformations that run over the input text at both training and eval time.
|
|
||||||
|
|
||||||
Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
|
|
||||||
hyperparameter. Some cleaners are English-specific. You'll typically want to use:
|
|
||||||
1. "english_cleaners" for English text
|
|
||||||
2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
|
|
||||||
the Unidecode library (https://pypi.python.org/pypi/Unidecode)
|
|
||||||
3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
|
|
||||||
the symbols in symbols.py to match your data).
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from unidecode import unidecode
|
from anyascii import anyascii
|
||||||
|
|
||||||
from TTS.tts.utils.text.chinese_mandarin.numbers import replace_numbers_to_characters_in_text
|
from TTS.tts.utils.text.chinese_mandarin.numbers import replace_numbers_to_characters_in_text
|
||||||
|
|
||||||
|
@ -47,7 +35,7 @@ def collapse_whitespace(text):
|
||||||
|
|
||||||
|
|
||||||
def convert_to_ascii(text):
|
def convert_to_ascii(text):
|
||||||
return unidecode(text)
|
return anyascii(text)
|
||||||
|
|
||||||
|
|
||||||
def remove_aux_symbols(text):
|
def remove_aux_symbols(text):
|
||||||
|
|
|
@ -17,5 +17,5 @@ torch>=1.7
|
||||||
tqdm
|
tqdm
|
||||||
numba==0.52
|
numba==0.52
|
||||||
umap-learn==0.4.6
|
umap-learn==0.4.6
|
||||||
unidecode==0.4.20
|
anyascii
|
||||||
coqpit
|
coqpit
|
||||||
|
|
|
@ -17,7 +17,7 @@ config = GlowTTSConfig(
|
||||||
text_cleaner="english_cleaners",
|
text_cleaner="english_cleaners",
|
||||||
use_phonemes=True,
|
use_phonemes=True,
|
||||||
phoneme_language="zh-CN",
|
phoneme_language="zh-CN",
|
||||||
phoneme_cache_path='tests/data/ljspeech/phoneme_cache/',
|
phoneme_cache_path="tests/data/ljspeech/phoneme_cache/",
|
||||||
run_eval=True,
|
run_eval=True,
|
||||||
test_delay_epochs=-1,
|
test_delay_epochs=-1,
|
||||||
epochs=1,
|
epochs=1,
|
||||||
|
|
|
@ -17,7 +17,7 @@ config = SpeedySpeechConfig(
|
||||||
text_cleaner="english_cleaners",
|
text_cleaner="english_cleaners",
|
||||||
use_phonemes=True,
|
use_phonemes=True,
|
||||||
phoneme_language="zh-CN",
|
phoneme_language="zh-CN",
|
||||||
phoneme_cache_path='tests/data/ljspeech/phoneme_cache/',
|
phoneme_cache_path="tests/data/ljspeech/phoneme_cache/",
|
||||||
run_eval=True,
|
run_eval=True,
|
||||||
test_delay_epochs=-1,
|
test_delay_epochs=-1,
|
||||||
epochs=1,
|
epochs=1,
|
||||||
|
|
|
@ -19,6 +19,7 @@ config = MelganConfig(
|
||||||
seq_len=2048,
|
seq_len=2048,
|
||||||
eval_split_size=1,
|
eval_split_size=1,
|
||||||
print_step=1,
|
print_step=1,
|
||||||
|
discriminator_model_params={"base_channels": 16, "max_channels": 256, "downsample_factors": [4, 4, 4]},
|
||||||
print_eval=True,
|
print_eval=True,
|
||||||
data_path="tests/data/ljspeech",
|
data_path="tests/data/ljspeech",
|
||||||
output_path=output_path,
|
output_path=output_path,
|
||||||
|
|
Loading…
Reference in New Issue