From b4c82685a73f136fb8ecc0ca2da33eacae31ac29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Mon, 24 Jul 2023 12:33:05 +0200 Subject: [PATCH 1/3] Add model entries --- TTS/.models.json | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/TTS/.models.json b/TTS/.models.json index 69ac7514..02873e7b 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -715,6 +715,18 @@ "license": "Apache 2.0" } } + }, + "be": { + "common-voice": { + "glow-tts":{ + "description": "Belarusian GlowTTS model created by @alex73 (Github).", + "hf_url":"", + "default_vocoder": "vocoder_models/be/common-voice/hifigan", + "commit": "c0aabb85", + "license": "CC-BY-SA 4.0", + "contact": "alex73mail@gmail.com" + } + } } }, "vocoder_models": { @@ -866,6 +878,17 @@ "commit": null } } + }, + "be": { + "common-voice": { + "hifigan": { + "hf_url": "https://huggingface.co/coqui/hifigan-be", + "description": "Belarusian HiFiGAN model created by @alex73 (Github).", + "author": "@alex73", + "license": "CC-BY-SA 4.0", + "commit": "c0aabb85" + } + } } }, "voice_conversion_models": { From 562a9509f253a7a40d8769940a94096aacbb3fc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Mon, 4 Sep 2023 13:57:03 +0200 Subject: [PATCH 2/3] Add BE model --- TTS/.models.json | 4 ++-- TTS/utils/manage.py | 15 +++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/TTS/.models.json b/TTS/.models.json index 02873e7b..c39c39fc 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -720,7 +720,7 @@ "common-voice": { "glow-tts":{ "description": "Belarusian GlowTTS model created by @alex73 (Github).", - "hf_url":"", + "github_rls_url":"https://coqui.gateway.scarf.sh/v0.16.6/tts_models--be--common-voice--glow-tts.zip", "default_vocoder": "vocoder_models/be/common-voice/hifigan", "commit": "c0aabb85", "license": "CC-BY-SA 4.0", @@ -882,7 +882,7 @@ "be": { "common-voice": { "hifigan": { - "hf_url": "https://huggingface.co/coqui/hifigan-be", + "github_rls_url": "https://coqui.gateway.scarf.sh/v0.16.6/vocoder_models--be--common-voice--hifigan.zip", "description": "Belarusian HiFiGAN model created by @alex73 (Github).", "author": "@alex73", "license": "CC-BY-SA 4.0", diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index 70d35228..be393adb 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -468,13 +468,16 @@ class ModelManager(object): print(f" > Error: Bad zip file - {file_url}") raise zipfile.BadZipFile # pylint: disable=raise-missing-from # move the files to the outer path - for file_path in z.namelist()[1:]: + for file_path in z.namelist(): src_path = os.path.join(output_folder, file_path) - dst_path = os.path.join(output_folder, os.path.basename(file_path)) - if src_path != dst_path: - copyfile(src_path, dst_path) - # remove the extracted folder - rmtree(os.path.join(output_folder, z.namelist()[0])) + if os.path.isfile(src_path): + dst_path = os.path.join(output_folder, os.path.basename(file_path)) + if src_path != dst_path: + copyfile(src_path, dst_path) + # remove redundant (hidden or not) folders + for file_path in z.namelist(): + if os.path.isdir(os.path.join(output_folder, file_path)): + rmtree(os.path.join(output_folder, file_path)) @staticmethod def _download_tar_file(file_url, output_folder, progress_bar): From 9533f8656cc93ce6fb103d18cb8cf2f8fc0f22bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Mon, 4 Sep 2023 13:58:37 +0200 Subject: [PATCH 3/3] Make style --- TTS/bin/synthesize.py | 2 +- TTS/tts/utils/text/belarusian/phonemizer.py | 5 ++++- TTS/tts/utils/text/phonemizers/__init__.py | 2 +- TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py | 2 +- recipes/bel-alex73/train_glowtts.py | 2 +- tests/text_tests/test_belarusian_phonemizer.py | 5 +++-- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 5ded3067..6adb9f03 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -392,7 +392,7 @@ If you don't specify any models, then it uses LJSpeech based English model. if args.encoder_path is not None: encoder_path = args.encoder_path encoder_config_path = args.encoder_config_path - + device = args.device if args.use_cuda: device = "cuda" diff --git a/TTS/tts/utils/text/belarusian/phonemizer.py b/TTS/tts/utils/text/belarusian/phonemizer.py index 3c07a209..1922577e 100644 --- a/TTS/tts/utils/text/belarusian/phonemizer.py +++ b/TTS/tts/utils/text/belarusian/phonemizer.py @@ -8,7 +8,9 @@ def init(): import jpype import jpype.imports except ModuleNotFoundError: - raise ModuleNotFoundError("Belarusian phonemizer requires to install module 'jpype1' manually. Try `pip install jpype1`.") + raise ModuleNotFoundError( + "Belarusian phonemizer requires to install module 'jpype1' manually. Try `pip install jpype1`." + ) try: jar_path = os.environ["BEL_FANETYKA_JAR"] @@ -31,4 +33,5 @@ def belarusian_text_to_phonemes(text: str) -> str: init() from org.alex73.fanetyka.impl import FanetykaText + return str(FanetykaText(finder, text).ipa) diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index 638184fd..f9a0340c 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -1,6 +1,6 @@ from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer -from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer from TTS.tts.utils.text.phonemizers.base import BasePhonemizer +from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut from TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer diff --git a/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py b/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py index fb620766..e5fcab6e 100644 --- a/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py +++ b/TTS/tts/utils/text/phonemizers/belarusian_phonemizer.py @@ -1,7 +1,7 @@ from typing import Dict -from TTS.tts.utils.text.phonemizers.base import BasePhonemizer from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer _DEF_BE_PUNCS = ",!." # TODO diff --git a/recipes/bel-alex73/train_glowtts.py b/recipes/bel-alex73/train_glowtts.py index 24b62d79..74866be7 100644 --- a/recipes/bel-alex73/train_glowtts.py +++ b/recipes/bel-alex73/train_glowtts.py @@ -60,7 +60,7 @@ config = GlowTTSConfig( output_path=output_path, add_blank=True, datasets=[dataset_config], -# characters=characters, + # characters=characters, enable_eos_bos_chars=True, mixed_precision=False, save_step=10000, diff --git a/tests/text_tests/test_belarusian_phonemizer.py b/tests/text_tests/test_belarusian_phonemizer.py index 278ee8be..76ba4667 100644 --- a/tests/text_tests/test_belarusian_phonemizer.py +++ b/tests/text_tests/test_belarusian_phonemizer.py @@ -1,6 +1,6 @@ import os -import warnings import unittest +import warnings from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes @@ -17,7 +17,8 @@ class TestText(unittest.TestCase): except KeyError: warnings.warn( "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file to test Belarusian phonemizer", - Warning) + Warning, + ) return for line in _TEST_CASES.strip().split("\n"):