diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py
index b6e9834e..6e418890 100644
--- a/TTS/tts/datasets/__init__.py
+++ b/TTS/tts/datasets/__init__.py
@@ -110,6 +110,7 @@ def load_tts_samples(
             formatter = _get_formatter_by_name(formatter_name)
         # load train set
         meta_data_train = formatter(root_path, meta_file_train, ignored_speakers=ignored_speakers)
+        assert len(meta_data_train) > 0, f" [!] No training samples found in {root_path}/{meta_file_train}"
         meta_data_train = [{**item, **{"language": language, "dataset_name": dataset_name}} for item in meta_data_train]
         print(f" | > Found {len(meta_data_train)} files in {Path(root_path).resolve()}")
         # load evaluation split if set
diff --git a/TTS/tts/utils/text/korean/phonemizer.py b/TTS/tts/utils/text/korean/phonemizer.py
index 7c48ef58..2c69217c 100644
--- a/TTS/tts/utils/text/korean/phonemizer.py
+++ b/TTS/tts/utils/text/korean/phonemizer.py
@@ -1,9 +1,8 @@
-from g2pkk import G2p
 from jamo import hangul_to_jamo
 
 from TTS.tts.utils.text.korean.korean import normalize
 
-g2p = G2p()
+g2p = None
 
 
 def korean_text_to_phonemes(text, character: str = "hangeul") -> str:
@@ -17,6 +16,11 @@ def korean_text_to_phonemes(text, character: str = "hangeul") -> str:
         output = '하늘' (Unicode :\u1112\u1161\u1102\u1173\u11af), (ᄒ + ᅡ + ᄂ + ᅳ + ᆯ)
 
     """
+    global g2p  # pylint: disable=global-statement
+    if g2p is None:
+        from g2pkk import G2p
+
+        g2p = G2p()
     if character == "english":
         from anyascii import anyascii
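
Note on the second hunk: it replaces the eager module-level G2p() construction with a lazy one, so g2pkk is only imported the first time Korean phonemization is actually requested. Below is a minimal, standalone sketch of that same lazy-initialization pattern, not part of the patch; the stdlib decimal module stands in for the heavy g2pkk dependency and all names are illustrative.

_context = None


def get_context():
    """Create the shared object on first call, then reuse the cached instance."""
    global _context  # pylint: disable=global-statement
    if _context is None:
        from decimal import Context  # deferred import: cost is paid only on first use

        _context = Context()
    return _context


assert get_context() is get_context()  # repeat calls return the same cached instance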