NOTE(review): line breaks, blank context lines and the JSON indentation below
were reconstructed from the hunk headers of a whitespace-collapsed patch;
confirm against the target tree before applying. The post-image blob hash on
the phonemizer.py "index" line predates the edits made to its added lines.

diff --git a/TTS/.models.json b/TTS/.models.json
index 6ba67b01..d3c56b94 100644
--- a/TTS/.models.json
+++ b/TTS/.models.json
@@ -157,7 +157,7 @@
             "kokoro": {
                 "tacotron2-DDC": {
                     "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.15/tts_models--jp--kokoro--tacotron2-DDC.zip",
-                    "default_vocoder": "vocoder_models/universal/libri-tts/wavegrad",
+                    "default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1",
                     "description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.",
                     "author": "@kaiidams",
                     "commit": "401fbd89"
diff --git a/TTS/tts/utils/text/japanese/phonemizer.py b/TTS/tts/utils/text/japanese/phonemizer.py
index a4629a30..969becfd 100644
--- a/TTS/tts/utils/text/japanese/phonemizer.py
+++ b/TTS/tts/utils/text/japanese/phonemizer.py
@@ -2,8 +2,10 @@
 # compatible with Julius https://github.com/julius-speech/segmentation-kit
 
 import re
+import unicodedata
 
 import MeCab
+from num2words import num2words
 
 _CONVRULES = [
     # Conversion of 2 letters
@@ -373,8 +375,112 @@ def text2kata(text: str) -> str:
     return hira2kata("".join(res))
 
 
+# Japanese readings (yomi) for ASCII symbols, Latin letters and Greek letters.
+# Keys are lower-case; lookups lower-case each character individually.
+_ALPHASYMBOL_YOMI = {
+    "#": "シャープ",
+    "%": "パーセント",
+    "&": "アンド",
+    "+": "プラス",
+    "-": "マイナス",
+    ":": "コロン",
+    ";": "セミコロン",
+    "<": "小なり",
+    "=": "イコール",
+    ">": "大なり",
+    "@": "アット",
+    "a": "エー",
+    "b": "ビー",
+    "c": "シー",
+    "d": "ディー",
+    "e": "イー",
+    "f": "エフ",
+    "g": "ジー",
+    "h": "エイチ",
+    "i": "アイ",
+    "j": "ジェー",
+    "k": "ケー",
+    "l": "エル",
+    "m": "エム",
+    "n": "エヌ",
+    "o": "オー",
+    "p": "ピー",
+    "q": "キュー",
+    "r": "アール",
+    "s": "エス",
+    "t": "ティー",
+    "u": "ユー",
+    "v": "ブイ",
+    "w": "ダブリュー",
+    "x": "エックス",
+    "y": "ワイ",
+    "z": "ゼット",
+    "α": "アルファ",
+    "β": "ベータ",
+    "γ": "ガンマ",
+    "δ": "デルタ",
+    "ε": "イプシロン",
+    "ζ": "ゼータ",
+    "η": "イータ",
+    "θ": "シータ",
+    "ι": "イオタ",
+    "κ": "カッパ",
+    "λ": "ラムダ",
+    "μ": "ミュー",
+    "ν": "ニュー",
+    "ξ": "クサイ",
+    "ο": "オミクロン",
+    "π": "パイ",
+    "ρ": "ロー",
+    "σ": "シグマ",
+    "ς": "シグマ",  # word-final form of sigma
+    "τ": "タウ",
+    "υ": "ウプシロン",
+    "φ": "ファイ",
+    "χ": "カイ",
+    "ψ": "プサイ",
+    "ω": "オメガ",
+}
+
+
+# "1,234,567" style digit grouping; the commas are dropped before conversion.
+_NUMBER_WITH_SEPARATOR_RX = re.compile(r"[0-9]{1,3}(,[0-9]{3})+")
+_CURRENCY_MAP = {"$": "ドル", "¥": "円", "£": "ポンド", "€": "ユーロ"}
+# Currency sign followed by an amount, e.g. "$12.34".
+_CURRENCY_RX = re.compile(r"([$¥£€])([0-9.]*[0-9])")
+# Integer or decimal literal.
+_NUMBER_RX = re.compile(r"[0-9]+(\.[0-9]+)?")
+
+
+def japanese_convert_numbers_to_words(text: str) -> str:
+    """Spell out numbers and currency amounts in Japanese.
+
+    Digit-group commas are removed, a leading currency sign is moved behind
+    its amount as the Japanese unit name, and every remaining number is
+    replaced by ``num2words(..., lang="ja")``.
+    """
+    res = _NUMBER_WITH_SEPARATOR_RX.sub(lambda m: m[0].replace(",", ""), text)
+    res = _CURRENCY_RX.sub(lambda m: m[2] + _CURRENCY_MAP.get(m[1], m[1]), res)
+    res = _NUMBER_RX.sub(lambda m: num2words(m[0], lang="ja"), res)
+    return res
+
+
+def japanese_convert_alpha_symbols_to_words(text: str) -> str:
+    """Replace symbols, Latin letters and Greek letters by their readings.
+
+    Each character is lower-cased on its own for the lookup: ``str.lower()`` on
+    the whole string would turn a word-final "Σ" into "ς" and would also
+    lower-case characters that have no reading. Unmapped characters are
+    returned unchanged.
+    """
+    return "".join(_ALPHASYMBOL_YOMI.get(ch.lower(), ch) for ch in text)
+
+
 def japanese_text_to_phonemes(text: str) -> str:
     """Convert Japanese text to phonemes."""
-    res = text2kata(text)
+    res = unicodedata.normalize("NFKC", text)
+    res = japanese_convert_numbers_to_words(res)
+    res = japanese_convert_alpha_symbols_to_words(res)
+    res = text2kata(res)
     res = kata2phoneme(res)
     return res.replace(" ", "")
diff --git a/tests/text_tests/test_japanese_phonemizer.py b/tests/text_tests/test_japanese_phonemizer.py
index b3b1ece3..423b79b9 100644
--- a/tests/text_tests/test_japanese_phonemizer.py
+++ b/tests/text_tests/test_japanese_phonemizer.py
@@ -5,11 +5,13 @@ from TTS.tts.utils.text.japanese.phonemizer import japanese_text_to_phonemes
 _TEST_CASES = """
 どちらに行きますか?/dochiraniikimasuka?
 今日は温泉に、行きます。/kyo:waoNseNni,ikimasu.
-「A」から「Z」までです。/AkaraZmadedesu.
+「A」から「Z」までです。/e:karazeqtomadedesu.
 そうですね!/so:desune!
 クジラは哺乳類です。/kujirawahonyu:ruidesu.
 ヴィディオを見ます。/bidioomimasu.
-ky o: w a o N s e N n i , i k i m a s u ./kyo:waoNseNni,ikimasu.
+今日は8月22日です/kyo:wahachigatsuniju:ninichidesu
+xyzとαβγ/eqkusuwaizeqtotoarufabe:tagaNma
+値段は$12.34です/nedaNwaju:niteNsaNyoNdorudesu
 """
 
 