Merge pull request #4 from idiap/hindi

feat(xtts): support Hindi for sentence-splitting and fine-tuning
This commit is contained in:
Enno Hermann 2024-04-11 16:49:44 +02:00 committed by GitHub
commit 2ad790d169
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 26 additions and 4 deletions

View File

@ -192,6 +192,7 @@ if __name__ == "__main__":
"hu",
"ko",
"ja",
"hi",
],
)
progress_data = gr.Label(label="Progress:")
@ -370,6 +371,7 @@ if __name__ == "__main__":
"hu",
"ko",
"ja",
"hi",
],
)
tts_text = gr.Textbox(

View File

@ -12,6 +12,7 @@ from num2words import num2words
from spacy.lang.ar import Arabic
from spacy.lang.en import English
from spacy.lang.es import Spanish
from spacy.lang.hi import Hindi
from spacy.lang.ja import Japanese
from spacy.lang.zh import Chinese
from tokenizers import Tokenizer
@ -22,6 +23,7 @@ logger = logging.getLogger(__name__)
def get_spacy_lang(lang):
"""Return Spacy language used for sentence splitting."""
if lang == "zh":
return Chinese()
elif lang == "ja":
@ -30,8 +32,10 @@ def get_spacy_lang(lang):
return Arabic()
elif lang == "es":
return Spanish()
elif lang == "hi":
return Hindi()
else:
# For most languages, Enlish does the job
# For most languages, English does the job
return English()
@ -614,6 +618,7 @@ class VoiceBpeTokenizer:
"ja": 71,
"hu": 224,
"ko": 95,
"hi": 150,
}
@cached_property

View File

@ -14,16 +14,31 @@ There is no need for an excessive amount of training data that spans countless h
### Updates with v2
- Improved voice cloning.
- Voices can be cloned with a single audio file or multiple audio files, without any effect on the runtime.
- 2 new languages: Hungarian and Korean.
- Across the board quality improvements.
### Code
The current implementation supports only inference and GPT encoder training.
### Languages
As of now, XTTS-v2 supports 16 languages: English (en), Spanish (es), French (fr), German (de), Italian (it), Portuguese (pt), Polish (pl), Turkish (tr), Russian (ru), Dutch (nl), Czech (cs), Arabic (ar), Chinese (zh-cn), Japanese (ja), Hungarian (hu) and Korean (ko).
XTTS-v2 supports 17 languages:
Stay tuned as we continue to add support for more languages. If you have any language requests, please feel free to reach out.
- Arabic (ar)
- Chinese (zh-cn)
- Czech (cs)
- Dutch (nl)
- English (en)
- French (fr)
- German (de)
- Hindi (hi)
- Hungarian (hu)
- Italian (it)
- Japanese (ja)
- Korean (ko)
- Polish (pl)
- Portuguese (pt)
- Russian (ru)
- Spanish (es)
- Turkish (tr)
### License
This model is licensed under [Coqui Public Model License](https://coqui.ai/cpml).