From d41686502e3003b6472ad769115dfd710059a87d Mon Sep 17 00:00:00 2001
From: Enno Hermann <enno.hermann@idiap.ch>
Date: Mon, 8 Apr 2024 12:06:45 +0200
Subject: [PATCH] feat(xtts): support hindi for sentence-splitting and
 fine-tuning

The XTTS model itself already supports Hindi; support was just missing in these components.
---
 TTS/demos/xtts_ft_demo/xtts_demo.py |  2 ++
 TTS/tts/layers/xtts/tokenizer.py    |  7 ++++++-
 docs/source/models/xtts.md          | 21 ++++++++++++++++++---
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/TTS/demos/xtts_ft_demo/xtts_demo.py b/TTS/demos/xtts_ft_demo/xtts_demo.py
index 85168c64..7ac38ed6 100644
--- a/TTS/demos/xtts_ft_demo/xtts_demo.py
+++ b/TTS/demos/xtts_ft_demo/xtts_demo.py
@@ -192,6 +192,7 @@ if __name__ == "__main__":
                     "hu",
                     "ko",
                     "ja",
+                    "hi",
                 ],
             )
             progress_data = gr.Label(label="Progress:")
@@ -370,6 +371,7 @@ if __name__ == "__main__":
                             "hu",
                             "ko",
                             "ja",
+                            "hi",
                         ],
                     )
                     tts_text = gr.Textbox(
diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py
index 1a3cc47a..6cbd374f 100644
--- a/TTS/tts/layers/xtts/tokenizer.py
+++ b/TTS/tts/layers/xtts/tokenizer.py
@@ -11,6 +11,7 @@ from num2words import num2words
 from spacy.lang.ar import Arabic
 from spacy.lang.en import English
 from spacy.lang.es import Spanish
+from spacy.lang.hi import Hindi
 from spacy.lang.ja import Japanese
 from spacy.lang.zh import Chinese
 from tokenizers import Tokenizer
@@ -19,6 +20,7 @@ from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words
 
 
 def get_spacy_lang(lang):
+    """Return Spacy language used for sentence splitting."""
     if lang == "zh":
         return Chinese()
     elif lang == "ja":
@@ -27,8 +29,10 @@ def get_spacy_lang(lang):
         return Arabic()
     elif lang == "es":
         return Spanish()
+    elif lang == "hi":
+        return Hindi()
     else:
-        # For most languages, Enlish does the job
+        # For most languages, English does the job
         return English()
 
 
@@ -611,6 +615,7 @@ class VoiceBpeTokenizer:
             "ja": 71,
             "hu": 224,
             "ko": 95,
+            "hi": 150,
         }
 
     @cached_property
diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md
index de166741..cc7c36b7 100644
--- a/docs/source/models/xtts.md
+++ b/docs/source/models/xtts.md
@@ -14,16 +14,31 @@ There is no need for an excessive amount of training data that spans countless h
 ### Updates with v2
 - Improved voice cloning.
 - Voices can be cloned with a single audio file or multiple audio files, without any effect on the runtime.
-- 2 new languages: Hungarian and Korean.
 - Across the board quality improvements.
 
 ### Code
 Current implementation only supports inference and GPT encoder training.
 
 ### Languages
-As of now, XTTS-v2 supports 16 languages: English (en), Spanish (es), French (fr), German (de), Italian (it), Portuguese (pt), Polish (pl), Turkish (tr), Russian (ru), Dutch (nl), Czech (cs), Arabic (ar), Chinese (zh-cn), Japanese (ja), Hungarian (hu) and Korean (ko).
+XTTS-v2 supports 17 languages:
 
-Stay tuned as we continue to add support for more languages. If you have any language requests, please feel free to reach out.
+- Arabic (ar)
+- Chinese (zh-cn)
+- Czech (cs)
+- Dutch (nl)
+- English (en)
+- French (fr)
+- German (de)
+- Hindi (hi)
+- Hungarian (hu)
+- Italian (it)
+- Japanese (ja)
+- Korean (ko)
+- Polish (pl)
+- Portuguese (pt)
+- Russian (ru)
+- Spanish (es)
+- Turkish (tr)
 
 ### License
 This model is licensed under [Coqui Public Model License](https://coqui.ai/cpml).