From 5dd217a759d6303b599a75985f3664cff1a36d8a Mon Sep 17 00:00:00 2001
From: Edresson Casanova
Date: Fri, 1 Dec 2023 09:47:09 -0300
Subject: [PATCH] Update XTTS finetuner docs

---
 TTS/demos/xtts_ft_demo/xtts_demo.py |  4 ++--
 docs/source/models/xtts.md          | 18 +++++++++++++++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/TTS/demos/xtts_ft_demo/xtts_demo.py b/TTS/demos/xtts_ft_demo/xtts_demo.py
index 8e9a88eb..ebb11f29 100644
--- a/TTS/demos/xtts_ft_demo/xtts_demo.py
+++ b/TTS/demos/xtts_ft_demo/xtts_demo.py
@@ -202,7 +202,7 @@ if __name__ == "__main__":
             )
             demo.load(read_logs, None, logs, every=1)
 
-            prompt_compute_btn = gr.Button(value="Step 1 - Create dataset.")
+            prompt_compute_btn = gr.Button(value="Step 1 - Create dataset")
 
         def preprocess_dataset(audio_path, language, out_path, progress=gr.Progress(track_tqdm=True)):
             clear_gpu_cache()
@@ -315,7 +315,7 @@ if __name__ == "__main__":
                 progress_load = gr.Label(
                     label="Progress:"
                 )
-                load_btn = gr.Button(value="Step 3 - Load Fine tuned XTTS model")
+                load_btn = gr.Button(value="Step 3 - Load Fine-tuned XTTS model")
 
             with gr.Column() as col2:
                 speaker_reference_audio = gr.Textbox(
diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md
index f42e8d8f..92a981d7 100644
--- a/docs/source/models/xtts.md
+++ b/docs/source/models/xtts.md
@@ -182,7 +182,7 @@ To make `XTTS_v2` GPT encoder training easier for beginner users we did a gradio
 - Train the XTTS GPT encoder with the processed data
 - Inference support using the fine-tuned model
 
-The user can run this gradio demos locally or remotely using a Colab Notebook.
+The user can run this gradio demo locally or remotely using a Colab Notebook.
 
 ##### Run demo on Colab
 To make the `XTTS_v2` fine-tuning more accessible for users that do not have good GPUs available we did a Google Colab Notebook.
@@ -191,6 +191,15 @@ The Colab Notebook is available [here](https://colab.research.google.com/drive/1
 
 To learn how to use this Colab Notebook please check the [XTTS fine-tuning video]().
 
+If you are not able to access the video, follow these steps:
+
+1. Open the Colab notebook and start the demo by running the first two cells (ignore the pip install errors in the first one).
+2. Click the "Running on public URL:" link in the output of the second cell.
+3. On the first tab (1 - Data processing), select the audio file or files, wait for the upload to finish, then click the "Step 1 - Create dataset" button and wait until the dataset processing is done.
+4. As soon as the dataset processing is done, go to the second tab (2 - Fine-tuning XTTS Encoder), press the "Step 2 - Run the training" button, and wait until the training finishes. Note that it can take up to 40 minutes.
+5. As soon as the training is done, go to the third tab (3 - Inference), click the "Step 3 - Load Fine-tuned XTTS model" button, and wait until the fine-tuned model is loaded. You can then run inference with the model by clicking the "Step 4 - Inference" button.
+
+
 ##### Run demo locally
 
 To run the demo locally you need to do the following steps:
@@ -199,6 +208,13 @@ To run the demo locally you need to do the following steps:
 3. Run the gradio demo using the command `python3 TTS/demos/xtts_ft_demo/xtts_demo.py`
 4. Follow the steps presented on the [XTTS fine-tuning video]() to be able to fine-tune and use the fine-tuned model.
 
+
+If you are not able to access the video, follow these steps:
+
+1. On the first tab (1 - Data processing), select the audio file or files, wait for the upload to finish, then click the "Step 1 - Create dataset" button and wait until the dataset processing is done.
+2. As soon as the dataset processing is done, go to the second tab (2 - Fine-tuning XTTS Encoder), press the "Step 2 - Run the training" button, and wait until the training finishes. Note that it can take up to 40 minutes.
+3. As soon as the training is done, go to the third tab (3 - Inference), click the "Step 3 - Load Fine-tuned XTTS model" button, and wait until the fine-tuned model is loaded. You can then run inference with the model by clicking the "Step 4 - Inference" button.
+
 #### Advanced training
 
 A recipe for `XTTS_v2` GPT encoder training using `LJSpeech` dataset is available at https://github.com/coqui-ai/TTS/tree/dev/recipes/ljspeech/xtts_v1/train_gpt_xtts.py
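
For readers following the `xtts_demo.py` hunks above, the sketch below shows the Gradio Blocks/Button wiring pattern that the renamed buttons belong to. It is a minimal illustration under stated assumptions, not the demo's actual code: `create_dataset` here is a hypothetical stand-in for the demo's real `preprocess_dataset` handler.

```python
# Minimal sketch of the Blocks/Button pattern used by xtts_demo.py.
# `create_dataset` is a hypothetical stand-in for the demo's real
# preprocessing handler; only the wiring pattern is the point here.
import gradio as gr

def create_dataset(audio_path: str) -> str:
    # Placeholder: the real demo transcribes the audio and builds a dataset.
    return f"Done: dataset created from {audio_path}"

with gr.Blocks() as demo:
    audio_path = gr.Textbox(label="Path to the audio file(s)")
    progress = gr.Label(label="Progress:")
    # Note the label style the patch standardizes: no trailing period.
    prompt_compute_btn = gr.Button(value="Step 1 - Create dataset")
    # Clicking the button runs the handler and writes its return value
    # into the `progress` component.
    prompt_compute_btn.click(create_dataset, inputs=audio_path, outputs=progress)

if __name__ == "__main__":
    demo.launch()
```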
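Likewise, for what "Step 3 - Load Fine-tuned XTTS model" and "Step 4 - Inference" do behind the buttons, the sketch below loads a checkpoint and synthesizes speech with the Coqui TTS XTTS API, following the manual-inference pattern documented elsewhere in `docs/source/models/xtts.md`. The paths are placeholders, assuming the fine-tuning run wrote `config.json`, the model checkpoint, and the vocab file into a single output directory.

```python
# Sketch: using a fine-tuned XTTS checkpoint outside the demo, following the
# manual-inference pattern in docs/source/models/xtts.md. The paths below are
# placeholders for the fine-tuning run's output directory.
import torch
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts

config = XttsConfig()
config.load_json("/path/to/run/config.json")
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir="/path/to/run/", eval=True)
if torch.cuda.is_available():
    model.cuda()

# Synthesize with a reference clip of the fine-tuned speaker.
outputs = model.synthesize(
    "It took me quite a long time to develop a voice.",
    config,
    speaker_wav="/path/to/speaker_reference.wav",
    gpt_cond_len=3,
    language="en",
)
```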