Add parameters so that they can be set in the Colab demo

This commit is contained in:
Edresson Casanova 2023-11-27 10:01:48 -03:00
parent 335b8c37b3
commit eaa5355c91
2 changed files with 276 additions and 230 deletions

View File

@ -8,9 +8,9 @@ from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrai
from TTS.utils.manage import ModelManager
def train_gpt(language, num_epochs, batch_size, train_csv, eval_csv, output_path):
def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, output_path):
# Logging parameters
RUN_NAME = "GPT_XTTSv2.1_FT"
RUN_NAME = "GPT_XTTS_FT"
PROJECT_NAME = "XTTS_trainer"
DASHBOARD_LOGGER = "tensorboard"
LOGGER_URI = None
@ -18,13 +18,11 @@ def train_gpt(language, num_epochs, batch_size, train_csv, eval_csv, output_path
# Set here the path where the checkpoints will be saved. Default: ./run/training/
OUT_PATH = os.path.join(output_path, "run", "training")
# Training Parameters
OPTIMIZER_WD_ONLY_ON_WEIGHTS = True # for multi-gpu training please make it False
START_WITH_EVAL = True # if True it will start with evaluation
START_WITH_EVAL = False # if True it will start with evaluation
BATCH_SIZE = batch_size # set here the batch size
GRAD_ACUMM_STEPS = 1 # set here the grad accumulation steps
# Note: we recommend that BATCH_SIZE * GRAD_ACUMM_STEPS be at least 252 for more efficient training. You can increase/decrease BATCH_SIZE, but then set GRAD_ACUMM_STEPS accordingly.
GRAD_ACUMM_STEPS = grad_acumm # set here the grad accumulation steps
# Define here the dataset that you want to use for the fine-tuning.

View File

@ -1,3 +1,4 @@
import argparse
import os
import sys
import tempfile
@ -21,7 +22,6 @@ def clear_gpu_cache():
if torch.cuda.is_available():
torch.cuda.empty_cache()
PORT = 5003
XTTS_MODEL = None
def load_model(xtts_checkpoint, xtts_config, xtts_vocab):
@ -101,11 +101,54 @@ def read_logs():
return f.read()
with gr.Blocks() as demo:
if __name__ == "__main__":
# Command-line interface of the demo. RawTextHelpFormatter keeps the line
# breaks of the description, so the example below is printed as written.
# The example passes an explicit value to --port because the option takes
# an integer argument and cannot be used as a bare flag.
parser = argparse.ArgumentParser(
    description="""XTTS fine-tuning demo\n\n"""
    """
Example runs:
python3 TTS/demos/xtts_ft_demo/xtts_demo.py --port 5003
""",
    formatter_class=argparse.RawTextHelpFormatter,
)
# Register the demo's command-line options and parse them into `args`.
# Each row is (flag, value type, help text, default value); the options are
# added in the order listed, which is also the order shown by --help.
for _flag, _type, _help, _default in (
    ("--port", int, "Port to run the gradio demo. Default: 5003", 5003),
    (
        "--out_path",
        str,
        "Output path (where data and checkpoints will be saved) Default: /tmp/xtts_ft/",
        "/tmp/xtts_ft/",
    ),
    ("--num_epochs", int, "Number of epochs to train. Default: 10", 10),
    ("--batch_size", int, "Batch size. Default: 4", 4),
    ("--grad_acumm", int, "Grad accumulation steps. Default: 1", 1),
):
    parser.add_argument(_flag, type=_type, help=_help, default=_default)
args = parser.parse_args()
with gr.Blocks() as demo:
with gr.Tab("Data processing"):
out_path = gr.Textbox(
label="Output path (where data and checkpoints will be saved):",
value="/tmp/xtts_ft/"
value=args.out_path,
)
# upload_file = gr.Audio(
# sources="upload",
@ -178,18 +221,25 @@ with gr.Blocks() as demo:
label="Eval CSV:",
)
num_epochs = gr.Slider(
label="num_epochs",
label="Number of epochs:",
minimum=1,
maximum=100,
step=1,
value=10,
value=args.num_epochs,
)
batch_size = gr.Slider(
label="batch_size",
label="Batch size:",
minimum=2,
maximum=512,
step=1,
value=4,
value=args.batch_size,
)
# Slider for the number of gradient-accumulation steps.
# The lower bound is 1 so that the initial value taken from --grad_acumm
# (whose CLI default is 1) lies inside the slider's range; with a minimum
# of 2 the default could not be represented or selected in the UI.
grad_acumm = gr.Slider(
    label="Grad accumulation steps:",
    minimum=1,
    maximum=128,
    step=1,
    value=args.grad_acumm,
)
progress_train = gr.Label(
label="Progress:"
@ -201,10 +251,10 @@ with gr.Blocks() as demo:
demo.load(read_logs, None, logs_tts_train, every=1)
train_btn = gr.Button(value="Step 2 - Run the training")
def train_model(language, train_csv, eval_csv, num_epochs, batch_size, output_path, progress=gr.Progress(track_tqdm=True)):
def train_model(language, train_csv, eval_csv, num_epochs, batch_size, grad_acumm, output_path):
clear_gpu_cache()
config_path, original_xtts_checkpoint, vocab_file, exp_path, speaker_wav = train_gpt(language, num_epochs, batch_size, train_csv, eval_csv, output_path=output_path)
config_path, original_xtts_checkpoint, vocab_file, exp_path, speaker_wav = train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, output_path=output_path)
# copy the original files to avoid issues caused by parameter changes
os.system(f"cp {config_path} {exp_path}")
os.system(f"cp {vocab_file} {exp_path}")
@ -295,6 +345,7 @@ with gr.Blocks() as demo:
eval_csv,
num_epochs,
batch_size,
grad_acumm,
out_path,
],
outputs=[progress_train, xtts_config, xtts_vocab, xtts_checkpoint, speaker_reference_audio],
@ -320,12 +371,9 @@ with gr.Blocks() as demo:
outputs=[tts_output_audio, reference_audio],
)
if __name__ == "__main__":
demo.launch(
share=True,
debug=True,
server_port=PORT,
debug=False,
server_port=args.port,
server_name="0.0.0.0"
)