diff --git a/TTS/demos/xtts_ft_demo/utils/formatter.py b/TTS/demos/xtts_ft_demo/utils/formatter.py index 937ee4dd..6d7b770f 100644 --- a/TTS/demos/xtts_ft_demo/utils/formatter.py +++ b/TTS/demos/xtts_ft_demo/utils/formatter.py @@ -44,7 +44,7 @@ def format_audio_list(audio_files, target_language="en", out_path=None, buffer=0 os.makedirs(out_path, exist_ok=True) # Loading Whisper - device = "cuda" if torch.cuda.is_available() else "cpu" + device = "cuda" if torch.cuda.is_available() else "cpu" print("Loading Whisper Model!") asr_model = WhisperModel("large-v2", device=device, compute_type="float16") diff --git a/TTS/demos/xtts_ft_demo/xtts_demo.py b/TTS/demos/xtts_ft_demo/xtts_demo.py index a7fbc0e8..b8ffb231 100644 --- a/TTS/demos/xtts_ft_demo/xtts_demo.py +++ b/TTS/demos/xtts_ft_demo/xtts_demo.py @@ -64,7 +64,7 @@ def run_tts(lang, tts_text, speaker_audio_file): -# define a logger to redirect +# define a logger to redirect class Logger: def __init__(self, filename="log.out"): self.log_file = filename @@ -109,7 +109,7 @@ if __name__ == "__main__": description="""XTTS fine-tuning demo\n\n""" """ Example runs: - python3 TTS/demos/xtts_ft_demo/xtts_demo.py --port + python3 TTS/demos/xtts_ft_demo/xtts_demo.py --port """, formatter_class=argparse.RawTextHelpFormatter, ) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 83812f37..3065d011 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -274,7 +274,7 @@ class Xtts(BaseTTS): for i in range(0, audio.shape[1], 22050 * chunk_length): audio_chunk = audio[:, i : i + 22050 * chunk_length] - # if the chunk is too short ignore it + # if the chunk is too short ignore it if audio_chunk.size(-1) < 22050 * 0.33: continue