mirror of https://github.com/coqui-ai/TTS.git
fix(bin.synthesize): correctly handle boolean arguments
Previously, e.g. `--use_cuda false` would actually set use_cuda=True: https://github.com/coqui-ai/TTS/discussions/3762
This commit is contained in:
parent
a682fa8d56
commit
77722cb0dd
|
@ -35,7 +35,7 @@ Example run:
|
||||||
--data_path /root/LJSpeech-1.1/
|
--data_path /root/LJSpeech-1.1/
|
||||||
--batch_size 32
|
--batch_size 32
|
||||||
--dataset ljspeech
|
--dataset ljspeech
|
||||||
--use_cuda True
|
--use_cuda
|
||||||
""",
|
""",
|
||||||
formatter_class=RawTextHelpFormatter,
|
formatter_class=RawTextHelpFormatter,
|
||||||
)
|
)
|
||||||
|
@ -62,7 +62,7 @@ Example run:
|
||||||
help="Dataset metafile inclusing file paths with transcripts.",
|
help="Dataset metafile inclusing file paths with transcripts.",
|
||||||
)
|
)
|
||||||
parser.add_argument("--data_path", type=str, default="", help="Defines the data path. It overwrites config.json.")
|
parser.add_argument("--data_path", type=str, default="", help="Defines the data path. It overwrites config.json.")
|
||||||
parser.add_argument("--use_cuda", type=bool, default=False, help="enable/disable cuda.")
|
parser.add_argument("--use_cuda", action=argparse.BooleanOptionalAction, default=False, help="enable/disable cuda.")
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--batch_size", default=16, type=int, help="Batch size for the model. Use batch_size=1 if you have no CUDA."
|
"--batch_size", default=16, type=int, help="Batch size for the model. Use batch_size=1 if you have no CUDA."
|
||||||
|
|
|
@ -150,7 +150,7 @@ if __name__ == "__main__":
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
)
|
)
|
||||||
parser.add_argument("--disable_cuda", type=bool, help="Flag to disable cuda.", default=False)
|
parser.add_argument("--disable_cuda", action="store_true", help="Flag to disable cuda.", default=False)
|
||||||
parser.add_argument("--no_eval", help="Do not compute eval?. Default False", default=False, action="store_true")
|
parser.add_argument("--no_eval", help="Do not compute eval?. Default False", default=False, action="store_true")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--formatter_name",
|
"--formatter_name",
|
||||||
|
|
|
@ -75,8 +75,8 @@ if __name__ == "__main__":
|
||||||
type=str,
|
type=str,
|
||||||
help="Path to dataset config file.",
|
help="Path to dataset config file.",
|
||||||
)
|
)
|
||||||
parser.add_argument("--use_cuda", type=bool, help="flag to set cuda.", default=True)
|
parser.add_argument("--use_cuda", action=argparse.BooleanOptionalAction, help="flag to set cuda.", default=True)
|
||||||
parser.add_argument("--eval", type=bool, help="compute eval.", default=True)
|
parser.add_argument("--eval", action=argparse.BooleanOptionalAction, help="compute eval.", default=True)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
|
@ -282,7 +282,7 @@ if __name__ == "__main__":
|
||||||
parser.add_argument("--debug", default=False, action="store_true", help="Save audio files for debug")
|
parser.add_argument("--debug", default=False, action="store_true", help="Save audio files for debug")
|
||||||
parser.add_argument("--save_audio", default=False, action="store_true", help="Save audio files")
|
parser.add_argument("--save_audio", default=False, action="store_true", help="Save audio files")
|
||||||
parser.add_argument("--quantize_bits", type=int, default=0, help="Save quantized audio files if non-zero")
|
parser.add_argument("--quantize_bits", type=int, default=0, help="Save quantized audio files if non-zero")
|
||||||
parser.add_argument("--eval", type=bool, help="compute eval.", default=True)
|
parser.add_argument("--eval", action=argparse.BooleanOptionalAction, help="compute eval.", default=True)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
c = load_config(args.config_path)
|
c = load_config(args.config_path)
|
||||||
|
|
|
@ -80,7 +80,7 @@ if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True"
|
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end"
|
||||||
)
|
)
|
||||||
parser.add_argument("-i", "--input_dir", type=str, help="Dataset root dir", required=True)
|
parser.add_argument("-i", "--input_dir", type=str, help="Dataset root dir", required=True)
|
||||||
parser.add_argument("-o", "--output_dir", type=str, help="Output Dataset dir", default="")
|
parser.add_argument("-o", "--output_dir", type=str, help="Output Dataset dir", default="")
|
||||||
|
@ -95,20 +95,20 @@ if __name__ == "__main__":
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-t",
|
"-t",
|
||||||
"--trim_just_beginning_and_end",
|
"--trim_just_beginning_and_end",
|
||||||
type=bool,
|
action=argparse.BooleanOptionalAction,
|
||||||
default=True,
|
default=True,
|
||||||
help="If True this script will trim just the beginning and end nonspeech parts. If False all nonspeech parts will be trim. Default True",
|
help="If True this script will trim just the beginning and end nonspeech parts. If False all nonspeech parts will be trimmed.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-c",
|
"-c",
|
||||||
"--use_cuda",
|
"--use_cuda",
|
||||||
type=bool,
|
action=argparse.BooleanOptionalAction,
|
||||||
default=False,
|
default=False,
|
||||||
help="If True use cuda",
|
help="If True use cuda",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--use_onnx",
|
"--use_onnx",
|
||||||
type=bool,
|
action=argparse.BooleanOptionalAction,
|
||||||
default=False,
|
default=False,
|
||||||
help="If True use onnx",
|
help="If True use onnx",
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
"""Command line interface."""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import contextlib
|
import contextlib
|
||||||
|
@ -136,19 +137,8 @@ $ tts --out_path output/path/speech.wav --model_name "<language>/<dataset>/<mode
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def str2bool(v):
|
def parse_args() -> argparse.Namespace:
|
||||||
if isinstance(v, bool):
|
"""Parse arguments."""
|
||||||
return v
|
|
||||||
if v.lower() in ("yes", "true", "t", "y", "1"):
|
|
||||||
return True
|
|
||||||
if v.lower() in ("no", "false", "f", "n", "0"):
|
|
||||||
return False
|
|
||||||
raise argparse.ArgumentTypeError("Boolean value expected.")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description=description.replace(" ```\n", ""),
|
description=description.replace(" ```\n", ""),
|
||||||
formatter_class=RawTextHelpFormatter,
|
formatter_class=RawTextHelpFormatter,
|
||||||
|
@ -156,10 +146,7 @@ def main():
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--list_models",
|
"--list_models",
|
||||||
type=str2bool,
|
action="store_true",
|
||||||
nargs="?",
|
|
||||||
const=True,
|
|
||||||
default=False,
|
|
||||||
help="list available pre-trained TTS and vocoder models.",
|
help="list available pre-trained TTS and vocoder models.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -207,7 +194,7 @@ def main():
|
||||||
default="tts_output.wav",
|
default="tts_output.wav",
|
||||||
help="Output wav file path.",
|
help="Output wav file path.",
|
||||||
)
|
)
|
||||||
parser.add_argument("--use_cuda", type=bool, help="Run model on CUDA.", default=False)
|
parser.add_argument("--use_cuda", action="store_true", help="Run model on CUDA.")
|
||||||
parser.add_argument("--device", type=str, help="Device to run model on.", default="cpu")
|
parser.add_argument("--device", type=str, help="Device to run model on.", default="cpu")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--vocoder_path",
|
"--vocoder_path",
|
||||||
|
@ -226,10 +213,7 @@ def main():
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--pipe_out",
|
"--pipe_out",
|
||||||
help="stdout the generated TTS wav file for shell pipe.",
|
help="stdout the generated TTS wav file for shell pipe.",
|
||||||
type=str2bool,
|
action="store_true",
|
||||||
nargs="?",
|
|
||||||
const=True,
|
|
||||||
default=False,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# args for multi-speaker synthesis
|
# args for multi-speaker synthesis
|
||||||
|
@ -261,25 +245,18 @@ def main():
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--list_speaker_idxs",
|
"--list_speaker_idxs",
|
||||||
help="List available speaker ids for the defined multi-speaker model.",
|
help="List available speaker ids for the defined multi-speaker model.",
|
||||||
type=str2bool,
|
action="store_true",
|
||||||
nargs="?",
|
|
||||||
const=True,
|
|
||||||
default=False,
|
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--list_language_idxs",
|
"--list_language_idxs",
|
||||||
help="List available language ids for the defined multi-lingual model.",
|
help="List available language ids for the defined multi-lingual model.",
|
||||||
type=str2bool,
|
action="store_true",
|
||||||
nargs="?",
|
|
||||||
const=True,
|
|
||||||
default=False,
|
|
||||||
)
|
)
|
||||||
# aux args
|
# aux args
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--save_spectogram",
|
"--save_spectogram",
|
||||||
type=bool,
|
action="store_true",
|
||||||
help="If true save raw spectogram for further (vocoder) processing in out_path.",
|
help="Save raw spectogram for further (vocoder) processing in out_path.",
|
||||||
default=False,
|
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--reference_wav",
|
"--reference_wav",
|
||||||
|
@ -295,8 +272,8 @@ def main():
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--progress_bar",
|
"--progress_bar",
|
||||||
type=str2bool,
|
action=argparse.BooleanOptionalAction,
|
||||||
help="If true shows a progress bar for the model download. Defaults to True",
|
help="Show a progress bar for the model download.",
|
||||||
default=True,
|
default=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -337,19 +314,23 @@ def main():
|
||||||
]
|
]
|
||||||
if not any(check_args):
|
if not any(check_args):
|
||||||
parser.parse_args(["-h"])
|
parser.parse_args(["-h"])
|
||||||
|
return args
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||||
|
args = parse_args()
|
||||||
|
|
||||||
pipe_out = sys.stdout if args.pipe_out else None
|
pipe_out = sys.stdout if args.pipe_out else None
|
||||||
|
|
||||||
with contextlib.redirect_stdout(None if args.pipe_out else sys.stdout):
|
with contextlib.redirect_stdout(None if args.pipe_out else sys.stdout):
|
||||||
# Late-import to make things load faster
|
# Late-import to make things load faster
|
||||||
from TTS.api import TTS
|
|
||||||
from TTS.utils.manage import ModelManager
|
from TTS.utils.manage import ModelManager
|
||||||
from TTS.utils.synthesizer import Synthesizer
|
from TTS.utils.synthesizer import Synthesizer
|
||||||
|
|
||||||
# load model manager
|
# load model manager
|
||||||
path = Path(__file__).parent / "../.models.json"
|
path = Path(__file__).parent / "../.models.json"
|
||||||
manager = ModelManager(path, progress_bar=args.progress_bar)
|
manager = ModelManager(path, progress_bar=args.progress_bar)
|
||||||
api = TTS()
|
|
||||||
|
|
||||||
tts_path = None
|
tts_path = None
|
||||||
tts_config_path = None
|
tts_config_path = None
|
||||||
|
|
|
@ -14,5 +14,5 @@ To run the code, you need to follow the same flow as in TTS.
|
||||||
|
|
||||||
- Define 'config.json' for your needs. Note that audio parameters should match your TTS model.
|
- Define 'config.json' for your needs. Note that audio parameters should match your TTS model.
|
||||||
- Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
|
- Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
|
||||||
- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
|
- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
|
||||||
- Watch training on Tensorboard as in TTS
|
- Watch training on Tensorboard as in TTS
|
||||||
|
|
|
@ -15,7 +15,7 @@ Run the server with the official models.
|
||||||
```python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan```
|
```python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan```
|
||||||
|
|
||||||
Run the server with the official models on a GPU.
|
Run the server with the official models on a GPU.
|
||||||
```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda True```
|
```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda```
|
||||||
|
|
||||||
Run the server with custom models.
|
Run the server with custom models.
|
||||||
```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth --vocoder_config /path/to/vocoder/config.json```
|
```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth --vocoder_config /path/to/vocoder/config.json```
|
||||||
|
|
|
@ -32,7 +32,7 @@ For the GPU version, you need to have the latest NVIDIA drivers installed.
|
||||||
With `nvidia-smi` you can check the supported CUDA version; it must be >= 11.8.
|
With `nvidia-smi` you can check the supported CUDA version; it must be >= 11.8.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker run --rm --gpus all -v ~/tts-output:/root/tts-output ghcr.io/coqui-ai/tts --text "Hello." --out_path /root/tts-output/hello.wav --use_cuda true
|
docker run --rm --gpus all -v ~/tts-output:/root/tts-output ghcr.io/coqui-ai/tts --text "Hello." --out_path /root/tts-output/hello.wav --use_cuda
|
||||||
```
|
```
|
||||||
|
|
||||||
## Start a server
|
## Start a server
|
||||||
|
@ -50,7 +50,7 @@ python3 TTS/server/server.py --model_name tts_models/en/vctk/vits
|
||||||
```bash
|
```bash
|
||||||
docker run --rm -it -p 5002:5002 --gpus all --entrypoint /bin/bash ghcr.io/coqui-ai/tts
|
docker run --rm -it -p 5002:5002 --gpus all --entrypoint /bin/bash ghcr.io/coqui-ai/tts
|
||||||
python3 TTS/server/server.py --list_models #To get the list of available models
|
python3 TTS/server/server.py --list_models #To get the list of available models
|
||||||
python3 TTS/server/server.py --model_name tts_models/en/vctk/vits --use_cuda true
|
python3 TTS/server/server.py --model_name tts_models/en/vctk/vits --use_cuda
|
||||||
```
|
```
|
||||||
|
|
||||||
Click [there](http://[::1]:5002/) and have fun with the server!
|
Click [there](http://[::1]:5002/) and have fun with the server!
|
||||||
|
|
|
@ -69,14 +69,12 @@ tts --model_name tts_models/multilingual/multi-dataset/bark \
|
||||||
--text "This is an example." \
|
--text "This is an example." \
|
||||||
--out_path "output.wav" \
|
--out_path "output.wav" \
|
||||||
--voice_dir bark_voices/ \
|
--voice_dir bark_voices/ \
|
||||||
--speaker_idx "ljspeech" \
|
--speaker_idx "ljspeech"
|
||||||
--progress_bar True
|
|
||||||
|
|
||||||
# Random voice generation
|
# Random voice generation
|
||||||
tts --model_name tts_models/multilingual/multi-dataset/bark \
|
tts --model_name tts_models/multilingual/multi-dataset/bark \
|
||||||
--text "This is an example." \
|
--text "This is an example." \
|
||||||
--out_path "output.wav" \
|
--out_path "output.wav"
|
||||||
--progress_bar True
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -57,14 +57,12 @@ tts --model_name tts_models/en/multi-dataset/tortoise-v2 \
|
||||||
--text "This is an example." \
|
--text "This is an example." \
|
||||||
--out_path "output.wav" \
|
--out_path "output.wav" \
|
||||||
--voice_dir path/to/tortoise/voices/dir/ \
|
--voice_dir path/to/tortoise/voices/dir/ \
|
||||||
--speaker_idx "lj" \
|
--speaker_idx "lj"
|
||||||
--progress_bar True
|
|
||||||
|
|
||||||
# Random voice generation
|
# Random voice generation
|
||||||
tts --model_name tts_models/en/multi-dataset/tortoise-v2 \
|
tts --model_name tts_models/en/multi-dataset/tortoise-v2 \
|
||||||
--text "This is an example." \
|
--text "This is an example." \
|
||||||
--out_path "output.wav" \
|
--out_path "output.wav"
|
||||||
--progress_bar True
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -72,7 +72,7 @@ You can do inference using one of the available speakers using the following com
|
||||||
--text "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent." \
|
--text "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent." \
|
||||||
--speaker_idx "Ana Florence" \
|
--speaker_idx "Ana Florence" \
|
||||||
--language_idx en \
|
--language_idx en \
|
||||||
--use_cuda true
|
--use_cuda
|
||||||
```
|
```
|
||||||
|
|
||||||
##### Clone a voice
|
##### Clone a voice
|
||||||
|
@ -85,7 +85,7 @@ You can clone a speaker voice using a single or multiple references:
|
||||||
--text "Bugün okula gitmek istemiyorum." \
|
--text "Bugün okula gitmek istemiyorum." \
|
||||||
--speaker_wav /path/to/target/speaker.wav \
|
--speaker_wav /path/to/target/speaker.wav \
|
||||||
--language_idx tr \
|
--language_idx tr \
|
||||||
--use_cuda true
|
--use_cuda
|
||||||
```
|
```
|
||||||
|
|
||||||
###### Multiple references
|
###### Multiple references
|
||||||
|
@ -94,7 +94,7 @@ You can clone a speaker voice using a single or multiple references:
|
||||||
--text "Bugün okula gitmek istemiyorum." \
|
--text "Bugün okula gitmek istemiyorum." \
|
||||||
--speaker_wav /path/to/target/speaker.wav /path/to/target/speaker_2.wav /path/to/target/speaker_3.wav \
|
--speaker_wav /path/to/target/speaker.wav /path/to/target/speaker_2.wav /path/to/target/speaker_3.wav \
|
||||||
--language_idx tr \
|
--language_idx tr \
|
||||||
--use_cuda true
|
--use_cuda
|
||||||
```
|
```
|
||||||
or for all wav files in a directory you can use:
|
or for all wav files in a directory you can use:
|
||||||
|
|
||||||
|
@ -103,7 +103,7 @@ or for all wav files in a directory you can use:
|
||||||
--text "Bugün okula gitmek istemiyorum." \
|
--text "Bugün okula gitmek istemiyorum." \
|
||||||
--speaker_wav /path/to/target/*.wav \
|
--speaker_wav /path/to/target/*.wav \
|
||||||
--language_idx tr \
|
--language_idx tr \
|
||||||
--use_cuda true
|
--use_cuda
|
||||||
```
|
```
|
||||||
|
|
||||||
#### 🐸TTS API
|
#### 🐸TTS API
|
||||||
|
|
|
@ -65,7 +65,7 @@ if not config.model_args.use_aligner:
|
||||||
model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA")
|
model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA")
|
||||||
# TODO: make compute_attention python callable
|
# TODO: make compute_attention python callable
|
||||||
os.system(
|
os.system(
|
||||||
f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda true"
|
f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda"
|
||||||
)
|
)
|
||||||
|
|
||||||
# INITIALIZE THE AUDIO PROCESSOR
|
# INITIALIZE THE AUDIO PROCESSOR
|
||||||
|
|
|
@ -64,7 +64,7 @@ if not config.model_args.use_aligner:
|
||||||
model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA")
|
model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA")
|
||||||
# TODO: make compute_attention python callable
|
# TODO: make compute_attention python callable
|
||||||
os.system(
|
os.system(
|
||||||
f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda true"
|
f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda"
|
||||||
)
|
)
|
||||||
|
|
||||||
# INITIALIZE THE AUDIO PROCESSOR
|
# INITIALIZE THE AUDIO PROCESSOR
|
||||||
|
|
|
@ -67,7 +67,7 @@ if not config.model_args.use_aligner:
|
||||||
model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA")
|
model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA")
|
||||||
# TODO: make compute_attention python callable
|
# TODO: make compute_attention python callable
|
||||||
os.system(
|
os.system(
|
||||||
f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda true"
|
f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda"
|
||||||
)
|
)
|
||||||
|
|
||||||
# INITIALIZE THE AUDIO PROCESSOR
|
# INITIALIZE THE AUDIO PROCESSOR
|
||||||
|
|
|
@ -50,13 +50,13 @@ def run_models(offset=0, step=1):
|
||||||
speaker_id = list(speaker_manager.name_to_id.keys())[0]
|
speaker_id = list(speaker_manager.name_to_id.keys())[0]
|
||||||
run_cli(
|
run_cli(
|
||||||
f"tts --model_name {model_name} "
|
f"tts --model_name {model_name} "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --speaker_idx "{speaker_id}" --language_idx "{language_id}" --progress_bar False'
|
f'--text "This is an example." --out_path "{output_path}" --speaker_idx "{speaker_id}" --language_idx "{language_id}" --no-progress_bar'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# single-speaker model
|
# single-speaker model
|
||||||
run_cli(
|
run_cli(
|
||||||
f"tts --model_name {model_name} "
|
f"tts --model_name {model_name} "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar'
|
||||||
)
|
)
|
||||||
# remove downloaded models
|
# remove downloaded models
|
||||||
shutil.rmtree(local_download_dir)
|
shutil.rmtree(local_download_dir)
|
||||||
|
@ -66,7 +66,7 @@ def run_models(offset=0, step=1):
|
||||||
reference_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0032.wav")
|
reference_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0032.wav")
|
||||||
run_cli(
|
run_cli(
|
||||||
f"tts --model_name {model_name} "
|
f"tts --model_name {model_name} "
|
||||||
f'--out_path "{output_path}" --source_wav "{speaker_wav}" --target_wav "{reference_wav}" --progress_bar False'
|
f'--out_path "{output_path}" --source_wav "{speaker_wav}" --target_wav "{reference_wav}" --no-progress_bar'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# only download the model
|
# only download the model
|
||||||
|
@ -83,14 +83,14 @@ def test_xtts():
|
||||||
run_cli(
|
run_cli(
|
||||||
"yes | "
|
"yes | "
|
||||||
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 "
|
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True '
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar --use_cuda '
|
||||||
f'--speaker_wav "{speaker_wav}" --language_idx "en"'
|
f'--speaker_wav "{speaker_wav}" --language_idx "en"'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
run_cli(
|
run_cli(
|
||||||
"yes | "
|
"yes | "
|
||||||
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 "
|
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False '
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar '
|
||||||
f'--speaker_wav "{speaker_wav}" --language_idx "en"'
|
f'--speaker_wav "{speaker_wav}" --language_idx "en"'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -138,14 +138,14 @@ def test_xtts_v2():
|
||||||
run_cli(
|
run_cli(
|
||||||
"yes | "
|
"yes | "
|
||||||
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 "
|
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True '
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar --use_cuda '
|
||||||
f'--speaker_wav "{speaker_wav}" "{speaker_wav_2}" --language_idx "en"'
|
f'--speaker_wav "{speaker_wav}" "{speaker_wav_2}" --language_idx "en"'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
run_cli(
|
run_cli(
|
||||||
"yes | "
|
"yes | "
|
||||||
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 "
|
f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False '
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar '
|
||||||
f'--speaker_wav "{speaker_wav}" "{speaker_wav_2}" --language_idx "en"'
|
f'--speaker_wav "{speaker_wav}" "{speaker_wav_2}" --language_idx "en"'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -215,12 +215,12 @@ def test_tortoise():
|
||||||
if use_gpu:
|
if use_gpu:
|
||||||
run_cli(
|
run_cli(
|
||||||
f" tts --model_name tts_models/en/multi-dataset/tortoise-v2 "
|
f" tts --model_name tts_models/en/multi-dataset/tortoise-v2 "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True'
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar --use_cuda'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
run_cli(
|
run_cli(
|
||||||
f" tts --model_name tts_models/en/multi-dataset/tortoise-v2 "
|
f" tts --model_name tts_models/en/multi-dataset/tortoise-v2 "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -231,12 +231,12 @@ def test_bark():
|
||||||
if use_gpu:
|
if use_gpu:
|
||||||
run_cli(
|
run_cli(
|
||||||
f" tts --model_name tts_models/multilingual/multi-dataset/bark "
|
f" tts --model_name tts_models/multilingual/multi-dataset/bark "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True'
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar --use_cuda'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
run_cli(
|
run_cli(
|
||||||
f" tts --model_name tts_models/multilingual/multi-dataset/bark "
|
f" tts --model_name tts_models/multilingual/multi-dataset/bark "
|
||||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
|
f'--text "This is an example." --out_path "{output_path}" --no-progress_bar'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -249,7 +249,7 @@ def test_voice_conversion():
|
||||||
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
||||||
run_cli(
|
run_cli(
|
||||||
f"tts --model_name {model_name}"
|
f"tts --model_name {model_name}"
|
||||||
f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} --progress_bar False"
|
f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} --no-progress_bar"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue