diff --git a/README.md b/README.md index 934e9443..720585db 100644 --- a/README.md +++ b/README.md @@ -294,99 +294,123 @@ api.tts_with_vc_to_file( ``` ### Command-line `tts` + + + +Synthesize speech on command line. + +You can either use your trained model or choose a model from the provided list. + +If you don't specify any models, then it uses LJSpeech based English model. + #### Single Speaker Models - List provided models: - ``` - $ tts --list_models - ``` + ``` + $ tts --list_models + ``` + - Get model info (for both tts_models and vocoder_models): - - Query by type/name: - The model_info_by_name uses the name as it from the --list_models. - ``` - $ tts --model_info_by_name "///" - ``` - For example: - ``` - $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts - ``` - ``` - $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 - ``` - - Query by type/idx: - The model_query_idx uses the corresponding idx from --list_models. - ``` - $ tts --model_info_by_idx "/" - ``` - For example: + - Query by type/name: + The model_info_by_name uses the name as it from the --list_models. + ``` + $ tts --model_info_by_name "///" + ``` + For example: + ``` + $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts + $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 + ``` + - Query by type/idx: + The model_query_idx uses the corresponding idx from --list_models. 
- ``` - $ tts --model_info_by_idx tts_models/3 - ``` + ``` + $ tts --model_info_by_idx "/" + ``` + + For example: + + ``` + $ tts --model_info_by_idx tts_models/3 + ``` + + - Query info for model info by full name: + ``` + $ tts --model_info_by_name "///" + ``` - Run TTS with default models: - ``` - $ tts --text "Text for TTS" --out_path output/path/speech.wav - ``` + ``` + $ tts --text "Text for TTS" --out_path output/path/speech.wav + ``` - Run a TTS model with its default vocoder model: - ``` - $ tts --text "Text for TTS" --model_name "///" --out_path output/path/speech.wav - ``` + ``` + $ tts --text "Text for TTS" --model_name "///" --out_path output/path/speech.wav + ``` + For example: - ``` - $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav - ``` + ``` + $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav + ``` - Run with specific TTS and vocoder models from the list: - ``` - $ tts --text "Text for TTS" --model_name "///" --vocoder_name "///" --out_path output/path/speech.wav - ``` + ``` + $ tts --text "Text for TTS" --model_name "///" --vocoder_name "///" --out_path output/path/speech.wav + ``` For example: - ``` - $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav - ``` - + ``` + $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav + ``` - Run your own TTS model (Using Griffin-Lim Vocoder): - ``` - $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav - ``` + ``` + $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav + ``` - Run your own TTS and Vocoder models: - ``` - $ tts --text 
"Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav - --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json - ``` + + ``` + $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav + --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json + ``` #### Multi-speaker Models - List the available speakers and choose a among them: - ``` - $ tts --model_name "//" --list_speaker_idxs - ``` + ``` + $ tts --model_name "//" --list_speaker_idxs + ``` - Run the multi-speaker TTS model with the target speaker ID: - ``` - $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "//" --speaker_idx - ``` + ``` + $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "//" --speaker_idx + ``` - Run your own multi-speaker TTS model: - ``` - $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx - ``` + ``` + $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx + ``` + +### Voice Conversion Models + +``` +$ tts --out_path output/path/speech.wav --model_name "//" --source_wav --target_wav +``` + + ## Directory Structure ``` diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index e8de18b0..99fc2a58 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -12,6 +12,121 @@ from TTS.api import TTS from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer +description = """ +Synthesize speech on command line. + +You can either use your trained model or choose a model from the provided list. 
+ +If you don't specify any models, then it uses LJSpeech based English model. + +#### Single Speaker Models + +- List provided models: + + ``` + $ tts --list_models + ``` + +- Get model info (for both tts_models and vocoder_models): + + - Query by type/name: + The model_info_by_name uses the name as it from the --list_models. + ``` + $ tts --model_info_by_name "///" + ``` + For example: + ``` + $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts + $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 + ``` + - Query by type/idx: + The model_query_idx uses the corresponding idx from --list_models. + + ``` + $ tts --model_info_by_idx "/" + ``` + + For example: + + ``` + $ tts --model_info_by_idx tts_models/3 + ``` + + - Query info for model info by full name: + ``` + $ tts --model_info_by_name "///" + ``` + +- Run TTS with default models: + + ``` + $ tts --text "Text for TTS" --out_path output/path/speech.wav + ``` + +- Run a TTS model with its default vocoder model: + + ``` + $ tts --text "Text for TTS" --model_name "///" --out_path output/path/speech.wav + ``` + + For example: + + ``` + $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav + ``` + +- Run with specific TTS and vocoder models from the list: + + ``` + $ tts --text "Text for TTS" --model_name "///" --vocoder_name "///" --out_path output/path/speech.wav + ``` + + For example: + + ``` + $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav + ``` + +- Run your own TTS model (Using Griffin-Lim Vocoder): + + ``` + $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav + ``` + +- Run your own TTS and Vocoder models: + + ``` + $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav + 
--vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json + ``` + +#### Multi-speaker Models + +- List the available speakers and choose a among them: + + ``` + $ tts --model_name "//" --list_speaker_idxs + ``` + +- Run the multi-speaker TTS model with the target speaker ID: + + ``` + $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "//" --speaker_idx + ``` + +- Run your own multi-speaker TTS model: + + ``` + $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx + ``` + +### Voice Conversion Models + +``` +$ tts --out_path output/path/speech.wav --model_name "//" --source_wav --target_wav +``` +""" + def str2bool(v): if isinstance(v, bool): @@ -24,92 +139,6 @@ def str2bool(v): def main(): - description = """Synthesize speech on command line. - -You can either use your trained model or choose a model from the provided list. - -If you don't specify any models, then it uses LJSpeech based English model. 
- -## Example Runs - -### Single Speaker Models - -- List provided models: - - ``` - $ tts --list_models - ``` - -- Query info for model info by idx: - - ``` - $ tts --model_info_by_idx "/" - ``` - -- Query info for model info by full name: - - ``` - $ tts --model_info_by_name "///" - ``` - -- Run TTS with default models: - - ``` - $ tts --text "Text for TTS" - ``` - -- Run a TTS model with its default vocoder model: - - ``` - $ tts --text "Text for TTS" --model_name "/// - ``` - -- Run with specific TTS and vocoder models from the list: - - ``` - $ tts --text "Text for TTS" --model_name "///" --vocoder_name "///" --output_path - ``` - -- Run your own TTS model (Using Griffin-Lim Vocoder): - - ``` - $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav - ``` - -- Run your own TTS and Vocoder models: - ``` - $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth --out_path output/path/speech.wav - --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json - ``` - -### Multi-speaker Models - -- List the available speakers and choose as among them: - - ``` - $ tts --model_name "//" --list_speaker_idxs - ``` - -- Run the multi-speaker TTS model with the target speaker ID: - - ``` - $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "//" --speaker_idx - ``` - -- Run your own multi-speaker TTS model: - - ``` - $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth --speakers_file_path path/to/speaker.json --speaker_idx - ``` - -### Voice Conversion Models - - ``` - $ tts --out_path output/path/speech.wav --model_name "//" --source_wav --target_wav - ``` - """ - # We remove Markdown code formatting programmatically here to allow us to copy-and-paste from main README to keep - # documentation in sync more easily. 
parser = argparse.ArgumentParser( description=description.replace(" ```\n", ""), formatter_class=RawTextHelpFormatter,
diff --git a/scripts/sync_readme.py b/scripts/sync_readme.py
new file mode 100644
index 00000000..58428681
--- /dev/null
+++ b/scripts/sync_readme.py
@@ -0,0 +1,43 @@
+import argparse
+from pathlib import Path
+
+
+def replace_between_markers(content: str, marker: str, replacement: str) -> str:
+    """Return ``content`` with the text between the ``marker`` comments replaced.
+
+    The region is delimited by the HTML comments ``<!-- begin-{marker} -->`` and
+    ``<!-- end-{marker} -->``, each separated from the region by a blank line.
+    Raises ``ValueError`` (from ``str.index``) if either marker is missing.
+    """
+    start_marker = f"<!-- begin-{marker} -->\n\n"
+    end_marker = f"\n\n<!-- end-{marker} -->\n"
+    start_index = content.index(start_marker) + len(start_marker)
+    end_index = content.index(end_marker)
+    return content[:start_index] + replacement + content[end_index:]
+
+
+def sync_readme():
+    """Sync the CLI section of README.md with the ``tts`` command description.
+
+    With ``--check``, only verify that README.md is up to date and exit with
+    status 42 if it is not; the file is never written in this mode.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--check", action="store_true", default=False)
+    args = ap.parse_args()
+    readme_path = Path(__file__).parent.parent / "README.md"
+    orig_content = readme_path.read_text(encoding="utf-8")
+    from TTS.bin.synthesize import description
+
+    new_content = replace_between_markers(orig_content, "tts-readme", description.strip())
+    if args.check:
+        if orig_content != new_content:
+            print("README.md is out of sync; please edit TTS/bin/synthesize.py and run scripts/sync_readme.py")
+            raise SystemExit(42)
+        return
+    readme_path.write_text(new_content, encoding="utf-8")
+    print("Updated README.md")
+
+
+if __name__ == "__main__":
+    sync_readme()
diff --git a/tests/aux_tests/test_readme.py b/tests/aux_tests/test_readme.py
new file mode 100644
index 00000000..32b26fc6
--- /dev/null
+++ b/tests/aux_tests/test_readme.py
@@ -0,0 +1,9 @@
+import subprocess
+import sys
+from pathlib import Path
+
+
+def test_readme_up_to_date():
+    root = Path(__file__).parent.parent.parent
+    sync_readme = root / "scripts" / "sync_readme.py"
+    subprocess.check_call([sys.executable, str(sync_readme), "--check"], cwd=root)