mirror of https://github.com/coqui-ai/TTS.git
Merge pull request #217 from idiap/stdout
fix(bin): log to stdout in cli tools
This commit is contained in:
commit
370fb1da81
|
@ -2,6 +2,7 @@ import argparse
|
||||||
import importlib
|
import importlib
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -18,7 +19,7 @@ from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
|
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
# pylint: disable=bad-option-value
|
# pylint: disable=bad-option-value
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
@ -102,7 +103,7 @@ def compute_embeddings(
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
|
description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
|
||||||
|
|
|
@ -5,6 +5,7 @@ import argparse
|
||||||
import glob
|
import glob
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
@ -18,7 +19,7 @@ from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Run preprocessing process."""
|
"""Run preprocessing process."""
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stderr, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
|
parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
|
||||||
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
|
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
@ -53,7 +54,7 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="""Compute the accuracy of the encoder.\n\n"""
|
description="""Compute the accuracy of the encoder.\n\n"""
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
@ -273,7 +274,7 @@ def main(args): # pylint: disable=redefined-outer-name
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)
|
parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
from TTS.config import load_config
|
from TTS.config import load_config
|
||||||
|
@ -10,7 +11,7 @@ from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
# pylint: disable=bad-option-value
|
# pylint: disable=bad-option-value
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
|
import sys
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
from tqdm.contrib.concurrent import process_map
|
from tqdm.contrib.concurrent import process_map
|
||||||
|
@ -20,7 +21,7 @@ def compute_phonemes(item):
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
# pylint: disable=W0601
|
# pylint: disable=W0601
|
||||||
global c, phonemizer
|
global c, phonemizer
|
||||||
|
|
|
@ -4,6 +4,7 @@ import logging
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
import sys
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
@ -77,7 +78,7 @@ def preprocess_audios():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end"
|
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end"
|
||||||
|
|
|
@ -311,8 +311,9 @@ def parse_args() -> argparse.Namespace:
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
"""Entry point for `tts` command line interface."""
|
"""Entry point for `tts` command line interface."""
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
stream = sys.stderr if args.pipe_out else sys.stdout
|
||||||
|
setup_logger("TTS", level=logging.INFO, stream=stream, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
pipe_out = sys.stdout if args.pipe_out else None
|
pipe_out = sys.stdout if args.pipe_out else None
|
||||||
|
|
||||||
|
|
|
@ -322,7 +322,7 @@ def main(args): # pylint: disable=redefined-outer-name
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()
|
args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from trainer import Trainer, TrainerArgs
|
from trainer import Trainer, TrainerArgs
|
||||||
|
@ -17,7 +18,7 @@ class TrainTTSArgs(TrainerArgs):
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Run `tts` model training directly by a `config.json` file."""
|
"""Run `tts` model training directly by a `config.json` file."""
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
# init trainer args
|
# init trainer args
|
||||||
train_args = TrainTTSArgs()
|
train_args = TrainTTSArgs()
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from trainer import Trainer, TrainerArgs
|
from trainer import Trainer, TrainerArgs
|
||||||
|
@ -18,7 +19,7 @@ class TrainVocoderArgs(TrainerArgs):
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Run `tts` model training directly by a `config.json` file."""
|
"""Run `tts` model training directly by a `config.json` file."""
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
# init trainer args
|
# init trainer args
|
||||||
train_args = TrainVocoderArgs()
|
train_args = TrainVocoderArgs()
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
import sys
|
||||||
from itertools import product as cartesian_product
|
from itertools import product as cartesian_product
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -17,7 +18,7 @@ from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset
|
||||||
from TTS.vocoder.models import setup_model
|
from TTS.vocoder.models import setup_model
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--model_path", type=str, help="Path to model checkpoint.")
|
parser.add_argument("--model_path", type=str, help="Path to model checkpoint.")
|
||||||
|
|
|
@ -216,7 +216,7 @@ def processor(directory, subset, force_process):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
if len(sys.argv) != 4:
|
if len(sys.argv) != 4:
|
||||||
print("Usage: python prepare_data.py save_directory user password")
|
print("Usage: python prepare_data.py save_directory user password")
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
|
@ -25,7 +25,7 @@ from TTS.utils.manage import ModelManager
|
||||||
from TTS.utils.synthesizer import Synthesizer
|
from TTS.utils.synthesizer import Synthesizer
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||||
|
|
||||||
|
|
||||||
def create_argparser() -> argparse.ArgumentParser:
|
def create_argparser() -> argparse.ArgumentParser:
|
||||||
|
|
|
@ -2,9 +2,10 @@
|
||||||
import datetime
|
import datetime
|
||||||
import importlib
|
import importlib
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Callable, Dict, Optional, TypeVar, Union
|
from typing import Any, Callable, Dict, Optional, TextIO, TypeVar, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from packaging.version import Version
|
from packaging.version import Version
|
||||||
|
@ -107,25 +108,34 @@ def setup_logger(
|
||||||
level: int = logging.INFO,
|
level: int = logging.INFO,
|
||||||
*,
|
*,
|
||||||
formatter: Optional[logging.Formatter] = None,
|
formatter: Optional[logging.Formatter] = None,
|
||||||
screen: bool = False,
|
stream: Optional[TextIO] = None,
|
||||||
tofile: bool = False,
|
log_dir: Optional[Union[str, os.PathLike[Any]]] = None,
|
||||||
log_dir: str = "logs",
|
|
||||||
log_name: str = "log",
|
log_name: str = "log",
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""Set up a logger.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
logger_name: Name of the logger to set up
|
||||||
|
level: Logging level
|
||||||
|
formatter: Formatter for the logger
|
||||||
|
stream: Add a StreamHandler for the given stream, e.g. sys.stderr or sys.stdout
|
||||||
|
log_dir: Folder to write the log file (no file created if None)
|
||||||
|
log_name: Prefix of the log file name
|
||||||
|
"""
|
||||||
lg = logging.getLogger(logger_name)
|
lg = logging.getLogger(logger_name)
|
||||||
if formatter is None:
|
if formatter is None:
|
||||||
formatter = logging.Formatter(
|
formatter = logging.Formatter(
|
||||||
"%(asctime)s.%(msecs)03d - %(levelname)-8s - %(name)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S"
|
"%(asctime)s.%(msecs)03d - %(levelname)-8s - %(name)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S"
|
||||||
)
|
)
|
||||||
lg.setLevel(level)
|
lg.setLevel(level)
|
||||||
if tofile:
|
if log_dir is not None:
|
||||||
Path(log_dir).mkdir(exist_ok=True, parents=True)
|
Path(log_dir).mkdir(exist_ok=True, parents=True)
|
||||||
log_file = Path(log_dir) / f"{log_name}_{get_timestamp()}.log"
|
log_file = Path(log_dir) / f"{log_name}_{get_timestamp()}.log"
|
||||||
fh = logging.FileHandler(log_file, mode="w")
|
fh = logging.FileHandler(log_file, mode="w")
|
||||||
fh.setFormatter(formatter)
|
fh.setFormatter(formatter)
|
||||||
lg.addHandler(fh)
|
lg.addHandler(fh)
|
||||||
if screen:
|
if stream is not None:
|
||||||
sh = logging.StreamHandler()
|
sh = logging.StreamHandler(stream)
|
||||||
sh.setFormatter(formatter)
|
sh.setFormatter(formatter)
|
||||||
lg.addHandler(sh)
|
lg.addHandler(sh)
|
||||||
|
|
||||||
|
|
|
@ -163,12 +163,13 @@ from TTS.api import TTS
|
||||||
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")
|
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")
|
||||||
|
|
||||||
# generate speech by cloning a voice using default settings
|
# generate speech by cloning a voice using default settings
|
||||||
tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
|
tts.tts_to_file(
|
||||||
|
text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
|
||||||
file_path="output.wav",
|
file_path="output.wav",
|
||||||
speaker="Ana Florence",
|
speaker="Ana Florence",
|
||||||
language="en",
|
language="en",
|
||||||
split_sentences=True
|
split_sentences=True
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -230,6 +231,11 @@ out = model.inference(
|
||||||
torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
|
torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
You can also use the Coqui speakers:
|
||||||
|
|
||||||
|
```python
|
||||||
|
gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values()
|
||||||
|
```
|
||||||
|
|
||||||
#### Streaming manually
|
#### Streaming manually
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue