mirror of https://github.com/coqui-ai/TTS.git
Merge branch 'p3_11' into dev
This commit is contained in:
commit
6b9ebf5aab
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -21,7 +21,7 @@ jobs:
|
|||
fi
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8
|
||||
python-version: 3.9
|
||||
- run: |
|
||||
python -m pip install -U pip setuptools wheel build
|
||||
- run: |
|
||||
|
@ -36,7 +36,7 @@ jobs:
|
|||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.7", "3.8", "3.9", "3.10"]
|
||||
python-version: ["3.9", "3.10", "3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/setup-python@v2
|
||||
|
@ -64,14 +64,6 @@ jobs:
|
|||
with:
|
||||
name: "sdist"
|
||||
path: "dist/"
|
||||
- uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: "wheel-3.7"
|
||||
path: "dist/"
|
||||
- uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: "wheel-3.8"
|
||||
path: "dist/"
|
||||
- uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: "wheel-3.9"
|
||||
|
@ -80,6 +72,10 @@ jobs:
|
|||
with:
|
||||
name: "wheel-3.10"
|
||||
path: "dist/"
|
||||
- uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: "wheel-3.11"
|
||||
path: "dist/"
|
||||
- run: |
|
||||
ls -lh dist/
|
||||
- name: Setup PyPI config
|
||||
|
@ -91,7 +87,7 @@ jobs:
|
|||
EOF
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.8
|
||||
python-version: 3.9
|
||||
- run: |
|
||||
python -m pip install twine
|
||||
- run: |
|
||||
|
|
|
@ -42,6 +42,6 @@ jobs:
|
|||
run: |
|
||||
python3 -m pip install .[all]
|
||||
python3 setup.py egg_info
|
||||
- name: Lint check
|
||||
run: |
|
||||
make lint
|
||||
# - name: Lint check
|
||||
# run: |
|
||||
# make lint
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
@ -43,6 +43,7 @@ jobs:
|
|||
run: python3 -m pip install --upgrade pip setuptools wheel
|
||||
- name: Replace scarf urls
|
||||
run: |
|
||||
sed -i 's/https:\/\/coqui.gateway.scarf.sh\/hf\/bark\//https:\/\/huggingface.co\/erogol\/bark\/resolve\/main\//g' TTS/.models.json
|
||||
sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
|
||||
- name: Install TTS
|
||||
run: |
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.7, 3.8, 3.9, "3.10"]
|
||||
python-version: [3.9, "3.10", "3.11"]
|
||||
experimental: [false]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
|
|
@ -23,7 +23,7 @@ colormap = (
|
|||
[0, 0, 0],
|
||||
[183, 183, 183],
|
||||
],
|
||||
dtype=np.float,
|
||||
dtype=float,
|
||||
)
|
||||
/ 255
|
||||
)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import os
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict
|
||||
|
||||
from TTS.tts.configs.shared_configs import BaseTTSConfig
|
||||
|
@ -46,11 +46,11 @@ class BarkConfig(BaseTTSConfig):
|
|||
"""
|
||||
|
||||
model: str = "bark"
|
||||
audio: BarkAudioConfig = BarkAudioConfig()
|
||||
audio: BarkAudioConfig = field(default_factory=BarkAudioConfig)
|
||||
num_chars: int = 0
|
||||
semantic_config: GPTConfig = GPTConfig()
|
||||
fine_config: FineGPTConfig = FineGPTConfig()
|
||||
coarse_config: GPTConfig = GPTConfig()
|
||||
semantic_config: GPTConfig = field(default_factory=GPTConfig)
|
||||
fine_config: FineGPTConfig = field(default_factory=FineGPTConfig)
|
||||
coarse_config: GPTConfig = field(default_factory=GPTConfig)
|
||||
CONTEXT_WINDOW_SIZE: int = 1024
|
||||
SEMANTIC_RATE_HZ: float = 49.9
|
||||
SEMANTIC_VOCAB_SIZE: int = 10_000
|
||||
|
|
|
@ -113,7 +113,7 @@ class FastPitchConfig(BaseTTSConfig):
|
|||
base_model: str = "forward_tts"
|
||||
|
||||
# model specific params
|
||||
model_args: ForwardTTSArgs = ForwardTTSArgs()
|
||||
model_args: ForwardTTSArgs = field(default_factory=ForwardTTSArgs)
|
||||
|
||||
# multi-speaker settings
|
||||
num_speakers: int = 0
|
||||
|
|
|
@ -107,7 +107,7 @@ class FastSpeechConfig(BaseTTSConfig):
|
|||
base_model: str = "forward_tts"
|
||||
|
||||
# model specific params
|
||||
model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=False)
|
||||
model_args: ForwardTTSArgs = field(default_factory=lambda: ForwardTTSArgs(use_pitch=False))
|
||||
|
||||
# multi-speaker settings
|
||||
num_speakers: int = 0
|
||||
|
|
|
@ -123,7 +123,7 @@ class Fastspeech2Config(BaseTTSConfig):
|
|||
base_model: str = "forward_tts"
|
||||
|
||||
# model specific params
|
||||
model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=True, use_energy=True)
|
||||
model_args: ForwardTTSArgs = field(default_factory=lambda: ForwardTTSArgs(use_pitch=True, use_energy=True))
|
||||
|
||||
# multi-speaker settings
|
||||
num_speakers: int = 0
|
||||
|
|
|
@ -103,26 +103,28 @@ class SpeedySpeechConfig(BaseTTSConfig):
|
|||
base_model: str = "forward_tts"
|
||||
|
||||
# set model args as SpeedySpeech
|
||||
model_args: ForwardTTSArgs = ForwardTTSArgs(
|
||||
use_pitch=False,
|
||||
encoder_type="residual_conv_bn",
|
||||
encoder_params={
|
||||
"kernel_size": 4,
|
||||
"dilations": 4 * [1, 2, 4] + [1],
|
||||
"num_conv_blocks": 2,
|
||||
"num_res_blocks": 13,
|
||||
},
|
||||
decoder_type="residual_conv_bn",
|
||||
decoder_params={
|
||||
"kernel_size": 4,
|
||||
"dilations": 4 * [1, 2, 4, 8] + [1],
|
||||
"num_conv_blocks": 2,
|
||||
"num_res_blocks": 17,
|
||||
},
|
||||
out_channels=80,
|
||||
hidden_channels=128,
|
||||
positional_encoding=True,
|
||||
detach_duration_predictor=True,
|
||||
model_args: ForwardTTSArgs = field(
|
||||
default_factory=lambda: ForwardTTSArgs(
|
||||
use_pitch=False,
|
||||
encoder_type="residual_conv_bn",
|
||||
encoder_params={
|
||||
"kernel_size": 4,
|
||||
"dilations": 4 * [1, 2, 4] + [1],
|
||||
"num_conv_blocks": 2,
|
||||
"num_res_blocks": 13,
|
||||
},
|
||||
decoder_type="residual_conv_bn",
|
||||
decoder_params={
|
||||
"kernel_size": 4,
|
||||
"dilations": 4 * [1, 2, 4, 8] + [1],
|
||||
"num_conv_blocks": 2,
|
||||
"num_res_blocks": 17,
|
||||
},
|
||||
out_channels=80,
|
||||
hidden_channels=128,
|
||||
positional_encoding=True,
|
||||
detach_duration_predictor=True,
|
||||
)
|
||||
)
|
||||
|
||||
# multi-speaker settings
|
||||
|
|
|
@ -70,7 +70,7 @@ class TortoiseConfig(BaseTTSConfig):
|
|||
model: str = "tortoise"
|
||||
# model specific params
|
||||
model_args: TortoiseArgs = field(default_factory=TortoiseArgs)
|
||||
audio: TortoiseAudioConfig = TortoiseAudioConfig()
|
||||
audio: TortoiseAudioConfig = field(default_factory=TortoiseAudioConfig)
|
||||
model_dir: str = None
|
||||
|
||||
# settings
|
||||
|
|
|
@ -10,15 +10,11 @@ License: MIT
|
|||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import fairseq
|
||||
import torch
|
||||
from einops import pack, unpack
|
||||
from torch import nn
|
||||
from torchaudio.functional import resample
|
||||
|
||||
logging.root.setLevel(logging.ERROR)
|
||||
|
||||
|
||||
from transformers import HubertModel
|
||||
def round_down_nearest_multiple(num, divisor):
|
||||
return num // divisor * divisor
|
||||
|
||||
|
@ -49,22 +45,11 @@ class CustomHubert(nn.Module):
|
|||
self.target_sample_hz = target_sample_hz
|
||||
self.seq_len_multiple_of = seq_len_multiple_of
|
||||
self.output_layer = output_layer
|
||||
|
||||
if device is not None:
|
||||
self.to(device)
|
||||
|
||||
model_path = Path(checkpoint_path)
|
||||
|
||||
assert model_path.exists(), f"path {checkpoint_path} does not exist"
|
||||
|
||||
checkpoint = torch.load(checkpoint_path)
|
||||
load_model_input = {checkpoint_path: checkpoint}
|
||||
model, *_ = fairseq.checkpoint_utils.load_model_ensemble_and_task(load_model_input)
|
||||
|
||||
self.model = HubertModel.from_pretrained("facebook/hubert-base-ls960")
|
||||
if device is not None:
|
||||
model[0].to(device)
|
||||
|
||||
self.model = model[0]
|
||||
self.model.to(device)
|
||||
self.model.eval()
|
||||
|
||||
@property
|
||||
|
@ -81,19 +66,13 @@ class CustomHubert(nn.Module):
|
|||
if exists(self.seq_len_multiple_of):
|
||||
wav_input = curtail_to_multiple(wav_input, self.seq_len_multiple_of)
|
||||
|
||||
embed = self.model(
|
||||
outputs = self.model.forward(
|
||||
wav_input,
|
||||
features_only=True,
|
||||
mask=False, # thanks to @maitycyrus for noticing that mask is defaulted to True in the fairseq code
|
||||
output_layer=self.output_layer,
|
||||
output_hidden_states=True,
|
||||
)
|
||||
|
||||
embed, packed_shape = pack([embed["x"]], "* d")
|
||||
|
||||
# codebook_indices = self.kmeans.predict(embed.cpu().detach().numpy())
|
||||
|
||||
codebook_indices = torch.from_numpy(embed.cpu().detach().numpy()).to(device) # .long()
|
||||
|
||||
embed = outputs["hidden_states"][self.output_layer]
|
||||
embed, packed_shape = pack([embed], "* d")
|
||||
codebook_indices = torch.from_numpy(embed.cpu().detach().numpy()).to(device)
|
||||
if flatten:
|
||||
return codebook_indices
|
||||
|
||||
|
|
|
@ -130,7 +130,7 @@ def generate_voice(
|
|||
# generate semantic tokens
|
||||
# Load the HuBERT model
|
||||
hubert_manager = HubertManager()
|
||||
hubert_manager.make_sure_hubert_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert"])
|
||||
# hubert_manager.make_sure_hubert_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert"])
|
||||
hubert_manager.make_sure_tokenizer_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert_tokenizer"])
|
||||
|
||||
hubert_model = CustomHubert(checkpoint_path=model.config.LOCAL_MODEL_PATHS["hubert"]).to(model.device)
|
||||
|
|
|
@ -165,7 +165,7 @@ class BCELossMasked(nn.Module):
|
|||
|
||||
def __init__(self, pos_weight: float = None):
|
||||
super().__init__()
|
||||
self.pos_weight = nn.Parameter(torch.tensor([pos_weight]), requires_grad=False)
|
||||
self.register_buffer("pos_weight", torch.tensor([pos_weight]))
|
||||
|
||||
def forward(self, x, target, length):
|
||||
"""
|
||||
|
@ -191,10 +191,15 @@ class BCELossMasked(nn.Module):
|
|||
mask = sequence_mask(sequence_length=length, max_len=target.size(1))
|
||||
num_items = mask.sum()
|
||||
loss = functional.binary_cross_entropy_with_logits(
|
||||
x.masked_select(mask), target.masked_select(mask), pos_weight=self.pos_weight, reduction="sum"
|
||||
x.masked_select(mask),
|
||||
target.masked_select(mask),
|
||||
pos_weight=self.pos_weight.to(x.device),
|
||||
reduction="sum",
|
||||
)
|
||||
else:
|
||||
loss = functional.binary_cross_entropy_with_logits(x, target, pos_weight=self.pos_weight, reduction="sum")
|
||||
loss = functional.binary_cross_entropy_with_logits(
|
||||
x, target, pos_weight=self.pos_weight.to(x.device), reduction="sum"
|
||||
)
|
||||
num_items = torch.numel(x)
|
||||
loss = loss / num_items
|
||||
return loss
|
||||
|
|
|
@ -207,7 +207,7 @@ def maximum_path_numpy(value, mask, max_neg_val=None):
|
|||
device = value.device
|
||||
dtype = value.dtype
|
||||
value = value.cpu().detach().numpy()
|
||||
mask = mask.cpu().detach().numpy().astype(np.bool)
|
||||
mask = mask.cpu().detach().numpy().astype(bool)
|
||||
|
||||
b, t_x, t_y = value.shape
|
||||
direction = np.zeros(value.shape, dtype=np.int64)
|
||||
|
|
|
@ -540,7 +540,10 @@ class AudioProcessor(object):
|
|||
|
||||
def _griffin_lim(self, S):
|
||||
angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
|
||||
S_complex = np.abs(S).astype(np.complex)
|
||||
try:
|
||||
S_complex = np.abs(S).astype(np.complex)
|
||||
except AttributeError: # np.complex is deprecated since numpy 1.20.0
|
||||
S_complex = np.abs(S).astype(complex)
|
||||
y = self._istft(S_complex * angles)
|
||||
if not np.isfinite(y).all():
|
||||
print(" [!] Waveform is not finite everywhere. Skipping the GL.")
|
||||
|
|
|
@ -264,12 +264,15 @@ class ModelManager(object):
|
|||
model_download_uri = os.path.join(URI_PREFIX, f"{lang}.tar.gz")
|
||||
self._download_tar_file(model_download_uri, output_path, self.progress_bar)
|
||||
|
||||
def set_model_url(self, model_item: Dict):
|
||||
@staticmethod
|
||||
def set_model_url(model_item: Dict):
|
||||
model_item["model_url"] = None
|
||||
if "github_rls_url" in model_item:
|
||||
model_item["model_url"] = model_item["github_rls_url"]
|
||||
elif "hf_url" in model_item:
|
||||
model_item["model_url"] = model_item["hf_url"]
|
||||
elif "fairseq" in model_item["model_name"]:
|
||||
model_item["model_url"] = "https://coqui.gateway.scarf.sh/fairseq/"
|
||||
return model_item
|
||||
|
||||
def _set_model_item(self, model_name):
|
||||
|
@ -285,10 +288,12 @@ class ModelManager(object):
|
|||
"author": "fairseq",
|
||||
"description": "this model is released by Meta under Fairseq repo. Visit https://github.com/facebookresearch/fairseq/tree/main/examples/mms for more info.",
|
||||
}
|
||||
model_item["model_name"] = model_name
|
||||
else:
|
||||
# get model from models.json
|
||||
model_item = self.models_dict[model_type][lang][dataset][model]
|
||||
model_item["model_type"] = model_type
|
||||
model_item = self.set_model_url(model_item)
|
||||
return model_item, model_full_name, model
|
||||
|
||||
def download_model(self, model_name):
|
||||
|
@ -324,7 +329,9 @@ class ModelManager(object):
|
|||
# find downloaded files
|
||||
output_model_path = output_path
|
||||
output_config_path = None
|
||||
if model not in ["tortoise-v2", "bark"] and "fairseq" not in model_name: # TODO:This is stupid but don't care for now.
|
||||
if (
|
||||
model not in ["tortoise-v2", "bark"] and "fairseq" not in model_name
|
||||
): # TODO:This is stupid but don't care for now.
|
||||
output_model_path, output_config_path = self._find_files(output_path)
|
||||
# update paths in the config.json
|
||||
self._update_paths(output_path, output_config_path)
|
||||
|
|
|
@ -794,8 +794,8 @@ class FreeVCConfig(BaseVCConfig):
|
|||
|
||||
model: str = "freevc"
|
||||
# model specific params
|
||||
model_args: FreeVCArgs = FreeVCArgs()
|
||||
audio: FreeVCAudioConfig = FreeVCAudioConfig()
|
||||
model_args: FreeVCArgs = field(default_factory=FreeVCArgs)
|
||||
audio: FreeVCAudioConfig = field(default_factory=FreeVCAudioConfig)
|
||||
|
||||
# optimizer
|
||||
# TODO with training support
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[build-system]
|
||||
requires = ["setuptools", "wheel", "cython==0.29.28", "numpy==1.21.6", "packaging"]
|
||||
requires = ["setuptools", "wheel", "cython==0.29.30", "numpy==1.22.0", "packaging"]
|
||||
|
||||
[flake8]
|
||||
max-line-length=120
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
# core deps
|
||||
numpy==1.21.6;python_version<"3.10"
|
||||
numpy;python_version=="3.10"
|
||||
cython==0.29.28
|
||||
numpy==1.22.0;python_version<="3.10"
|
||||
numpy==1.24.3;python_version>"3.10"
|
||||
cython==0.29.30
|
||||
scipy>=1.4.0
|
||||
torch>=1.7
|
||||
torchaudio
|
||||
soundfile
|
||||
librosa==0.10.0.*
|
||||
numba==0.55.1;python_version<"3.9"
|
||||
numba==0.56.4;python_version>="3.9"
|
||||
numba==0.57.0;python_version>="3.9"
|
||||
inflect==5.6.0
|
||||
tqdm
|
||||
anyascii
|
||||
|
@ -26,14 +26,14 @@ pandas
|
|||
# deps for training
|
||||
matplotlib
|
||||
# coqui stack
|
||||
trainer==0.0.20
|
||||
trainer
|
||||
# config management
|
||||
coqpit>=0.0.16
|
||||
# chinese g2p deps
|
||||
jieba
|
||||
pypinyin
|
||||
# japanese g2p deps
|
||||
mecab-python3==1.0.5
|
||||
mecab-python3==1.0.6
|
||||
unidic-lite==1.0.8
|
||||
# gruut+supported langs
|
||||
gruut[de,es,fr]==2.2.3
|
||||
|
@ -51,5 +51,3 @@ einops
|
|||
transformers
|
||||
#deps for bark
|
||||
encodec
|
||||
#deps for fairseq models
|
||||
fairseq
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
[build_py]
|
||||
build-lib=temp_build
|
||||
build_lib=temp_build
|
||||
|
||||
[bdist_wheel]
|
||||
bdist-dir=temp_build
|
||||
bdist_dir=temp_build
|
||||
|
||||
[install_lib]
|
||||
build-dir=temp_build
|
||||
build_dir=temp_build
|
||||
|
|
9
setup.py
9
setup.py
|
@ -32,8 +32,8 @@ from Cython.Build import cythonize
|
|||
from setuptools import Extension, find_packages, setup
|
||||
|
||||
python_version = sys.version.split()[0]
|
||||
if Version(python_version) < Version("3.7") or Version(python_version) >= Version("3.11"):
|
||||
raise RuntimeError("TTS requires python >= 3.7 and < 3.11 " "but your Python version is {}".format(sys.version))
|
||||
if Version(python_version) < Version("3.9") or Version(python_version) >= Version("3.12"):
|
||||
raise RuntimeError("TTS requires python >= 3.9 and < 3.12 " "but your Python version is {}".format(sys.version))
|
||||
|
||||
|
||||
cwd = os.path.dirname(os.path.abspath(__file__))
|
||||
|
@ -114,15 +114,14 @@ setup(
|
|||
"dev": requirements_dev,
|
||||
"notebooks": requirements_notebooks,
|
||||
},
|
||||
python_requires=">=3.7.0, <3.11",
|
||||
python_requires=">=3.9.0, <3.12",
|
||||
entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]},
|
||||
classifiers=[
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Development Status :: 3 - Alpha",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Intended Audience :: Developers",
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import unittest
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from coqpit import Coqpit
|
||||
|
||||
|
@ -86,11 +86,11 @@ class TestTTSTokenizer(unittest.TestCase):
|
|||
enable_eos_bos_chars: bool = True
|
||||
use_phonemes: bool = True
|
||||
add_blank: bool = False
|
||||
characters: str = Characters()
|
||||
characters: str = field(default_factory=Characters)
|
||||
phonemizer: str = "espeak"
|
||||
phoneme_language: str = "tr"
|
||||
text_cleaner: str = "phoneme_cleaners"
|
||||
characters = Characters()
|
||||
characters = field(default_factory=Characters)
|
||||
|
||||
tokenizer_ph, _ = TTSTokenizer.init_from_config(TokenizerConfig())
|
||||
tokenizer_ph.phonemizer.backend = "espeak"
|
||||
|
|
|
@ -16,7 +16,7 @@ from TTS.utils.audio import AudioProcessor
|
|||
|
||||
torch.manual_seed(1)
|
||||
use_cuda = torch.cuda.is_available()
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
device = torch.device("cuda" if use_cuda else "cpu")
|
||||
|
||||
config_global = TacotronConfig(num_chars=32, num_speakers=5, out_channels=513, decoder_output_dim=80)
|
||||
|
||||
|
@ -288,7 +288,6 @@ class TacotronCapacitronTrainTest(unittest.TestCase):
|
|||
batch["text_input"].shape[0], batch["stop_targets"].size(1) // config.r, -1
|
||||
)
|
||||
batch["stop_targets"] = (batch["stop_targets"].sum(2) > 0.0).unsqueeze(2).float().squeeze()
|
||||
|
||||
model = Tacotron(config).to(device)
|
||||
criterion = model.get_criterion()
|
||||
optimizer = model.get_optimizer()
|
||||
|
|
|
@ -15,7 +15,7 @@ def run_models(offset=0, step=1):
|
|||
print(" > Run synthesizer with all the models.")
|
||||
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
||||
manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False)
|
||||
model_names = manager.list_models()
|
||||
model_names = [name for name in manager.list_models() if "bark" not in name]
|
||||
for model_name in model_names[offset::step]:
|
||||
print(f"\n > Run - {model_name}")
|
||||
model_path, _, _ = manager.download_model(model_name)
|
||||
|
@ -79,6 +79,15 @@ def test_models_offset_2_step_3():
|
|||
run_models(offset=2, step=3)
|
||||
|
||||
|
||||
def test_bark():
|
||||
"""Bark is too big to run on github actions. We need to test it locally"""
|
||||
output_path = os.path.join(get_tests_output_path(), "output.wav")
|
||||
run_cli(
|
||||
f" tts --model_name tts_models/multilingual/multi-dataset/bark "
|
||||
f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
|
||||
)
|
||||
|
||||
|
||||
def test_voice_conversion():
|
||||
print(" > Run voice conversion inference using YourTTS model.")
|
||||
model_name = "tts_models/multilingual/multi-dataset/your_tts"
|
||||
|
|
Loading…
Reference in New Issue