Merge branch 'p3_11' into dev

This commit is contained in:
Eren Gölge 2023-06-28 12:13:04 +02:00
commit 6b9ebf5aab
32 changed files with 114 additions and 116 deletions

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@ -21,7 +21,7 @@ jobs:
fi fi
- uses: actions/setup-python@v2 - uses: actions/setup-python@v2
with: with:
python-version: 3.8 python-version: 3.9
- run: | - run: |
python -m pip install -U pip setuptools wheel build python -m pip install -U pip setuptools wheel build
- run: | - run: |
@ -36,7 +36,7 @@ jobs:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
strategy: strategy:
matrix: matrix:
python-version: ["3.7", "3.8", "3.9", "3.10"] python-version: ["3.9", "3.10", "3.11"]
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
- uses: actions/setup-python@v2 - uses: actions/setup-python@v2
@ -64,14 +64,6 @@ jobs:
with: with:
name: "sdist" name: "sdist"
path: "dist/" path: "dist/"
- uses: actions/download-artifact@v2
with:
name: "wheel-3.7"
path: "dist/"
- uses: actions/download-artifact@v2
with:
name: "wheel-3.8"
path: "dist/"
- uses: actions/download-artifact@v2 - uses: actions/download-artifact@v2
with: with:
name: "wheel-3.9" name: "wheel-3.9"
@ -80,6 +72,10 @@ jobs:
with: with:
name: "wheel-3.10" name: "wheel-3.10"
path: "dist/" path: "dist/"
- uses: actions/download-artifact@v2
with:
name: "wheel-3.11"
path: "dist/"
- run: | - run: |
ls -lh dist/ ls -lh dist/
- name: Setup PyPI config - name: Setup PyPI config
@ -91,7 +87,7 @@ jobs:
EOF EOF
- uses: actions/setup-python@v2 - uses: actions/setup-python@v2
with: with:
python-version: 3.8 python-version: 3.9
- run: | - run: |
python -m pip install twine python -m pip install twine
- run: | - run: |

View File

@ -42,6 +42,6 @@ jobs:
run: | run: |
python3 -m pip install .[all] python3 -m pip install .[all]
python3 setup.py egg_info python3 setup.py egg_info
- name: Lint check # - name: Lint check
run: | # run: |
make lint # make lint

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
@ -43,6 +43,7 @@ jobs:
run: python3 -m pip install --upgrade pip setuptools wheel run: python3 -m pip install --upgrade pip setuptools wheel
- name: Replace scarf urls - name: Replace scarf urls
run: | run: |
sed -i 's/https:\/\/coqui.gateway.scarf.sh\/hf\/bark\//https:\/\/huggingface.co\/erogol\/bark\/resolve\/main\//g' TTS/.models.json
sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
- name: Install TTS - name: Install TTS
run: | run: |

View File

@ -18,7 +18,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
python-version: [3.7, 3.8, 3.9, "3.10"] python-version: [3.9, "3.10", "3.11"]
experimental: [false] experimental: [false]
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3

View File

@ -23,7 +23,7 @@ colormap = (
[0, 0, 0], [0, 0, 0],
[183, 183, 183], [183, 183, 183],
], ],
dtype=np.float, dtype=float,
) )
/ 255 / 255
) )

View File

@ -1,5 +1,5 @@
import os import os
from dataclasses import dataclass from dataclasses import dataclass, field
from typing import Dict from typing import Dict
from TTS.tts.configs.shared_configs import BaseTTSConfig from TTS.tts.configs.shared_configs import BaseTTSConfig
@ -46,11 +46,11 @@ class BarkConfig(BaseTTSConfig):
""" """
model: str = "bark" model: str = "bark"
audio: BarkAudioConfig = BarkAudioConfig() audio: BarkAudioConfig = field(default_factory=BarkAudioConfig)
num_chars: int = 0 num_chars: int = 0
semantic_config: GPTConfig = GPTConfig() semantic_config: GPTConfig = field(default_factory=GPTConfig)
fine_config: FineGPTConfig = FineGPTConfig() fine_config: FineGPTConfig = field(default_factory=FineGPTConfig)
coarse_config: GPTConfig = GPTConfig() coarse_config: GPTConfig = field(default_factory=GPTConfig)
CONTEXT_WINDOW_SIZE: int = 1024 CONTEXT_WINDOW_SIZE: int = 1024
SEMANTIC_RATE_HZ: float = 49.9 SEMANTIC_RATE_HZ: float = 49.9
SEMANTIC_VOCAB_SIZE: int = 10_000 SEMANTIC_VOCAB_SIZE: int = 10_000

View File

@ -113,7 +113,7 @@ class FastPitchConfig(BaseTTSConfig):
base_model: str = "forward_tts" base_model: str = "forward_tts"
# model specific params # model specific params
model_args: ForwardTTSArgs = ForwardTTSArgs() model_args: ForwardTTSArgs = field(default_factory=ForwardTTSArgs)
# multi-speaker settings # multi-speaker settings
num_speakers: int = 0 num_speakers: int = 0

View File

@ -107,7 +107,7 @@ class FastSpeechConfig(BaseTTSConfig):
base_model: str = "forward_tts" base_model: str = "forward_tts"
# model specific params # model specific params
model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=False) model_args: ForwardTTSArgs = field(default_factory=lambda: ForwardTTSArgs(use_pitch=False))
# multi-speaker settings # multi-speaker settings
num_speakers: int = 0 num_speakers: int = 0

View File

@ -123,7 +123,7 @@ class Fastspeech2Config(BaseTTSConfig):
base_model: str = "forward_tts" base_model: str = "forward_tts"
# model specific params # model specific params
model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=True, use_energy=True) model_args: ForwardTTSArgs = field(default_factory=lambda: ForwardTTSArgs(use_pitch=True, use_energy=True))
# multi-speaker settings # multi-speaker settings
num_speakers: int = 0 num_speakers: int = 0

View File

@ -103,26 +103,28 @@ class SpeedySpeechConfig(BaseTTSConfig):
base_model: str = "forward_tts" base_model: str = "forward_tts"
# set model args as SpeedySpeech # set model args as SpeedySpeech
model_args: ForwardTTSArgs = ForwardTTSArgs( model_args: ForwardTTSArgs = field(
use_pitch=False, default_factory=lambda: ForwardTTSArgs(
encoder_type="residual_conv_bn", use_pitch=False,
encoder_params={ encoder_type="residual_conv_bn",
"kernel_size": 4, encoder_params={
"dilations": 4 * [1, 2, 4] + [1], "kernel_size": 4,
"num_conv_blocks": 2, "dilations": 4 * [1, 2, 4] + [1],
"num_res_blocks": 13, "num_conv_blocks": 2,
}, "num_res_blocks": 13,
decoder_type="residual_conv_bn", },
decoder_params={ decoder_type="residual_conv_bn",
"kernel_size": 4, decoder_params={
"dilations": 4 * [1, 2, 4, 8] + [1], "kernel_size": 4,
"num_conv_blocks": 2, "dilations": 4 * [1, 2, 4, 8] + [1],
"num_res_blocks": 17, "num_conv_blocks": 2,
}, "num_res_blocks": 17,
out_channels=80, },
hidden_channels=128, out_channels=80,
positional_encoding=True, hidden_channels=128,
detach_duration_predictor=True, positional_encoding=True,
detach_duration_predictor=True,
)
) )
# multi-speaker settings # multi-speaker settings

View File

@ -70,7 +70,7 @@ class TortoiseConfig(BaseTTSConfig):
model: str = "tortoise" model: str = "tortoise"
# model specific params # model specific params
model_args: TortoiseArgs = field(default_factory=TortoiseArgs) model_args: TortoiseArgs = field(default_factory=TortoiseArgs)
audio: TortoiseAudioConfig = TortoiseAudioConfig() audio: TortoiseAudioConfig = field(default_factory=TortoiseAudioConfig)
model_dir: str = None model_dir: str = None
# settings # settings

View File

@ -10,15 +10,11 @@ License: MIT
import logging import logging
from pathlib import Path from pathlib import Path
import fairseq
import torch import torch
from einops import pack, unpack from einops import pack, unpack
from torch import nn from torch import nn
from torchaudio.functional import resample from torchaudio.functional import resample
from transformers import HubertModel
logging.root.setLevel(logging.ERROR)
def round_down_nearest_multiple(num, divisor): def round_down_nearest_multiple(num, divisor):
return num // divisor * divisor return num // divisor * divisor
@ -49,22 +45,11 @@ class CustomHubert(nn.Module):
self.target_sample_hz = target_sample_hz self.target_sample_hz = target_sample_hz
self.seq_len_multiple_of = seq_len_multiple_of self.seq_len_multiple_of = seq_len_multiple_of
self.output_layer = output_layer self.output_layer = output_layer
if device is not None: if device is not None:
self.to(device) self.to(device)
self.model = HubertModel.from_pretrained("facebook/hubert-base-ls960")
model_path = Path(checkpoint_path)
assert model_path.exists(), f"path {checkpoint_path} does not exist"
checkpoint = torch.load(checkpoint_path)
load_model_input = {checkpoint_path: checkpoint}
model, *_ = fairseq.checkpoint_utils.load_model_ensemble_and_task(load_model_input)
if device is not None: if device is not None:
model[0].to(device) self.model.to(device)
self.model = model[0]
self.model.eval() self.model.eval()
@property @property
@ -81,19 +66,13 @@ class CustomHubert(nn.Module):
if exists(self.seq_len_multiple_of): if exists(self.seq_len_multiple_of):
wav_input = curtail_to_multiple(wav_input, self.seq_len_multiple_of) wav_input = curtail_to_multiple(wav_input, self.seq_len_multiple_of)
embed = self.model( outputs = self.model.forward(
wav_input, wav_input,
features_only=True, output_hidden_states=True,
mask=False, # thanks to @maitycyrus for noticing that mask is defaulted to True in the fairseq code
output_layer=self.output_layer,
) )
embed = outputs["hidden_states"][self.output_layer]
embed, packed_shape = pack([embed["x"]], "* d") embed, packed_shape = pack([embed], "* d")
codebook_indices = torch.from_numpy(embed.cpu().detach().numpy()).to(device)
# codebook_indices = self.kmeans.predict(embed.cpu().detach().numpy())
codebook_indices = torch.from_numpy(embed.cpu().detach().numpy()).to(device) # .long()
if flatten: if flatten:
return codebook_indices return codebook_indices

View File

@ -130,7 +130,7 @@ def generate_voice(
# generate semantic tokens # generate semantic tokens
# Load the HuBERT model # Load the HuBERT model
hubert_manager = HubertManager() hubert_manager = HubertManager()
hubert_manager.make_sure_hubert_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert"]) # hubert_manager.make_sure_hubert_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert"])
hubert_manager.make_sure_tokenizer_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert_tokenizer"]) hubert_manager.make_sure_tokenizer_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert_tokenizer"])
hubert_model = CustomHubert(checkpoint_path=model.config.LOCAL_MODEL_PATHS["hubert"]).to(model.device) hubert_model = CustomHubert(checkpoint_path=model.config.LOCAL_MODEL_PATHS["hubert"]).to(model.device)

View File

@ -165,7 +165,7 @@ class BCELossMasked(nn.Module):
def __init__(self, pos_weight: float = None): def __init__(self, pos_weight: float = None):
super().__init__() super().__init__()
self.pos_weight = nn.Parameter(torch.tensor([pos_weight]), requires_grad=False) self.register_buffer("pos_weight", torch.tensor([pos_weight]))
def forward(self, x, target, length): def forward(self, x, target, length):
""" """
@ -191,10 +191,15 @@ class BCELossMasked(nn.Module):
mask = sequence_mask(sequence_length=length, max_len=target.size(1)) mask = sequence_mask(sequence_length=length, max_len=target.size(1))
num_items = mask.sum() num_items = mask.sum()
loss = functional.binary_cross_entropy_with_logits( loss = functional.binary_cross_entropy_with_logits(
x.masked_select(mask), target.masked_select(mask), pos_weight=self.pos_weight, reduction="sum" x.masked_select(mask),
target.masked_select(mask),
pos_weight=self.pos_weight.to(x.device),
reduction="sum",
) )
else: else:
loss = functional.binary_cross_entropy_with_logits(x, target, pos_weight=self.pos_weight, reduction="sum") loss = functional.binary_cross_entropy_with_logits(
x, target, pos_weight=self.pos_weight.to(x.device), reduction="sum"
)
num_items = torch.numel(x) num_items = torch.numel(x)
loss = loss / num_items loss = loss / num_items
return loss return loss

View File

@ -207,7 +207,7 @@ def maximum_path_numpy(value, mask, max_neg_val=None):
device = value.device device = value.device
dtype = value.dtype dtype = value.dtype
value = value.cpu().detach().numpy() value = value.cpu().detach().numpy()
mask = mask.cpu().detach().numpy().astype(np.bool) mask = mask.cpu().detach().numpy().astype(bool)
b, t_x, t_y = value.shape b, t_x, t_y = value.shape
direction = np.zeros(value.shape, dtype=np.int64) direction = np.zeros(value.shape, dtype=np.int64)

View File

@ -540,7 +540,10 @@ class AudioProcessor(object):
def _griffin_lim(self, S): def _griffin_lim(self, S):
angles = np.exp(2j * np.pi * np.random.rand(*S.shape)) angles = np.exp(2j * np.pi * np.random.rand(*S.shape))
S_complex = np.abs(S).astype(np.complex) try:
S_complex = np.abs(S).astype(np.complex)
except AttributeError: # np.complex is deprecated since numpy 1.20.0
S_complex = np.abs(S).astype(complex)
y = self._istft(S_complex * angles) y = self._istft(S_complex * angles)
if not np.isfinite(y).all(): if not np.isfinite(y).all():
print(" [!] Waveform is not finite everywhere. Skipping the GL.") print(" [!] Waveform is not finite everywhere. Skipping the GL.")

View File

@ -264,14 +264,17 @@ class ModelManager(object):
model_download_uri = os.path.join(URI_PREFIX, f"{lang}.tar.gz") model_download_uri = os.path.join(URI_PREFIX, f"{lang}.tar.gz")
self._download_tar_file(model_download_uri, output_path, self.progress_bar) self._download_tar_file(model_download_uri, output_path, self.progress_bar)
def set_model_url(self, model_item: Dict): @staticmethod
def set_model_url(model_item: Dict):
model_item["model_url"] = None model_item["model_url"] = None
if "github_rls_url" in model_item: if "github_rls_url" in model_item:
model_item["model_url"] = model_item["github_rls_url"] model_item["model_url"] = model_item["github_rls_url"]
elif "hf_url" in model_item: elif "hf_url" in model_item:
model_item["model_url"] = model_item["hf_url"] model_item["model_url"] = model_item["hf_url"]
elif "fairseq" in model_item["model_name"]:
model_item["model_url"] = "https://coqui.gateway.scarf.sh/fairseq/"
return model_item return model_item
def _set_model_item(self, model_name): def _set_model_item(self, model_name):
# fetch model info from the dict # fetch model info from the dict
model_type, lang, dataset, model = model_name.split("/") model_type, lang, dataset, model = model_name.split("/")
@ -285,10 +288,12 @@ class ModelManager(object):
"author": "fairseq", "author": "fairseq",
"description": "this model is released by Meta under Fairseq repo. Visit https://github.com/facebookresearch/fairseq/tree/main/examples/mms for more info.", "description": "this model is released by Meta under Fairseq repo. Visit https://github.com/facebookresearch/fairseq/tree/main/examples/mms for more info.",
} }
model_item["model_name"] = model_name
else: else:
# get model from models.json # get model from models.json
model_item = self.models_dict[model_type][lang][dataset][model] model_item = self.models_dict[model_type][lang][dataset][model]
model_item["model_type"] = model_type model_item["model_type"] = model_type
model_item = self.set_model_url(model_item)
return model_item, model_full_name, model return model_item, model_full_name, model
def download_model(self, model_name): def download_model(self, model_name):
@ -324,7 +329,9 @@ class ModelManager(object):
# find downloaded files # find downloaded files
output_model_path = output_path output_model_path = output_path
output_config_path = None output_config_path = None
if model not in ["tortoise-v2", "bark"] and "fairseq" not in model_name: # TODO:This is stupid but don't care for now. if (
model not in ["tortoise-v2", "bark"] and "fairseq" not in model_name
): # TODO:This is stupid but don't care for now.
output_model_path, output_config_path = self._find_files(output_path) output_model_path, output_config_path = self._find_files(output_path)
# update paths in the config.json # update paths in the config.json
self._update_paths(output_path, output_config_path) self._update_paths(output_path, output_config_path)

View File

@ -794,8 +794,8 @@ class FreeVCConfig(BaseVCConfig):
model: str = "freevc" model: str = "freevc"
# model specific params # model specific params
model_args: FreeVCArgs = FreeVCArgs() model_args: FreeVCArgs = field(default_factory=FreeVCArgs)
audio: FreeVCAudioConfig = FreeVCAudioConfig() audio: FreeVCAudioConfig = field(default_factory=FreeVCAudioConfig)
# optimizer # optimizer
# TODO with training support # TODO with training support

View File

@ -1,5 +1,5 @@
[build-system] [build-system]
requires = ["setuptools", "wheel", "cython==0.29.28", "numpy==1.21.6", "packaging"] requires = ["setuptools", "wheel", "cython==0.29.30", "numpy==1.22.0", "packaging"]
[flake8] [flake8]
max-line-length=120 max-line-length=120

View File

@ -1,14 +1,14 @@
# core deps # core deps
numpy==1.21.6;python_version<"3.10" numpy==1.22.0;python_version<="3.10"
numpy;python_version=="3.10" numpy==1.24.3;python_version>"3.10"
cython==0.29.28 cython==0.29.30
scipy>=1.4.0 scipy>=1.4.0
torch>=1.7 torch>=1.7
torchaudio torchaudio
soundfile soundfile
librosa==0.10.0.* librosa==0.10.0.*
numba==0.55.1;python_version<"3.9" numba==0.55.1;python_version<"3.9"
numba==0.56.4;python_version>="3.9" numba==0.57.0;python_version>="3.9"
inflect==5.6.0 inflect==5.6.0
tqdm tqdm
anyascii anyascii
@ -26,14 +26,14 @@ pandas
# deps for training # deps for training
matplotlib matplotlib
# coqui stack # coqui stack
trainer==0.0.20 trainer
# config management # config management
coqpit>=0.0.16 coqpit>=0.0.16
# chinese g2p deps # chinese g2p deps
jieba jieba
pypinyin pypinyin
# japanese g2p deps # japanese g2p deps
mecab-python3==1.0.5 mecab-python3==1.0.6
unidic-lite==1.0.8 unidic-lite==1.0.8
# gruut+supported langs # gruut+supported langs
gruut[de,es,fr]==2.2.3 gruut[de,es,fr]==2.2.3
@ -51,5 +51,3 @@ einops
transformers transformers
#deps for bark #deps for bark
encodec encodec
#deps for fairseq models
fairseq

View File

@ -1,8 +1,8 @@
[build_py] [build_py]
build-lib=temp_build build_lib=temp_build
[bdist_wheel] [bdist_wheel]
bdist-dir=temp_build bdist_dir=temp_build
[install_lib] [install_lib]
build-dir=temp_build build_dir=temp_build

View File

@ -32,8 +32,8 @@ from Cython.Build import cythonize
from setuptools import Extension, find_packages, setup from setuptools import Extension, find_packages, setup
python_version = sys.version.split()[0] python_version = sys.version.split()[0]
if Version(python_version) < Version("3.7") or Version(python_version) >= Version("3.11"): if Version(python_version) < Version("3.9") or Version(python_version) >= Version("3.12"):
raise RuntimeError("TTS requires python >= 3.7 and < 3.11 " "but your Python version is {}".format(sys.version)) raise RuntimeError("TTS requires python >= 3.9 and < 3.12 " "but your Python version is {}".format(sys.version))
cwd = os.path.dirname(os.path.abspath(__file__)) cwd = os.path.dirname(os.path.abspath(__file__))
@ -114,15 +114,14 @@ setup(
"dev": requirements_dev, "dev": requirements_dev,
"notebooks": requirements_notebooks, "notebooks": requirements_notebooks,
}, },
python_requires=">=3.7.0, <3.11", python_requires=">=3.9.0, <3.12",
entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]}, entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]},
classifiers=[ classifiers=[
"Programming Language :: Python", "Programming Language :: Python",
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Development Status :: 3 - Alpha", "Development Status :: 3 - Alpha",
"Intended Audience :: Science/Research", "Intended Audience :: Science/Research",
"Intended Audience :: Developers", "Intended Audience :: Developers",

View File

@ -1,5 +1,5 @@
import unittest import unittest
from dataclasses import dataclass from dataclasses import dataclass, field
from coqpit import Coqpit from coqpit import Coqpit
@ -86,11 +86,11 @@ class TestTTSTokenizer(unittest.TestCase):
enable_eos_bos_chars: bool = True enable_eos_bos_chars: bool = True
use_phonemes: bool = True use_phonemes: bool = True
add_blank: bool = False add_blank: bool = False
characters: str = Characters() characters: str = field(default_factory=Characters)
phonemizer: str = "espeak" phonemizer: str = "espeak"
phoneme_language: str = "tr" phoneme_language: str = "tr"
text_cleaner: str = "phoneme_cleaners" text_cleaner: str = "phoneme_cleaners"
characters = Characters() characters = field(default_factory=Characters)
tokenizer_ph, _ = TTSTokenizer.init_from_config(TokenizerConfig()) tokenizer_ph, _ = TTSTokenizer.init_from_config(TokenizerConfig())
tokenizer_ph.phonemizer.backend = "espeak" tokenizer_ph.phonemizer.backend = "espeak"

View File

@ -16,7 +16,7 @@ from TTS.utils.audio import AudioProcessor
torch.manual_seed(1) torch.manual_seed(1)
use_cuda = torch.cuda.is_available() use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") device = torch.device("cuda" if use_cuda else "cpu")
config_global = TacotronConfig(num_chars=32, num_speakers=5, out_channels=513, decoder_output_dim=80) config_global = TacotronConfig(num_chars=32, num_speakers=5, out_channels=513, decoder_output_dim=80)
@ -288,7 +288,6 @@ class TacotronCapacitronTrainTest(unittest.TestCase):
batch["text_input"].shape[0], batch["stop_targets"].size(1) // config.r, -1 batch["text_input"].shape[0], batch["stop_targets"].size(1) // config.r, -1
) )
batch["stop_targets"] = (batch["stop_targets"].sum(2) > 0.0).unsqueeze(2).float().squeeze() batch["stop_targets"] = (batch["stop_targets"].sum(2) > 0.0).unsqueeze(2).float().squeeze()
model = Tacotron(config).to(device) model = Tacotron(config).to(device)
criterion = model.get_criterion() criterion = model.get_criterion()
optimizer = model.get_optimizer() optimizer = model.get_optimizer()

View File

@ -15,7 +15,7 @@ def run_models(offset=0, step=1):
print(" > Run synthesizer with all the models.") print(" > Run synthesizer with all the models.")
output_path = os.path.join(get_tests_output_path(), "output.wav") output_path = os.path.join(get_tests_output_path(), "output.wav")
manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False) manager = ModelManager(output_prefix=get_tests_output_path(), progress_bar=False)
model_names = manager.list_models() model_names = [name for name in manager.list_models() if "bark" not in name]
for model_name in model_names[offset::step]: for model_name in model_names[offset::step]:
print(f"\n > Run - {model_name}") print(f"\n > Run - {model_name}")
model_path, _, _ = manager.download_model(model_name) model_path, _, _ = manager.download_model(model_name)
@ -79,6 +79,15 @@ def test_models_offset_2_step_3():
run_models(offset=2, step=3) run_models(offset=2, step=3)
def test_bark():
"""Bark is too big to run on github actions. We need to test it locally"""
output_path = os.path.join(get_tests_output_path(), "output.wav")
run_cli(
f" tts --model_name tts_models/multilingual/multi-dataset/bark "
f'--text "This is an example." --out_path "{output_path}" --progress_bar False'
)
def test_voice_conversion(): def test_voice_conversion():
print(" > Run voice conversion inference using YourTTS model.") print(" > Run voice conversion inference using YourTTS model.")
model_name = "tts_models/multilingual/multi-dataset/your_tts" model_name = "tts_models/multilingual/multi-dataset/your_tts"