mirror of https://github.com/coqui-ai/TTS.git
style: run black
This commit is contained in: parent c86cf9b2ef, commit efdafd5a7f
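Note: every hunk below is a mechanical reformatting produced by black (apparently a 24.x release, given the blank line it now enforces after module docstrings and the compact `...` stubs). The most frequent rewrite wraps a multi-line conditional expression used as a dict value, keyword argument, or comprehension element in explicit parentheses. A minimal sketch of that shape, with illustrative names that are not from the codebase:

some_flag = True
short_option = 1
a_much_longer_fallback_option_that_forces_the_line_to_wrap = 2

# before: the conditional used to dangle across lines without parentheses
# value = short_option if some_flag else a_much_longer_fallback_option_that_forces_the_line_to_wrap
# after: black parenthesizes the conditional once it no longer fits on one line
value = (
    short_option
    if some_flag
    else a_much_longer_fallback_option_that_forces_the_line_to_wrap
)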
@@ -1,4 +1,5 @@
 """Get detailed info about the working environment."""
+
 import json
 import os
 import platform
@@ -1,4 +1,5 @@
 """Find all the unique characters in a dataset"""
+
 import argparse
 from argparse import RawTextHelpFormatter
 
@@ -1,4 +1,5 @@
 """Find all the unique characters in a dataset"""
+
 import argparse
 import multiprocessing
 from argparse import RawTextHelpFormatter
@@ -1,4 +1,5 @@
 """Search a good noise schedule for WaveGrad for a given number of inference iterations"""
+
 import argparse
 from itertools import product as cartesian_product
 
@@ -457,9 +457,11 @@ class TTSDataset(Dataset):
 
             # lengths adjusted by the reduction factor
             mel_lengths_adjusted = [
-                m.shape[1] + (self.outputs_per_step - (m.shape[1] % self.outputs_per_step))
-                if m.shape[1] % self.outputs_per_step
-                else m.shape[1]
+                (
+                    m.shape[1] + (self.outputs_per_step - (m.shape[1] % self.outputs_per_step))
+                    if m.shape[1] % self.outputs_per_step
+                    else m.shape[1]
+                )
                 for m in mel
             ]
 
@@ -2,6 +2,7 @@
 Much of this code is adapted from Andrej Karpathy's NanoGPT
 (https://github.com/karpathy/nanoGPT)
 """
+
 import math
 from dataclasses import dataclass
 
@@ -2,6 +2,7 @@
 Much of this code is adapted from Andrej Karpathy's NanoGPT
 (https://github.com/karpathy/nanoGPT)
 """
+
 import math
 from dataclasses import dataclass
 
@@ -187,9 +187,9 @@ class GPT(nn.Module):
     def get_grad_norm_parameter_groups(self):
         return {
             "conditioning_encoder": list(self.conditioning_encoder.parameters()),
-            "conditioning_perceiver": list(self.conditioning_perceiver.parameters())
-            if self.use_perceiver_resampler
-            else None,
+            "conditioning_perceiver": (
+                list(self.conditioning_perceiver.parameters()) if self.use_perceiver_resampler else None
+            ),
             "gpt": list(self.gpt.parameters()),
             "heads": list(self.text_head.parameters()) + list(self.mel_head.parameters()),
         }
@@ -186,9 +186,9 @@ class XTTSDataset(torch.utils.data.Dataset):
             "wav_lengths": torch.tensor(wav.shape[-1], dtype=torch.long),
             "filenames": audiopath,
             "conditioning": cond.unsqueeze(1),
-            "cond_lens": torch.tensor(cond_len, dtype=torch.long)
-            if cond_len is not torch.nan
-            else torch.tensor([cond_len]),
+            "cond_lens": (
+                torch.tensor(cond_len, dtype=torch.long) if cond_len is not torch.nan else torch.tensor([cond_len])
+            ),
             "cond_idxs": torch.tensor(cond_idxs) if cond_idxs is not torch.nan else torch.tensor([cond_idxs]),
         }
         return res
@@ -225,14 +225,11 @@ class Bark(BaseTTS):
 
         return return_dict
 
-    def eval_step(self):
-        ...
+    def eval_step(self): ...
 
-    def forward(self):
-        ...
+    def forward(self): ...
 
-    def inference(self):
-        ...
+    def inference(self): ...
 
     @staticmethod
     def init_from_config(config: "BarkConfig", **kwargs):  # pylint: disable=unused-argument
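Note: the stub methods above are collapsed because black 24.x formats "dummy implementations" (a body that is only `...`) on the same line as the signature. A tiny illustrative example, not taken from the codebase:

class Stub:
    # compact form emitted by black 24.x for a do-nothing placeholder
    def eval_step(self): ...

    # the pre-24 layout would have been:
    # def eval_step(self):
    #     ...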
@@ -369,9 +369,11 @@ class BaseTTS(BaseTrainerModel):
                 d_vector = (random.sample(sorted(d_vector), 1),)
 
             aux_inputs = {
-                "speaker_id": None
-                if not self.config.use_speaker_embedding
-                else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1),
+                "speaker_id": (
+                    None
+                    if not self.config.use_speaker_embedding
+                    else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1)
+                ),
                 "d_vector": d_vector,
                 "style_wav": None,  # TODO: handle GST style input
             }
@@ -101,12 +101,16 @@ class Tacotron(BaseTacotron):
                 num_mel=self.decoder_output_dim,
                 encoder_output_dim=self.encoder_in_features,
                 capacitron_VAE_embedding_dim=self.capacitron_vae.capacitron_VAE_embedding_dim,
-                speaker_embedding_dim=self.embedded_speaker_dim
-                if self.use_speaker_embedding and self.capacitron_vae.capacitron_use_speaker_embedding
-                else None,
-                text_summary_embedding_dim=self.capacitron_vae.capacitron_text_summary_embedding_dim
-                if self.capacitron_vae.capacitron_use_text_summary_embeddings
-                else None,
+                speaker_embedding_dim=(
+                    self.embedded_speaker_dim
+                    if self.use_speaker_embedding and self.capacitron_vae.capacitron_use_speaker_embedding
+                    else None
+                ),
+                text_summary_embedding_dim=(
+                    self.capacitron_vae.capacitron_text_summary_embedding_dim
+                    if self.capacitron_vae.capacitron_use_text_summary_embeddings
+                    else None
+                ),
             )
 
         # backward pass decoder
@@ -171,9 +175,9 @@ class Tacotron(BaseTacotron):
             encoder_outputs, *capacitron_vae_outputs = self.compute_capacitron_VAE_embedding(
                 encoder_outputs,
                 reference_mel_info=[mel_specs, mel_lengths],
-                text_info=[inputs, text_lengths]
-                if self.capacitron_vae.capacitron_use_text_summary_embeddings
-                else None,
+                text_info=(
+                    [inputs, text_lengths] if self.capacitron_vae.capacitron_use_text_summary_embeddings else None
+                ),
                 speaker_embedding=embedded_speakers if self.capacitron_vae.capacitron_use_speaker_embedding else None,
             )
         else:
@@ -237,13 +241,13 @@ class Tacotron(BaseTacotron):
             # B x capacitron_VAE_embedding_dim
             encoder_outputs, *_ = self.compute_capacitron_VAE_embedding(
                 encoder_outputs,
-                reference_mel_info=[aux_input["style_mel"], reference_mel_length]
-                if aux_input["style_mel"] is not None
-                else None,
+                reference_mel_info=(
+                    [aux_input["style_mel"], reference_mel_length] if aux_input["style_mel"] is not None else None
+                ),
                 text_info=[style_text_embedding, style_text_length] if aux_input["style_text"] is not None else None,
-                speaker_embedding=aux_input["d_vectors"]
-                if self.capacitron_vae.capacitron_use_speaker_embedding
-                else None,
+                speaker_embedding=(
+                    aux_input["d_vectors"] if self.capacitron_vae.capacitron_use_speaker_embedding else None
+                ),
             )
         if self.num_speakers > 1:
             if not self.use_d_vector_file:
@@ -113,12 +113,14 @@ class Tacotron2(BaseTacotron):
                 num_mel=self.decoder_output_dim,
                 encoder_output_dim=self.encoder_in_features,
                 capacitron_VAE_embedding_dim=self.capacitron_vae.capacitron_VAE_embedding_dim,
-                speaker_embedding_dim=self.embedded_speaker_dim
-                if self.capacitron_vae.capacitron_use_speaker_embedding
-                else None,
-                text_summary_embedding_dim=self.capacitron_vae.capacitron_text_summary_embedding_dim
-                if self.capacitron_vae.capacitron_use_text_summary_embeddings
-                else None,
+                speaker_embedding_dim=(
+                    self.embedded_speaker_dim if self.capacitron_vae.capacitron_use_speaker_embedding else None
+                ),
+                text_summary_embedding_dim=(
+                    self.capacitron_vae.capacitron_text_summary_embedding_dim
+                    if self.capacitron_vae.capacitron_use_text_summary_embeddings
+                    else None
+                ),
             )
 
         # backward pass decoder
@@ -191,9 +193,11 @@ class Tacotron2(BaseTacotron):
             encoder_outputs, *capacitron_vae_outputs = self.compute_capacitron_VAE_embedding(
                 encoder_outputs,
                 reference_mel_info=[mel_specs, mel_lengths],
-                text_info=[embedded_inputs.transpose(1, 2), text_lengths]
-                if self.capacitron_vae.capacitron_use_text_summary_embeddings
-                else None,
+                text_info=(
+                    [embedded_inputs.transpose(1, 2), text_lengths]
+                    if self.capacitron_vae.capacitron_use_text_summary_embeddings
+                    else None
+                ),
                 speaker_embedding=embedded_speakers if self.capacitron_vae.capacitron_use_speaker_embedding else None,
             )
         else:
@@ -265,13 +269,13 @@ class Tacotron2(BaseTacotron):
             # B x capacitron_VAE_embedding_dim
             encoder_outputs, *_ = self.compute_capacitron_VAE_embedding(
                 encoder_outputs,
-                reference_mel_info=[aux_input["style_mel"], reference_mel_length]
-                if aux_input["style_mel"] is not None
-                else None,
+                reference_mel_info=(
+                    [aux_input["style_mel"], reference_mel_length] if aux_input["style_mel"] is not None else None
+                ),
                 text_info=[style_text_embedding, style_text_length] if aux_input["style_text"] is not None else None,
-                speaker_embedding=aux_input["d_vectors"]
-                if self.capacitron_vae.capacitron_use_speaker_embedding
-                else None,
+                speaker_embedding=(
+                    aux_input["d_vectors"] if self.capacitron_vae.capacitron_use_speaker_embedding else None
+                ),
             )
 
         if self.num_speakers > 1:
@@ -715,8 +715,9 @@ class Tortoise(BaseTTS):
             self.autoregressive = self.autoregressive.to(self.device)
             if verbose:
                 print("Generating autoregressive samples..")
-            with self.temporary_cuda(self.autoregressive) as autoregressive, torch.autocast(
-                device_type="cuda", dtype=torch.float16, enabled=half
+            with (
+                self.temporary_cuda(self.autoregressive) as autoregressive,
+                torch.autocast(device_type="cuda", dtype=torch.float16, enabled=half),
             ):
                 for b in tqdm(range(num_batches), disable=not verbose):
                     codes = autoregressive.inference_speech(
@@ -737,8 +738,9 @@
             self.autoregressive_batch_size = orig_batch_size  # in the case of single_sample
 
             clip_results = []
-            with self.temporary_cuda(self.clvp) as clvp, torch.autocast(
-                device_type="cuda", dtype=torch.float16, enabled=half
+            with (
+                self.temporary_cuda(self.clvp) as clvp,
+                torch.autocast(device_type="cuda", dtype=torch.float16, enabled=half),
            ):
                 for batch in tqdm(samples, disable=not verbose):
                     for i in range(batch.shape[0]):
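Note: the `with (...)` rewrites in this commit (the two Tortoise hunks above and the stream_url hunk further down) use parenthesized context managers, which black only emits when the configured target Python versions all accept the syntax (CPython's new parser accepts it from 3.9; it is part of the official grammar from 3.10). A runnable sketch with placeholder files, not taken from the codebase:

from pathlib import Path
from tempfile import TemporaryDirectory

with TemporaryDirectory() as tmp:
    # multiple context managers grouped in parentheses, one per line
    with (
        open(Path(tmp) / "a.txt", "w") as fa,
        open(Path(tmp) / "b.txt", "w") as fb,
    ):
        fa.write("a")
        fb.write("b")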
@@ -1887,9 +1887,11 @@ class Vits(BaseTTS):
         import onnxruntime as ort
 
         providers = [
-            "CPUExecutionProvider"
-            if cuda is False
-            else ("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"})
+            (
+                "CPUExecutionProvider"
+                if cuda is False
+                else ("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"})
+            )
         ]
         sess_options = ort.SessionOptions()
         self.onnx_sess = ort.InferenceSession(
@@ -207,6 +207,7 @@ class SSIMLoss(_Loss):
     https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf,
     DOI:`10.1109/TIP.2003.819861`
     """
+
     __constants__ = ["kernel_size", "k1", "k2", "sigma", "kernel", "reduction"]
 
     def __init__(
@@ -1,4 +1,5 @@
 """Set of default text cleaners"""
+
 # TODO: pick the cleaner for languages dynamically
 
 import re
@@ -36,13 +36,16 @@ def stream_url(
     if start_byte:
         req.headers["Range"] = "bytes={}-".format(start_byte)
 
-    with urllib.request.urlopen(req) as upointer, tqdm(
-        unit="B",
-        unit_scale=True,
-        unit_divisor=1024,
-        total=url_size,
-        disable=not progress_bar,
-    ) as pbar:
+    with (
+        urllib.request.urlopen(req) as upointer,
+        tqdm(
+            unit="B",
+            unit_scale=True,
+            unit_divisor=1024,
+            total=url_size,
+            disable=not progress_bar,
+        ) as pbar,
+    ):
         num_bytes = 0
         while True:
             chunk = upointer.read(block_size)
@@ -357,9 +357,11 @@ class BaseVC(BaseTrainerModel):
                 d_vector = (random.sample(sorted(d_vector), 1),)
 
             aux_inputs = {
-                "speaker_id": None
-                if not self.config.use_speaker_embedding
-                else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1),
+                "speaker_id": (
+                    None
+                    if not self.config.use_speaker_embedding
+                    else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1)
+                ),
                 "d_vector": d_vector,
                 "style_wav": None,  # TODO: handle GST style input
             }
@@ -544,8 +544,7 @@ class FreeVC(BaseVC):
         audio = audio[0][0].data.cpu().float().numpy()
         return audio
 
-    def eval_step():
-        ...
+    def eval_step(): ...
 
     @staticmethod
     def init_from_config(config: FreeVCConfig, samples: Union[List[List], List[Dict]] = None, verbose=True):
@@ -558,5 +557,4 @@ class FreeVC(BaseVC):
         if eval:
             self.eval()
 
-    def train_step():
-        ...
+    def train_step(): ...
@@ -155,7 +155,9 @@ def compute_mask_indices(
 
 class WavLMConfig:
     def __init__(self, cfg=None):
-        self.extractor_mode: str = "default"  # mode for feature extractor. default has a single group norm with d groups in the first conv block, whereas layer_norm has layer norms in every block (meant to use with normalize=True)
+        self.extractor_mode: str = (
+            "default"  # mode for feature extractor. default has a single group norm with d groups in the first conv block, whereas layer_norm has layer norms in every block (meant to use with normalize=True)
+        )
         self.encoder_layers: int = 12  # num encoder layers in the transformer
 
         self.encoder_embed_dim: int = 768  # encoder embedding dimension
@@ -164,7 +166,9 @@ class WavLMConfig:
         self.activation_fn: str = "gelu"  # activation function to use
 
         self.layer_norm_first: bool = False  # apply layernorm first in the transformer
-        self.conv_feature_layers: str = "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2"  # string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...]
+        self.conv_feature_layers: str = (
+            "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2"  # string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...]
+        )
         self.conv_bias: bool = False  # include bias in conv encoder
         self.feature_grad_mult: float = 1.0  # multiply feature extractor var grads by this
 
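Note: in the two WavLMConfig hunks above, black wraps the right-hand side of the assignment in parentheses because the long trailing comment pushes the line past the configured length limit; the value and its comment stay together inside the parentheses instead of being split apart. A tiny illustrative example with an invented name:

extractor_mode = (
    "default"  # imagine a trailing comment long enough to push the original single-line assignment past the line-length limit
)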
@@ -234,8 +234,12 @@ class TestZH_CN_Phonemizer(unittest.TestCase):
 class TestBN_Phonemizer(unittest.TestCase):
     def setUp(self):
         self.phonemizer = BN_Phonemizer()
-        self._TEST_CASES = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন"
-        self._EXPECTED = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয় তখনও যেন।"
+        self._TEST_CASES = (
+            "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন"
+        )
+        self._EXPECTED = (
+            "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয় তখনও যেন।"
+        )
 
     def test_phonemize(self):
         self.assertEqual(self.phonemizer.phonemize(self._TEST_CASES, separator=""), self._EXPECTED)
@@ -115,20 +115,14 @@ class TestFreeVC(unittest.TestCase):
             output_wav.shape[0] + config.audio.hop_length == source_wav.shape[0]
         ), f"{output_wav.shape} != {source_wav.shape}"
 
-    def test_train_step(self):
-        ...
+    def test_train_step(self): ...
 
-    def test_train_eval_log(self):
-        ...
+    def test_train_eval_log(self): ...
 
-    def test_test_run(self):
-        ...
+    def test_test_run(self): ...
 
-    def test_load_checkpoint(self):
-        ...
+    def test_load_checkpoint(self): ...
 
-    def test_get_criterion(self):
-        ...
+    def test_get_criterion(self): ...
 
-    def test_init_from_config(self):
-        ...
+    def test_init_from_config(self): ...