In [1]:
%load_ext autoreload
%autoreload 2
import os
import sys
import io
import torch
import time
import numpy as np
from collections import OrderedDict
from matplotlib import pylab as plt
%pylab inline
rcParams["figure.figsize"] = (16, 5)

sys.path.append('/home/erogol/projects/')
import librosa
import librosa.display
from TTS.models.tacotron import Tacotron
from TTS.layers import *
from TTS.utils.data import *
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import load_config
from TTS.utils.text import text_to_sequence
import IPython
from IPython.display import Audio
from utils import *
Populating the interactive namespace from numpy and matplotlib
/home/erogol/miniconda3/envs/pytorch4/lib/python3.6/site-packages/IPython/core/magics/pylab.py:160: UserWarning: pylab import has clobbered these variables: ['plt']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"
In [2]:
ls /data/shared/erogol_models/May-23-2018_06:20AM-loc-sen-attn-bug-fix-debug
best_model.pth.tar checkpoint_41360.pth.tar checkpoint_10152.pth.tar checkpoint_41736.pth.tar checkpoint_10528.pth.tar checkpoint_42112.pth.tar checkpoint_10904.pth.tar checkpoint_42488.pth.tar checkpoint_1128.pth.tar checkpoint_42864.pth.tar checkpoint_11280.pth.tar checkpoint_43240.pth.tar checkpoint_11656.pth.tar checkpoint_43616.pth.tar checkpoint_12032.pth.tar checkpoint_43992.pth.tar checkpoint_12408.pth.tar checkpoint_44368.pth.tar checkpoint_12784.pth.tar checkpoint_44744.pth.tar checkpoint_13160.pth.tar checkpoint_4512.pth.tar checkpoint_13536.pth.tar checkpoint_45120.pth.tar checkpoint_13912.pth.tar checkpoint_45496.pth.tar checkpoint_14288.pth.tar checkpoint_45872.pth.tar checkpoint_14664.pth.tar checkpoint_46248.pth.tar checkpoint_1504.pth.tar checkpoint_46624.pth.tar checkpoint_15040.pth.tar checkpoint_47000.pth.tar checkpoint_15416.pth.tar checkpoint_47376.pth.tar checkpoint_15792.pth.tar checkpoint_47752.pth.tar checkpoint_16168.pth.tar checkpoint_48128.pth.tar checkpoint_16544.pth.tar checkpoint_48504.pth.tar checkpoint_16920.pth.tar checkpoint_4888.pth.tar checkpoint_17296.pth.tar checkpoint_48880.pth.tar checkpoint_17672.pth.tar checkpoint_49256.pth.tar checkpoint_18048.pth.tar checkpoint_49632.pth.tar checkpoint_18424.pth.tar checkpoint_50008.pth.tar checkpoint_1880.pth.tar checkpoint_50384.pth.tar checkpoint_18800.pth.tar checkpoint_50760.pth.tar checkpoint_19176.pth.tar checkpoint_51136.pth.tar checkpoint_19552.pth.tar checkpoint_51512.pth.tar checkpoint_19928.pth.tar checkpoint_51888.pth.tar checkpoint_20304.pth.tar checkpoint_52264.pth.tar checkpoint_20680.pth.tar checkpoint_5264.pth.tar checkpoint_21056.pth.tar checkpoint_52640.pth.tar checkpoint_21432.pth.tar checkpoint_53016.pth.tar checkpoint_21808.pth.tar checkpoint_53392.pth.tar checkpoint_22184.pth.tar checkpoint_53768.pth.tar checkpoint_2256.pth.tar checkpoint_54144.pth.tar checkpoint_22560.pth.tar checkpoint_54520.pth.tar checkpoint_22936.pth.tar checkpoint_54896.pth.tar checkpoint_23312.pth.tar checkpoint_55272.pth.tar checkpoint_23688.pth.tar checkpoint_55648.pth.tar checkpoint_24064.pth.tar checkpoint_56024.pth.tar checkpoint_24440.pth.tar checkpoint_5640.pth.tar checkpoint_24816.pth.tar checkpoint_56400.pth.tar checkpoint_25192.pth.tar checkpoint_56776.pth.tar checkpoint_25568.pth.tar checkpoint_57152.pth.tar checkpoint_25944.pth.tar checkpoint_57528.pth.tar checkpoint_2632.pth.tar checkpoint_57904.pth.tar checkpoint_26320.pth.tar checkpoint_58280.pth.tar checkpoint_26696.pth.tar checkpoint_58656.pth.tar checkpoint_27072.pth.tar checkpoint_59032.pth.tar checkpoint_27448.pth.tar checkpoint_59408.pth.tar checkpoint_27824.pth.tar checkpoint_59784.pth.tar checkpoint_28200.pth.tar checkpoint_6016.pth.tar checkpoint_28576.pth.tar checkpoint_60160.pth.tar checkpoint_28952.pth.tar checkpoint_60536.pth.tar checkpoint_29328.pth.tar checkpoint_60912.pth.tar checkpoint_29704.pth.tar checkpoint_61288.pth.tar checkpoint_3008.pth.tar checkpoint_61664.pth.tar checkpoint_30080.pth.tar checkpoint_62040.pth.tar checkpoint_30456.pth.tar checkpoint_62416.pth.tar checkpoint_30832.pth.tar checkpoint_62792.pth.tar checkpoint_31208.pth.tar checkpoint_63168.pth.tar checkpoint_31584.pth.tar checkpoint_63544.pth.tar checkpoint_31960.pth.tar checkpoint_6392.pth.tar checkpoint_32336.pth.tar checkpoint_63920.pth.tar checkpoint_32712.pth.tar checkpoint_64296.pth.tar checkpoint_33088.pth.tar checkpoint_64672.pth.tar checkpoint_33464.pth.tar checkpoint_65048.pth.tar checkpoint_3384.pth.tar checkpoint_65424.pth.tar 
checkpoint_33840.pth.tar checkpoint_65800.pth.tar checkpoint_34216.pth.tar checkpoint_66176.pth.tar checkpoint_34592.pth.tar checkpoint_66552.pth.tar checkpoint_34968.pth.tar checkpoint_66928.pth.tar checkpoint_35344.pth.tar checkpoint_67304.pth.tar checkpoint_35720.pth.tar checkpoint_6768.pth.tar checkpoint_36096.pth.tar checkpoint_67680.pth.tar checkpoint_36472.pth.tar checkpoint_68056.pth.tar checkpoint_36848.pth.tar checkpoint_68432.pth.tar checkpoint_37224.pth.tar checkpoint_68808.pth.tar checkpoint_376.pth.tar checkpoint_69184.pth.tar checkpoint_3760.pth.tar checkpoint_7144.pth.tar checkpoint_37600.pth.tar checkpoint_752.pth.tar checkpoint_37976.pth.tar checkpoint_7520.pth.tar checkpoint_38352.pth.tar checkpoint_7896.pth.tar checkpoint_38728.pth.tar checkpoint_8272.pth.tar checkpoint_39104.pth.tar checkpoint_8648.pth.tar checkpoint_39480.pth.tar checkpoint_9024.pth.tar checkpoint_39856.pth.tar checkpoint_9400.pth.tar checkpoint_40232.pth.tar checkpoint_9776.pth.tar checkpoint_40608.pth.tar checkpoints/ checkpoint_40984.pth.tar config.json checkpoint_4136.pth.tar events.out.tfevents.1527081612.mlc1
In [3]:
def tts(model, text, CONFIG, use_cuda, ap, figures=True):
    # Synthesize a single sentence and return the raw waveform.
    waveform, alignment, spectrogram, stop_tokens = create_speech(model, text, CONFIG, use_cuda, ap)
    return waveform


def text2audio(text, model, CONFIG, use_cuda, ap):
    # Split the input on '.' and synthesize sentence by sentence,
    # inserting a short stretch of silence between sentences.
    wavs = []
    for sen in text.split('.'):
        if len(sen) < 3:
            continue
        sen += '.'
        sen = sen.strip()
        print(sen)
        wav = tts(model, sen, CONFIG, use_cuda, ap)
        wavs.append(wav)
        wavs.append(np.zeros(10000))  # ~10k samples of silence as a pause
    # audio = np.stack(wavs)
    # IPython.display.display(Audio(audio, rate=CONFIG.sample_rate))
    return wavs
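The split on '.' above is deliberately simple; it drops short fragments but also breaks on abbreviations and ignores '!' and '?'. A slightly more careful splitter is sketched below (a regex heuristic, not from the original notebook):

import re

def split_sentences(text):
    # Split where '.', '!' or '?' is followed by whitespace; decimals like
    # "3.14" survive because no whitespace follows the dot, though
    # abbreviations such as "Dr. Smith" would still be split.
    parts = re.split(r'(?<=[.!?])\s+', text.strip())
    return [p for p in parts if len(p) >= 3]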
In [4]:
# Set constants
ROOT_PATH = '/data/shared/erogol_models/May-23-2018_06:20AM-loc-sen-attn-bug-fix-debug'
MODEL_PATH_TMP = ROOT_PATH + '/checkpoint_{}.pth.tar'
CONFIG_PATH = ROOT_PATH + '/config.json'
OUT_FOLDER = ROOT_PATH + '/test/'
CONFIG = load_config(CONFIG_PATH)
use_cuda = True
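OUT_FOLDER is defined but never created below; if test audio should be written there, make sure it exists first (a one-line addition, relying on the os import from the first cell):

os.makedirs(OUT_FOLDER, exist_ok=True)  # no-op if the folder already exists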
In [5]:
# check_idxs = [50008, 100016, 200032, 266208]
check_idxs = [69184]
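Rather than hard-coding check_idxs, the newest checkpoint can be found programmatically. A minimal sketch (find_latest_checkpoint is a hypothetical helper, not part of TTS; it assumes the checkpoint_<step>.pth.tar naming shown in the listing above):

import os
import glob
import re

def find_latest_checkpoint(root_path):
    # Hypothetical helper: collect checkpoint_<step>.pth.tar files and
    # return the path with the highest training step.
    paths = glob.glob(os.path.join(root_path, 'checkpoint_*.pth.tar'))
    def step_of(p):
        m = re.search(r'checkpoint_(\d+)\.pth\.tar$', p)
        return int(m.group(1)) if m else -1
    return max(paths, key=step_of)

# e.g. find_latest_checkpoint(ROOT_PATH) ends with checkpoint_69184.pth.tar here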
In [6]:
# load the model
model = Tacotron(CONFIG.embedding_size, CONFIG.num_freq, CONFIG.num_mels, CONFIG.r)

# load the audio processor
ap = AudioProcessor(CONFIG.sample_rate, CONFIG.num_mels, CONFIG.min_level_db,
                    CONFIG.frame_shift_ms, CONFIG.frame_length_ms, CONFIG.preemphasis,
                    CONFIG.ref_level_db, CONFIG.num_freq, CONFIG.power,
                    griffin_lim_iters=30)

for idx in check_idxs:
    MODEL_PATH = MODEL_PATH_TMP.format(idx)
    print(MODEL_PATH)

    # load model state
    if use_cuda:
        cp = torch.load(MODEL_PATH)
    else:
        cp = torch.load(MODEL_PATH, map_location=lambda storage, loc: storage)

    # load the model weights
    model.load_state_dict(cp['model'])
    if use_cuda:
        model.cuda()
    model.eval()
    model.decoder.max_decoder_steps = 400

    text = "Voice is natural, voice is human. That’s why we are fascinated with creating usable voice technology for our machines. But to create voice systems, an extremely large amount of voice data is required. Most of the data used by large companies isn’t available to the majority of people. We think that stifles innovation. So we’ve launched Project Common Voice, a project to help make voice recognition open to everyone."
    wavs = text2audio(text, model, CONFIG, use_cuda, ap)

    audio = np.concatenate(wavs)
    IPython.display.display(Audio(audio, rate=CONFIG.sample_rate))
| > Number of characters : 149
/data/shared/erogol_models/May-23-2018_06:20AM-loc-sen-attn-bug-fix-debug/checkpoint_69184.pth.tar
Voice is natural, voice is human.
That’s why we are fascinated with creating usable voice technology for our machines.
But to create voice systems, an extremely large amount of voice data is required.
Most of the data used by large companies isn’t available to the majority of people.
We think that stifles innovation.
So we’ve launched Project Common Voice, a project to help make voice recognition open to everyone.
[audio output: inline player with the synthesized speech]
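To keep the synthesized audio around, the concatenated waveform from the last cell can also be written under OUT_FOLDER. A minimal sketch using scipy.io.wavfile (scipy is not used in the original notebook, and it assumes the samples are floats in [-1, 1]; audio and idx are reused from the loop above):

import os
import numpy as np
from scipy.io import wavfile

os.makedirs(OUT_FOLDER, exist_ok=True)
# Scale float samples to 16-bit PCM before writing.
pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
wavfile.write(os.path.join(OUT_FOLDER, 'checkpoint_{}_test.wav'.format(idx)),
              CONFIG.sample_rate, pcm)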