# -*- coding: utf-8 -*-
"""Synthesize a fixed list of demo sentences from a saved Tacotron checkpoint.

Provenance: reconstructed from a git diff that recorded the deletion of
``synthesis.py`` (deleted file mode 100644, index c1ff8290..00000000).
"""

from network import *
from data import inv_spectrogram, find_endpoint, save_wav, spectrogram
import numpy as np
import argparse
import os
import sys
import io
from text import text_to_sequence

use_cuda = torch.cuda.is_available()


def _strip_module_prefix(state_dict):
    """Return a copy of ``state_dict`` with a leading ``module.`` removed from keys.

    ``nn.DataParallel`` keeps the wrapped network in its ``module`` attribute,
    so parameters saved from such a wrapper are named ``module.<name>``. Keys
    without that prefix are passed through unchanged.
    """
    prefix = 'module.'
    return {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }


def main(args):
    """Restore the checkpoint for ``args.restore_step`` and write one wav per sentence.

    Output files are written to ``hp.output_path`` and named
    ``result_<restore_step>_<sentence number>.wav``.

    Args:
        args: Parsed command-line namespace. Only ``restore_step`` is read.

    Raises:
        FileNotFoundError: If no checkpoint file exists for the requested step.
            Any other failure while loading (unreadable file, missing
            ``'model'`` entry, mismatching parameter names) propagates with
            its own exception type instead of being reported as a missing
            model.
    """
    # Make model. Previously ``model`` was only bound when CUDA was available,
    # so CPU-only hosts failed with a NameError disguised as "Model not exists".
    net = Tacotron()
    if use_cuda:
        model = nn.DataParallel(net.cuda())
    else:
        # NOTE(review): running on CPU also requires that Tacotron itself does
        # not hard-code .cuda() calls -- confirm in network.py.
        model = net

    # Load checkpoint
    checkpoint_file = os.path.join(
        hp.checkpoint_path, 'checkpoint_%d.pth.tar' % args.restore_step)
    if not os.path.isfile(checkpoint_file):
        raise FileNotFoundError("\n------------Model not exists------------\n")

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    if use_cuda:
        checkpoint = torch.load(checkpoint_file)
    else:
        # Keep every storage on the CPU even if it was saved from a GPU.
        checkpoint = torch.load(
            checkpoint_file, map_location=lambda storage, loc: storage)

    state_dict = checkpoint['model']
    if not use_cuda:
        # Presumably the checkpoint was saved from a DataParallel-wrapped
        # model (the GPU path loads it into one); the bare CPU model expects
        # parameter names without the wrapper's prefix.
        state_dict = _strip_module_prefix(state_dict)
    model.load_state_dict(state_dict)
    print("\n--------model restored at step %d--------\n" %
          args.restore_step)

    # Evaluation
    model = model.eval()

    # Make result folder if not exists (also creates missing parent folders)
    os.makedirs(hp.output_path, exist_ok=True)

    # Sentences for generation
    sentences = [
        "I try my best to translate text to speech. But I know I need more work",
        "The new Firefox, Fast for good.",
        "Technology is continually providing us with new ways to create and publish stories.",
        "For these stories to achieve their full impact, it requires tool.",
        "I am allien and I am here to destron your world."
    ]

    # Synthesis and save to wav files
    for i, text in enumerate(sentences):
        wav = generate(model, text)
        path = os.path.join(hp.output_path, 'result_%d_%d.wav' %
                            (args.restore_step, i + 1))
        # The with-block closes the file; no explicit close() is needed.
        with open(path, 'wb') as f:
            f.write(wav)

        print("save wav file at step %d ..." % (i + 1))


def generate(model, text):
    """Run ``model`` on ``text`` and return the resulting wav file content.

    Args:
        model: Callable network taking ``(characters, mel_input)`` and
            returning a pair whose second item is the linear output.
        text: Sentence to synthesize.

    Returns:
        bytes: Everything ``save_wav`` wrote into an in-memory buffer.
    """
    # Text to index sequence, with a leading batch axis of size 1
    cleaner_names = [x.strip() for x in hp.cleaners.split(',')]
    seq = np.expand_dims(np.asarray(text_to_sequence(
        text, cleaner_names), dtype=np.int32), axis=0)

    # Provide [GO] Frame
    mel_input = np.zeros([seq.shape[0], hp.num_mels, 1], dtype=np.float32)

    # Variables. ``volatile=True`` is the inference flag of the legacy
    # Variable API this file is written against; newer PyTorch releases
    # accept but ignore it (``torch.no_grad()`` is the replacement there).
    characters = Variable(torch.from_numpy(seq).long(), volatile=True)
    mel_input = Variable(torch.from_numpy(mel_input), volatile=True)
    if use_cuda:
        # Only move to the GPU when one exists, so CPU-only hosts still work.
        characters = characters.cuda()
        mel_input = mel_input.cuda()

    # Spectrogram to wav
    _, linear_output = model(characters, mel_input)
    wav = inv_spectrogram(linear_output[0].data.cpu().numpy())
    # Drop everything from the index reported by find_endpoint onwards
    wav = wav[:find_endpoint(wav)]
    out = io.BytesIO()
    save_wav(wav, out)

    return out.getvalue()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--restore_step', type=int,
                        help='Global step to restore checkpoint', default=0)
    # Accepted for command-line compatibility; not read by main().
    parser.add_argument('--batch_size', type=int, help='Batch size', default=1)
    args = parser.parse_args()
    main(args)