diff --git a/server/README.md b/server/README.md
new file mode 100644
index 00000000..4326b183
--- /dev/null
+++ b/server/README.md
@@ -0,0 +1,6 @@
+## TTS example web server
+Steps to run:
+1. Download one of the pretrained models listed on the main page.
+2. Set the model paths and other options in server/conf.json.
+3. Start the server: ```python server/server.py -c conf.json```
+4. Open ```localhost:[given_port]``` in your browser and enjoy.
\ No newline at end of file
diff --git a/server/conf.json b/server/conf.json
new file mode 100644
index 00000000..031ac6ec
--- /dev/null
+++ b/server/conf.json
@@ -0,0 +1,7 @@
+{
+    "model_path":"/home/egolge/projects/models/May-22-2018_03_24PM-e6112f7",
+    "model_name":"checkpoint_272976.pth.tar",
+    "model_config":"config.json",
+    "port": 5000,
+    "use_cuda": true
+}
\ No newline at end of file
diff --git a/server/server.py b/server/server.py
new file mode 100644
index 00000000..459161ff
--- /dev/null
+++ b/server/server.py
@@ -0,0 +1,34 @@
+#!flask/bin/python
+import argparse
+from synthesizer import Synthesizer
+from TTS.utils.generic_utils import load_config
+from flask import (Flask, request,
+                   render_template, send_file)
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-c', '--config_path', type=str,
+                    help='path to the server config file')
+args = parser.parse_args()
+
+config = load_config(args.config_path)
+app = Flask(__name__)
+synthesizer = Synthesizer()
+synthesizer.load_model(config.model_path, config.model_name,
+                       config.model_config, config.use_cuda)
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@app.route('/api/tts', methods=['GET'])
+def tts():
+    text = request.args.get('text')
+    print(" > Model input: {}".format(text))
+    data = synthesizer.tts(text)
+    return send_file(data,
+                     attachment_filename="testing.wav",
+                     as_attachment=True,
+                     mimetype='audio/wav')
+
+if __name__ == '__main__':
+    app.run(debug=True, host='0.0.0.0', port=config.port)
\ No newline at end of file
diff --git a/server/synthesizer.py b/server/synthesizer.py
new file mode 100644
index 00000000..f477c23a
--- /dev/null
+++ b/server/synthesizer.py
@@ -0,0 +1,63 @@
+import io
+import os
+import librosa
+import torch
+import numpy as np
+from TTS.utils.text import text_to_sequence
+from TTS.utils.generic_utils import load_config
+from TTS.utils.audio import AudioProcessor
+from TTS.models.tacotron import Tacotron
+
+
+class Synthesizer(object):
+
+    def load_model(self, model_path, model_name, model_config, use_cuda):
+        model_config = os.path.join(model_path, model_config)
+        self.model_file = os.path.join(model_path, model_name)
+        print(" > Loading model ...")
+        print(" | > model config: ", model_config)
+        print(" | > model file: ", self.model_file)
+        config = load_config(model_config)
+        self.config = config
+        self.use_cuda = use_cuda
+        self.model = Tacotron(config.embedding_size, config.num_freq, config.num_mels, config.r)
+        self.ap = AudioProcessor(config.sample_rate, config.num_mels, config.min_level_db,
+                                 config.frame_shift_ms, config.frame_length_ms, config.preemphasis,
+                                 config.ref_level_db, config.num_freq, config.power, griffin_lim_iters=30)
+        # load the checkpoint (map to CPU when CUDA is disabled)
+        if use_cuda:
+            cp = torch.load(self.model_file)
+        else:
+            cp = torch.load(self.model_file, map_location=lambda storage, loc: storage)
+        # restore the weights and switch to inference mode
+        self.model.load_state_dict(cp['model'])
+        if use_cuda:
+            self.model.cuda()
+        self.model.eval()
+
+    def save_wav(self, wav, path):
+        wav *= 32767 / max(0.01, np.max(np.abs(wav)))
+        librosa.output.write_wav(path, wav.astype(np.float32), self.config.sample_rate, norm=True)
+
+    def tts(self, text):
+        text_cleaner = [self.config.text_cleaner]
+        wavs = []
+        for sen in text.split('.'):
+            if len(sen) < 3:
+                continue
+            sen = sen.strip()
+            sen += '.'
+            seq = np.array(text_to_sequence(sen, text_cleaner))
+            chars_var = torch.from_numpy(seq).unsqueeze(0)
+            if self.use_cuda:
+                chars_var = chars_var.cuda()
+            mel_out, linear_out, alignments, stop_tokens = self.model.forward(chars_var)
+            linear_out = linear_out[0].data.cpu().numpy()
+            wav = self.ap.inv_spectrogram(linear_out.T)
+            wav = wav[:self.ap.find_endpoint(wav)]
+            wavs.append(wav)
+            wavs.append(np.zeros(10000))  # short pause between sentences
+        # concatenate all sentences and write them to a single in-memory wav
+        out = io.BytesIO()
+        self.save_wav(np.concatenate(wavs), out)
+        return out
\ No newline at end of file
diff --git a/server/templates/index.html b/server/templates/index.html
new file mode 100644
index 00000000..40a53ff8
--- /dev/null
+++ b/server/templates/index.html
@@ -0,0 +1,104 @@
+<!-- ... -->
+It is "work-in-progress" and still far from an alpha release.
+<!-- ... -->
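
Once the server from this patch is running, the `/api/tts` route defined in `server/server.py` can be exercised directly. The client sketch below is not part of the patch; it assumes the default port 5000 from `server/conf.json` and the third-party `requests` package.

```python
# client_example.py -- hypothetical helper, not part of this patch.
# Fetches synthesized speech from the /api/tts endpoint defined in
# server/server.py and saves the returned WAV attachment to disk.
import requests

SERVER_URL = "http://localhost:5000/api/tts"  # port 5000 is the conf.json default

text = "Hello, this is a test of the TTS example server."
response = requests.get(SERVER_URL, params={"text": text})
response.raise_for_status()

with open("tts_output.wav", "wb") as wav_file:
    wav_file.write(response.content)
print("Wrote {} bytes to tts_output.wav".format(len(response.content)))
```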
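
For debugging the model without Flask, the `Synthesizer` class added in `server/synthesizer.py` can also be driven on its own. This is only a sketch, assuming it is run from the `server/` directory (so `synthesizer` and `conf.json` resolve) and that the configured model files have been downloaded.

```python
# offline_synthesis.py -- hypothetical sketch, not part of this patch.
# Mirrors what server.py does per request, but writes the result to disk
# instead of returning it over HTTP.
from TTS.utils.generic_utils import load_config
from synthesizer import Synthesizer

config = load_config("conf.json")  # same config file the server uses

synthesizer = Synthesizer()
synthesizer.load_model(config.model_path, config.model_name,
                       config.model_config, config.use_cuda)

# Synthesizer.tts() returns an io.BytesIO holding a complete WAV file.
wav_buffer = synthesizer.tts("This sentence is synthesized without the web server.")
with open("offline_output.wav", "wb") as wav_file:
    wav_file.write(wav_buffer.getvalue())
```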