using epitran and new phoneme list

This commit is contained in:
Eren Golge 2019-01-03 03:04:25 +01:00
parent 7edb53ce63
commit 444451dc8e
1 changed file with 38 additions and 32 deletions

View File

@ -1,7 +1,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import re import re
import phonemizer import epitran
epi = epitran.Epitran('eng-Latn')
# valid_symbols = [ # valid_symbols = [
# 'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', # 'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',
@ -16,56 +17,61 @@ import phonemizer
_phonemes = { _phonemes = {
'aa', '$',
'ae', '&',
'ah', 'a',
'ao',
'aw',
'ax',
'ay',
'b', 'b',
'ch',
'd', 'd',
'dh', 'd͡ʒ',
'eh', 'e',
'ey',
'f', 'f',
'g', 'h',
'hh',
'i', 'i',
'ih', 'j',
'iy',
'jh',
'k', 'k',
'l', 'l',
'm', 'm',
'n', 'n',
'ng', 'o',
'ow',
'oy',
'p', 'p',
'pau',
'r',
's', 's',
'sh',
'ssil',
't', 't',
'th', 't͡ʃ',
'uh', 'u',
'uw',
'v', 'v',
'w', 'w',
'y', 'z',
'z' '£',
} 'à',
'â',
'æ',
'è',
'é',
'ê',
'ð',
'ü',
'ŋ',
'ɑ',
'ɔ',
'ə',
'ɛ',
'ɡ',
'ɪ',
'ɹ',
'ɹ̩',
'ʃ',
'ʊ',
'ʌ',
'ʒ',
'θ'
}
_phonemes = set(_phonemes) _phonemes = set(_phonemes)
def text2phone(text): def text2phone(text):
seperator = phonemizer.separator.Separator('', '', ' ')
try: try:
ph = phonemizer.phonemize(text, separator=seperator, strip=True, njobs=1) ph = epi.trans_list(text, normpunc=True)
except: except:
ph = None ph = None
return ph return ph