From 7c671cae5ace66c0ba8fc0eac2c3b158fe9153d6 Mon Sep 17 00:00:00 2001
From: erogol <erogol@hotmail.com>
Date: Wed, 8 Jul 2020 10:26:20 +0200
Subject: [PATCH] newer number normalization

---
 utils/text/number_norm.py | 81 +++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 41 deletions(-)

diff --git a/utils/text/number_norm.py b/utils/text/number_norm.py
index 54fa1093..7b539bff 100644
--- a/utils/text/number_norm.py
+++ b/utils/text/number_norm.py
@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
 """ from https://github.com/keithito/tacotron """
 
 import inflect
 import re
 
-
 _inflect = inflect.engine()
 _comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
 _decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
@@ -15,58 +13,59 @@ _number_re = re.compile(r'[0-9]+')
 
 
 def _remove_commas(m):
-  return m.group(1).replace(',', '')
+    return m.group(1).replace(',', '')
 
 
 def _expand_decimal_point(m):
-  return m.group(1).replace('.', ' point ')
+    return m.group(1).replace('.', ' point ')
 
 
 def _expand_dollars(m):
-  match = m.group(1)
-  parts = match.split('.')
-  if len(parts) > 2:
-    return match + ' dollars'  # Unexpected format
-  dollars = int(parts[0]) if parts[0] else 0
-  cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
-  if dollars and cents:
-    dollar_unit = 'dollar' if dollars == 1 else 'dollars'
-    cent_unit = 'cent' if cents == 1 else 'cents'
-    return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
-  elif dollars:
-    dollar_unit = 'dollar' if dollars == 1 else 'dollars'
-    return '%s %s' % (dollars, dollar_unit)
-  elif cents:
-    cent_unit = 'cent' if cents == 1 else 'cents'
-    return '%s %s' % (cents, cent_unit)
-  else:
-    return 'zero dollars'
+    match = m.group(1)
+    parts = match.split('.')
+    if len(parts) > 2:
+        return match + ' dollars'  # Unexpected format
+    dollars = int(parts[0]) if parts[0] else 0
+    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
+    if dollars and cents:
+        dollar_unit = 'dollar' if dollars == 1 else 'dollars'
+        cent_unit = 'cent' if cents == 1 else 'cents'
+        return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
+    elif dollars:
+        dollar_unit = 'dollar' if dollars == 1 else 'dollars'
+        return '%s %s' % (dollars, dollar_unit)
+    elif cents:
+        cent_unit = 'cent' if cents == 1 else 'cents'
+        return '%s %s' % (cents, cent_unit)
+    else:
+        return 'zero dollars'
 
 
 def _expand_ordinal(m):
-  return _inflect.number_to_words(m.group(0))
+    return _inflect.number_to_words(m.group(0))
 
 
 def _expand_number(m):
-  num = int(m.group(0))
-  if num > 1000 and num < 3000:
-    if num == 2000:
-      return 'two thousand'
-    elif num > 2000 and num < 2010:
-      return 'two thousand ' + _inflect.number_to_words(num % 100)
-    elif num % 100 == 0:
-      return _inflect.number_to_words(num // 100) + ' hundred'
-    else:
-      return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
-  else:
+    num = int(m.group(0))
+    if 1000 < num < 3000:
+        if num == 2000:
+            return 'two thousand'
+        if 2000 < num < 2010:
+            return 'two thousand ' + _inflect.number_to_words(num % 100)
+        if num % 100 == 0:
+            return _inflect.number_to_words(num // 100) + ' hundred'
+        return _inflect.number_to_words(num,
+                                        andword='',
+                                        zero='oh',
+                                        group=2).replace(', ', ' ')
     return _inflect.number_to_words(num, andword='')
 
 
 def normalize_numbers(text):
-  text = re.sub(_comma_number_re, _remove_commas, text)
-  text = re.sub(_pounds_re, r'\1 pounds', text)
-  text = re.sub(_dollars_re, _expand_dollars, text)
-  text = re.sub(_decimal_number_re, _expand_decimal_point, text)
-  text = re.sub(_ordinal_re, _expand_ordinal, text)
-  text = re.sub(_number_re, _expand_number, text)
-  return text
\ No newline at end of file
+    text = re.sub(_comma_number_re, _remove_commas, text)
+    text = re.sub(_pounds_re, r'\1 pounds', text)
+    text = re.sub(_dollars_re, _expand_dollars, text)
+    text = re.sub(_decimal_number_re, _expand_decimal_point, text)
+    text = re.sub(_ordinal_re, _expand_ordinal, text)
+    text = re.sub(_number_re, _expand_number, text)
+    return text