import math

import numpy as np
import torch
from torch import nn
from torch.nn import functional as F


def convert_pad_shape(pad_shape):
    """Flatten a per-dimension pad spec into the flat list ``F.pad`` expects.

    ``pad_shape`` is a sequence of ``[before, after]`` pairs ordered from the
    first dimension to the last. The pairs are reversed (``F.pad`` takes the
    last dimension first) and then flattened into a single list.
    """
    reversed_pairs = pad_shape[::-1]
    return [item for pair in reversed_pairs for item in pair]


def shift_1d(x):
    """Shift ``x`` one step to the right along its third axis.

    A zero is padded in front of the third axis and the last element of that
    axis is dropped, so the size of ``x`` is preserved. The pad spec and the
    ``[:, :, :-1]`` slice are written for a 3-D input.
    """
    x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1]
    return x


def sequence_mask(length, max_length=None):
    """Build a boolean mask that is True for positions before each length.

    Args:
        length: tensor of sequence lengths (expected to be 1-D, one entry
            per sequence).
        max_length: number of mask columns; defaults to ``length.max()``.

    Returns:
        Boolean tensor whose entry ``[i, k]`` is ``k < length[i]``.
    """
    if max_length is None:
        max_length = length.max()
    x = torch.arange(max_length, dtype=length.dtype, device=length.device)
    return x.unsqueeze(0) < length.unsqueeze(1)


def maximum_path(value, mask, max_neg_val=-np.inf):
    """Find the monotonic path through ``value`` with the maximum total score.

    NumPy implementation (the original author notes it is about 4 times
    faster than the torch version). The search runs on CPU; the result is
    moved back to the device and dtype of ``value``.

    Args:
        value: [b, t_x, t_y] score tensor.
        mask: [b, t_x, t_y] tensor, non-zero where ``value`` is valid.
        max_neg_val: score assigned to unreachable cells.

    Returns:
        [b, t_x, t_y] tensor holding 1 on the selected path and 0 elsewhere.
    """
    value = value * mask

    device = value.device
    dtype = value.dtype
    value = value.cpu().detach().numpy()
    # Builtin ``bool`` instead of ``np.bool``: that alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    mask = mask.cpu().detach().numpy().astype(bool)

    b, t_x, t_y = value.shape
    # direction[b, x, j] == 1: the best path into (x, j) came from (x, j - 1);
    # direction[b, x, j] == 0: it came from (x - 1, j - 1).
    direction = np.zeros(value.shape, dtype=np.int64)
    v = np.zeros((b, t_x), dtype=np.float32)
    x_range = np.arange(t_x, dtype=np.float32).reshape(1, -1)

    # Forward pass: accumulate the best score column by column.
    for j in range(t_y):
        # v0 is the previous column shifted down by one x position.
        v0 = np.pad(
            v, [[0, 0], [1, 0]], mode="constant", constant_values=max_neg_val
        )[:, :-1]
        v1 = v
        max_mask = v1 >= v0
        v_max = np.where(max_mask, v1, v0)
        direction[:, :, j] = max_mask

        # Row x cannot be reached before column x, since the path advances
        # at most one x position per column.
        index_mask = x_range <= j
        v = np.where(index_mask, v_max + value[:, :, j], max_neg_val)
    direction = np.where(mask, direction, 1)

    # Backward pass: start from the last valid x of each batch item and
    # follow the recorded directions back to the first column.
    path = np.zeros(value.shape, dtype=np.float32)
    index = mask[:, :, 0].sum(1).astype(np.int64) - 1
    index_range = np.arange(b)
    for j in reversed(range(t_y)):
        path[index_range, index, j] = 1
        index = index + direction[index_range, index, j] - 1
    path = path * mask.astype(np.float32)
    path = torch.from_numpy(path).to(device=device, dtype=dtype)
    return path