paddle | Implementing an LSTM from Scratch

This post implements, from scratch, a single-layer LSTM, a multi-layer LSTM, and a multi-layer bidirectional LSTM.
Multi-layer LSTM: each layer takes the hidden-state sequence produced by the layer below it as its input.
Bidirectional LSTM: the input sequence is reversed along the time axis and fed to a second set of weights; the resulting hidden states are then reversed back into forward time order and concatenated with the hidden states from the forward direction (sketched below).
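Before the full implementation, here is a minimal, self-contained sketch of the reverse-run-reverse-concatenate idea described above. forward_pass is only a hypothetical stand-in for running one LSTM direction over the time axis, and the sizes are made up:

import paddle

def forward_pass(x):
    # Hypothetical stand-in for a unidirectional LSTM scan over the time axis;
    # here it simply returns its input so the shapes are easy to follow.
    return x

batch, seq_len, hidden = 2, 5, 3                  # made-up sizes
x = paddle.randn([batch, seq_len, hidden])

h_fwd = forward_pass(x)                           # forward direction
h_bwd = forward_pass(paddle.flip(x, axis=[1]))    # backward direction sees reversed time
h_bwd = paddle.flip(h_bwd, axis=[1])              # flip back so time steps align with h_fwd
bi_out = paddle.concat([h_fwd, h_bwd], axis=2)    # both directions' features per time step
print(bi_out.shape)                               # [2, 5, 6]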
The full implementation is as follows:

import paddle
import paddle.nn as nn
import paddle.nn.functional as F


# Custom LSTM implementation
class CustomLSTM(nn.Layer):
    def __init__(self, input_sz, hidden_sz, num_layers=2, bias=True, bidirectional=False):
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz
        self.bias = bias
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        self.param_names = []
        for layer in range(self.num_layers):
            self.param_names.append([])
            for direction in range(self.num_directions):
                # Layers after the first take the (possibly concatenated) hidden states as input.
                self.input_size = self.input_size if layer == 0 else self.hidden_size * self.num_directions
                # W: input-to-hidden, U: hidden-to-hidden, b: bias; the four gates are fused.
                W = self.create_parameter([self.input_size, self.hidden_size * 4])
                U = self.create_parameter([self.hidden_size, self.hidden_size * 4])
                b = self.create_parameter([self.hidden_size * 4])
                layer_params = (W, U, b)

                suffix = '_reverse' if direction == 1 else ''
                self.param_name = ['weight_W{}{}', 'weight_U{}{}']
                if bias:
                    self.param_name += ['bias_{}{}']
                self.param_name = [x.format(layer, suffix) for x in self.param_name]
                for name, param in zip(self.param_name, layer_params):
                    setattr(self, name, param)
                self.param_names[layer].append(self.param_name)

        self.all_weights = [[[getattr(self, weight) for weight in weights]
                             for weights in weights_layer]
                            for weights_layer in self.param_names]

    def forward(self, x, init_states=None):
        """Assumes x is of shape (batch, sequence, feature)."""
        batch_size, seq_sz, _ = x.shape
        if init_states is None:
            num_directions = 2 if self.bidirectional else 1
            h_t = paddle.zeros((self.num_layers * num_directions, batch_size, self.hidden_size))
            c_t = paddle.zeros((self.num_layers * num_directions, batch_size, self.hidden_size))
        else:
            h_t, c_t = init_states

        for layer in range(self.num_layers):
            hidden_seq = []
            hidden_seq_reverse = []
            self.weight_layer = self.all_weights[layer]
            for direction in range(self.num_directions):
                self.weight = self.weight_layer[direction]
                HS = self.hidden_size
                h_t, c_t = h_t[0].unsqueeze(0), c_t[0].unsqueeze(0)
                for t in range(seq_sz):
                    x_t = x[:, t, :]
                    # Batch the four gate computations into a single matrix multiplication.
                    if self.bias:
                        gates = x_t @ self.weight[0] + h_t @ self.weight[1] + self.weight[2]
                    else:
                        gates = x_t @ self.weight[0] + h_t @ self.weight[1]
                    gates = gates[0]
                    i_t, f_t, g_t, o_t = (
                        F.sigmoid(gates[:, :HS]),              # input gate
                        F.sigmoid(gates[:, HS:HS * 2]),        # forget gate
                        paddle.tanh(gates[:, HS * 2:HS * 3]),  # candidate cell state
                        F.sigmoid(gates[:, HS * 3:]),          # output gate
                    )
                    c_t = f_t * c_t + i_t * g_t
                    h_t = o_t * paddle.tanh(c_t)

                    if direction == 0:
                        if isinstance(hidden_seq, list):
                            hidden_seq = h_t[0].unsqueeze(1)
                        else:
                            hidden_seq = paddle.concat((hidden_seq, h_t[0].unsqueeze(1)), axis=1)
                    if direction == 1:
                        if isinstance(hidden_seq_reverse, list):
                            hidden_seq_reverse = h_t[0].unsqueeze(1)
                        else:
                            hidden_seq_reverse = paddle.concat(
                                (hidden_seq_reverse, h_t[0].unsqueeze(1)), axis=1)

                # Reverse the time axis so the second direction reads the sequence backwards.
                x = paddle.to_tensor(x.detach().cpu().numpy()[:, ::-1, :].copy())
                if direction == 1:
                    # Flip the backward hidden states back into forward time order,
                    # then concatenate them with the forward hidden states.
                    hidden_seq_reverse = paddle.to_tensor(
                        hidden_seq_reverse.detach().cpu().numpy()[:, ::-1, :].copy())
                    hidden_seq = paddle.concat((hidden_seq, hidden_seq_reverse), axis=2)
            # The hidden-state sequence of this layer becomes the input of the next layer.
            x = hidden_seq
        return hidden_seq, (h_t, c_t)
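As a quick sanity check, the class above can be called much like paddle.nn.LSTM. Below is a minimal usage sketch with made-up sizes; the comparison against the built-in layer only checks output shapes (the two layers are initialized differently, so their values will not match):

import paddle
import paddle.nn as nn

batch, seq_len, input_sz, hidden_sz = 4, 10, 16, 32   # hypothetical sizes

x = paddle.randn([batch, seq_len, input_sz])

custom = CustomLSTM(input_sz, hidden_sz, num_layers=2, bidirectional=True)
out, (h, c) = custom(x)
print(out.shape)        # [4, 10, 64]: hidden_size * 2 directions

builtin = nn.LSTM(input_sz, hidden_sz, num_layers=2, direction='bidirect')
out_ref, _ = builtin(x)
print(out_ref.shape)    # [4, 10, 64]: same output shape as the custom layer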

Resource download: implementing LSTM from scratch, complete code covering development, usage, and comparison.
