| @@ -695,13 +695,7 @@ int GRU_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) c | |||||
| int GRU_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | int GRU_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | ||||
| { | { | ||||
| if (bottom_blobs.size() != 2 || top_blobs.size() != 2) | |||||
| { | |||||
| return forward(bottom_blobs[0], top_blobs[0], opt); | |||||
| } | |||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int elembits = bottom_blob.elembits(); | int elembits = bottom_blob.elembits(); | ||||
| #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | ||||
| @@ -720,24 +714,72 @@ int GRU_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top | |||||
| #endif | #endif | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| Mat& hidden_state = top_blobs[1]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| //Copy previous states | |||||
| hidden_state = bottom_blobs[1].clone(opt.blob_allocator); | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| hidden = bottom_blobs[1].clone(hidden_allocator); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 4u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 4u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| int ret = gru(bottom_blob, top_blob, direction, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden_state, opt); | |||||
| int ret = gru(bottom_blob, top_blob, direction, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden, opt); | |||||
| if (ret != 0) | if (ret != 0) | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = gru(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = gru(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const float* pf = top_blob_forward.row(i); | |||||
| const float* pr = top_blob_reverse.row(i); | |||||
| float* ptr = top_blob.row(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(float)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(float)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| top_blobs[1] = hidden; | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1625,16 +1667,29 @@ int GRU_arm::forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_allocator; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // copy previous states | |||||
| Mat hidden; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| @@ -1643,7 +1698,42 @@ int GRU_arm::forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = gru_fp16s(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = gru_fp16s(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const __fp16* pf = top_blob_forward.row<const __fp16>(i); | |||||
| const __fp16* pr = top_blob_reverse.row<const __fp16>(i); | |||||
| __fp16* ptr = top_blob.row<__fp16>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(__fp16)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(__fp16)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1711,16 +1801,29 @@ int GRU_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Ma | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_allocator; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // copy previous states | |||||
| Mat hidden; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| @@ -1729,7 +1832,42 @@ int GRU_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Ma | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = gru_fp16sa(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = gru_fp16sa(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const __fp16* pf = top_blob_forward.row<const __fp16>(i); | |||||
| const __fp16* pr = top_blob_reverse.row<const __fp16>(i); | |||||
| __fp16* ptr = top_blob.row<__fp16>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(__fp16)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(__fp16)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); // hidden is held as fp32 (elemsize 4u) in this function; convert it back to fp16 storage for the caller, matching forward_fp16s | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -2365,16 +2503,29 @@ int GRU_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_allocator; | |||||
| cast_bfloat16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // copy previous states | |||||
| Mat hidden; | |||||
| cast_bfloat16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| @@ -2383,7 +2534,42 @@ int GRU_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_bfloat16(hidden, top_blobs[1], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = gru_bf16s(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = gru_bf16s(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const unsigned short* pf = top_blob_forward.row<const unsigned short>(i); | |||||
| const unsigned short* pr = top_blob_reverse.row<const unsigned short>(i); | |||||
| unsigned short* ptr = top_blob.row<unsigned short>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(unsigned short)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(unsigned short)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| cast_float32_to_bfloat16(hidden, top_blobs[1], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -423,13 +423,7 @@ int LSTM_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) | |||||
| int LSTM_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | int LSTM_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | ||||
| { | { | ||||
| if (bottom_blobs.size() != 3 || top_blobs.size() != 3) | |||||
| { | |||||
| return forward(bottom_blobs[0], top_blobs[0], opt); | |||||
| } | |||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int elembits = bottom_blob.elembits(); | int elembits = bottom_blob.elembits(); | ||||
| #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | ||||
| @@ -448,26 +442,82 @@ int LSTM_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& to | |||||
| #endif | #endif | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| Mat& hidden_state = top_blobs[1]; | |||||
| Mat& cell_state = top_blobs[2]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| Mat hidden; | |||||
| Mat cell; | |||||
| Allocator* hidden_cell_allocator = top_blobs.size() == 3 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 3) | |||||
| { | |||||
| hidden = bottom_blobs[1].clone(hidden_cell_allocator); | |||||
| cell = bottom_blobs[2].clone(hidden_cell_allocator); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| //Copy previous states | |||||
| hidden_state = bottom_blobs[1].clone(opt.blob_allocator); | |||||
| cell_state = bottom_blobs[2].clone(opt.blob_allocator); | |||||
| cell.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (cell.empty()) | |||||
| return -100; | |||||
| cell.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 4u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 4u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| int ret = lstm(bottom_blob, top_blob, direction, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden_state, cell_state, opt); | |||||
| int ret = lstm(bottom_blob, top_blob, direction, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden, cell, opt); | |||||
| if (ret != 0) | if (ret != 0) | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| Mat cell0 = cell.row_range(0, 1); | |||||
| int ret0 = lstm(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, cell0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| Mat cell1 = cell.row_range(1, 1); | |||||
| int ret1 = lstm(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, cell1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const float* pf = top_blob_forward.row(i); | |||||
| const float* pr = top_blob_reverse.row(i); | |||||
| float* ptr = top_blob.row(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(float)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(float)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 3) | |||||
| { | |||||
| top_blobs[1] = hidden; | |||||
| top_blobs[2] = cell; | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1182,17 +1232,35 @@ int LSTM_arm::forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Ma | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | |||||
| return -100; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| // copy previous states | |||||
| Mat hidden; | Mat hidden; | ||||
| Mat cell; | Mat cell; | ||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| cast_float16_to_float32(bottom_blobs[2], cell, opt); | |||||
| Allocator* hidden_cell_allocator = top_blobs.size() == 3 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 3) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_cell_allocator; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| cast_float16_to_float32(bottom_blobs[2], cell, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| cell.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (cell.empty()) | |||||
| return -100; | |||||
| cell.fill(0.f); | |||||
| } | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | |||||
| return -100; | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| @@ -1202,8 +1270,45 @@ int LSTM_arm::forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Ma | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| cast_float32_to_float16(cell, top_blobs[2], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| Mat cell0 = cell.row_range(0, 1); | |||||
| int ret0 = lstm_fp16s(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, cell0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| Mat cell1 = cell.row_range(1, 1); | |||||
| int ret1 = lstm_fp16s(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, cell1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const __fp16* pf = top_blob_forward.row<const __fp16>(i); | |||||
| const __fp16* pr = top_blob_reverse.row<const __fp16>(i); | |||||
| __fp16* ptr = top_blob.row<__fp16>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(__fp16)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(__fp16)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 3) | |||||
| { | |||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| cast_float32_to_float16(cell, top_blobs[2], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1277,17 +1382,35 @@ int LSTM_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<M | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | |||||
| return -100; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| // copy previous states | |||||
| Mat hidden; | Mat hidden; | ||||
| Mat cell; | Mat cell; | ||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| cast_float16_to_float32(bottom_blobs[2], cell, opt); | |||||
| Allocator* hidden_cell_allocator = top_blobs.size() == 3 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 3) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_cell_allocator; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| cast_float16_to_float32(bottom_blobs[2], cell, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| cell.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (cell.empty()) | |||||
| return -100; | |||||
| cell.fill(0.f); | |||||
| } | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | |||||
| return -100; | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| @@ -1297,8 +1420,45 @@ int LSTM_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<M | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| cast_float32_to_float16(cell, top_blobs[2], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| Mat cell0 = cell.row_range(0, 1); | |||||
| int ret0 = lstm_fp16sa(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, cell0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| Mat cell1 = cell.row_range(1, 1); | |||||
| int ret1 = lstm_fp16sa(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, cell1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const __fp16* pf = top_blob_forward.row<const __fp16>(i); | |||||
| const __fp16* pr = top_blob_reverse.row<const __fp16>(i); | |||||
| __fp16* ptr = top_blob.row<__fp16>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(__fp16)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(__fp16)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 3) | |||||
| { | |||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| cast_float32_to_float16(cell, top_blobs[2], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1664,17 +1824,35 @@ int LSTM_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Ma | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | |||||
| return -100; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| // copy previous states | |||||
| Mat hidden; | Mat hidden; | ||||
| Mat cell; | Mat cell; | ||||
| cast_bfloat16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| cast_bfloat16_to_float32(bottom_blobs[2], cell, opt); | |||||
| Allocator* hidden_cell_allocator = top_blobs.size() == 3 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 3) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_cell_allocator; | |||||
| cast_bfloat16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| cast_bfloat16_to_float32(bottom_blobs[2], cell, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| cell.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (cell.empty()) | |||||
| return -100; | |||||
| cell.fill(0.f); | |||||
| } | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | |||||
| return -100; | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| @@ -1684,8 +1862,45 @@ int LSTM_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Ma | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_bfloat16(hidden, top_blobs[1], opt); | |||||
| cast_float32_to_bfloat16(cell, top_blobs[2], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| Mat cell0 = cell.row_range(0, 1); | |||||
| int ret0 = lstm_bf16s(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, cell0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| Mat cell1 = cell.row_range(1, 1); | |||||
| int ret1 = lstm_bf16s(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, cell1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const unsigned short* pf = top_blob_forward.row<const unsigned short>(i); | |||||
| const unsigned short* pr = top_blob_reverse.row<const unsigned short>(i); | |||||
| unsigned short* ptr = top_blob.row<unsigned short>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(unsigned short)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(unsigned short)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 3) | |||||
| { | |||||
| cast_float32_to_bfloat16(hidden, top_blobs[1], opt); | |||||
| cast_float32_to_bfloat16(cell, top_blobs[2], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -377,13 +377,7 @@ int RNN_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) c | |||||
| int RNN_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | int RNN_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | ||||
| { | { | ||||
| if (bottom_blobs.size() != 2 || top_blobs.size() != 2) | |||||
| { | |||||
| return forward(bottom_blobs[0], top_blobs[0], opt); | |||||
| } | |||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int elembits = bottom_blob.elembits(); | int elembits = bottom_blob.elembits(); | ||||
| #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | #if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC | ||||
| @@ -402,24 +396,72 @@ int RNN_arm::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top | |||||
| #endif | #endif | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| Mat& hidden_state = top_blobs[1]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| //Copy previous states | |||||
| hidden_state = bottom_blobs[1].clone(opt.blob_allocator); | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| hidden = bottom_blobs[1].clone(hidden_allocator); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 4u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 4u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| int ret = rnn(bottom_blob, top_blob, direction, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden_state, opt); | |||||
| int ret = rnn(bottom_blob, top_blob, direction, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden, opt); | |||||
| if (ret != 0) | if (ret != 0) | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = rnn(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = rnn(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const float* pf = top_blob_forward.row(i); | |||||
| const float* pr = top_blob_reverse.row(i); | |||||
| float* ptr = top_blob.row(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(float)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(float)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| top_blobs[1] = hidden; | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -965,16 +1007,29 @@ int RNN_arm::forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_allocator; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // copy previous states | |||||
| Mat hidden; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| @@ -983,7 +1038,42 @@ int RNN_arm::forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = rnn_fp16s(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = rnn_fp16s(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const __fp16* pf = top_blob_forward.row<const __fp16>(i); | |||||
| const __fp16* pr = top_blob_reverse.row<const __fp16>(i); | |||||
| __fp16* ptr = top_blob.row<__fp16>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(__fp16)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(__fp16)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1051,16 +1141,29 @@ int RNN_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Ma | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_allocator; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // copy previous states | |||||
| Mat hidden; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| @@ -1069,7 +1172,42 @@ int RNN_arm::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector<Ma | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = rnn_fp16sa(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = rnn_fp16sa(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const __fp16* pf = top_blob_forward.row<const __fp16>(i); | |||||
| const __fp16* pr = top_blob_reverse.row<const __fp16>(i); | |||||
| __fp16* ptr = top_blob.row<__fp16>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(__fp16)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(__fp16)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -1387,16 +1525,29 @@ int RNN_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_allocator; | |||||
| cast_bfloat16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // copy previous states | |||||
| Mat hidden; | |||||
| cast_bfloat16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| @@ -1405,7 +1556,42 @@ int RNN_arm::forward_bf16s(const std::vector<Mat>& bottom_blobs, std::vector<Mat | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_bfloat16(hidden, top_blobs[1], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = rnn_bf16s(bottom_blob, top_blob_forward, 0, weight_xc_data_packed.channel(0), bias_c_data_packed.channel(0), weight_hc_data_packed.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = rnn_bf16s(bottom_blob, top_blob_reverse, 1, weight_xc_data_packed.channel(1), bias_c_data_packed.channel(1), weight_hc_data_packed.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const unsigned short* pf = top_blob_forward.row<const unsigned short>(i); | |||||
| const unsigned short* pr = top_blob_reverse.row<const unsigned short>(i); | |||||
| unsigned short* ptr = top_blob.row<unsigned short>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(unsigned short)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(unsigned short)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| cast_float32_to_bfloat16(hidden, top_blobs[1], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -29,8 +29,6 @@ int GRU::load_param(const ParamDict& pd) | |||||
| num_output = pd.get(0, 0); | num_output = pd.get(0, 0); | ||||
| weight_data_size = pd.get(1, 0); | weight_data_size = pd.get(1, 0); | ||||
| direction = pd.get(2, 0); | direction = pd.get(2, 0); | ||||
| if (direction == 2) | |||||
| one_blob_only = true; | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -223,30 +221,74 @@ int GRU::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||||
| int GRU::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | int GRU::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | ||||
| { | { | ||||
| if (bottom_blobs.size() != 2 || top_blobs.size() != 2) | |||||
| { | |||||
| return forward(bottom_blobs[0], top_blobs[0], opt); | |||||
| } | |||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| Mat& hidden_state = top_blobs[1]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| //Copy previous states | |||||
| hidden_state = bottom_blobs[1].clone(opt.blob_allocator); | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| hidden = bottom_blobs[1].clone(hidden_allocator); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 4u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 4u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| int ret = gru(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden_state, opt); | |||||
| int ret = gru(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden, opt); | |||||
| if (ret != 0) | if (ret != 0) | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = gru(bottom_blob, top_blob_forward, 0, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = gru(bottom_blob, top_blob_reverse, 1, weight_xc_data.channel(1), bias_c_data.channel(1), weight_hc_data.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const float* pf = top_blob_forward.row(i); | |||||
| const float* pr = top_blob_reverse.row(i); | |||||
| float* ptr = top_blob.row(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(float)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(float)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| top_blobs[1] = hidden; | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -29,8 +29,6 @@ int LSTM::load_param(const ParamDict& pd) | |||||
| num_output = pd.get(0, 0); | num_output = pd.get(0, 0); | ||||
| weight_data_size = pd.get(1, 0); | weight_data_size = pd.get(1, 0); | ||||
| direction = pd.get(2, 0); | direction = pd.get(2, 0); | ||||
| if (direction == 2) | |||||
| one_blob_only = true; | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -232,32 +230,84 @@ int LSTM::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) cons | |||||
| int LSTM::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | int LSTM::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | ||||
| { | { | ||||
| if (bottom_blobs.size() != 3 || top_blobs.size() != 3) | |||||
| { | |||||
| return forward(bottom_blobs[0], top_blobs[0], opt); | |||||
| } | |||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| Mat& hidden_state = top_blobs[1]; | |||||
| Mat& cell_state = top_blobs[2]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| Mat hidden; | |||||
| Mat cell; | |||||
| Allocator* hidden_cell_allocator = top_blobs.size() == 3 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 3) | |||||
| { | |||||
| hidden = bottom_blobs[1].clone(hidden_cell_allocator); | |||||
| cell = bottom_blobs[2].clone(hidden_cell_allocator); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| //Copy previous states | |||||
| hidden_state = bottom_blobs[1].clone(opt.blob_allocator); | |||||
| cell_state = bottom_blobs[2].clone(opt.blob_allocator); | |||||
| cell.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (cell.empty()) | |||||
| return -100; | |||||
| cell.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 4u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 4u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| int ret = lstm(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden_state, cell_state, opt); | |||||
| int ret = lstm(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden, cell, opt); | |||||
| if (ret != 0) | if (ret != 0) | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| Mat cell0 = cell.row_range(0, 1); | |||||
| int ret0 = lstm(bottom_blob, top_blob_forward, 0, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden0, cell0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| Mat cell1 = cell.row_range(1, 1); | |||||
| int ret1 = lstm(bottom_blob, top_blob_reverse, 1, weight_xc_data.channel(1), bias_c_data.channel(1), weight_hc_data.channel(1), hidden1, cell1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const float* pf = top_blob_forward.row(i); | |||||
| const float* pr = top_blob_reverse.row(i); | |||||
| float* ptr = top_blob.row(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(float)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(float)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 3) | |||||
| { | |||||
| top_blobs[1] = hidden; | |||||
| top_blobs[2] = cell; | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -301,11 +301,6 @@ int GRU_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) | |||||
| int GRU_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | int GRU_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | ||||
| { | { | ||||
| if (bottom_blobs.size() != 2 || top_blobs.size() != 2) | |||||
| { | |||||
| return forward(bottom_blobs[0], top_blobs[0], opt); | |||||
| } | |||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int elembits = bottom_blob.elembits(); | int elembits = bottom_blob.elembits(); | ||||
| @@ -321,24 +316,73 @@ int GRU_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& t | |||||
| #endif | #endif | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| Mat& hidden_state = top_blobs[1]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| //Copy previous states | |||||
| hidden_state = bottom_blobs[1].clone(opt.blob_allocator); | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| hidden = bottom_blobs[1].clone(hidden_allocator); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 4u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 4u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| int ret = gru(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden_state, opt); | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret = gru(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden0, opt); | |||||
| if (ret != 0) | if (ret != 0) | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = gru(bottom_blob, top_blob_forward, 0, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = gru(bottom_blob, top_blob_reverse, 1, weight_xc_data.channel(1), bias_c_data.channel(1), weight_hc_data.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const float* pf = top_blob_forward.row(i); | |||||
| const float* pr = top_blob_reverse.row(i); | |||||
| float* ptr = top_blob.row(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(float)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(float)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| top_blobs[1] = hidden; | |||||
| } | |||||
| return 0; | return 0; | ||||
| #endif | #endif | ||||
| return GRU::forward(bottom_blobs, top_blobs, opt); | return GRU::forward(bottom_blobs, top_blobs, opt); | ||||
| @@ -587,24 +631,75 @@ int GRU_riscv::forward_fp16s(const std::vector<Mat>& bottom_blobs, std::vector<M | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_allocator; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| Mat& top_blob = top_blobs[0]; | Mat& top_blob = top_blobs[0]; | ||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| //Copy previous states | |||||
| Mat hidden; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| int ret = gru_fp16s(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden, opt); | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret = gru_fp16s(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden0, opt); | |||||
| if (ret != 0) | if (ret != 0) | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = gru_fp16s(bottom_blob, top_blob_forward, 0, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = gru_fp16s(bottom_blob, top_blob_reverse, 1, weight_xc_data.channel(1), bias_c_data.channel(1), weight_hc_data.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const __fp16* pf = top_blob_forward.row<const __fp16>(i); | |||||
| const __fp16* pr = top_blob_reverse.row<const __fp16>(i); | |||||
| __fp16* ptr = top_blob.row<__fp16>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(__fp16)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(__fp16)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -853,15 +948,29 @@ int GRU_riscv::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector< | |||||
| { | { | ||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| Option opt_cast = opt; | |||||
| opt_cast.blob_allocator = hidden_allocator; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt_cast); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| Mat& top_blob = top_blobs[0]; | Mat& top_blob = top_blobs[0]; | ||||
| top_blob.create(num_output, T, 2u, opt.blob_allocator); | |||||
| top_blob.create(num_output * num_directions, T, 2u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| //Copy previous states | |||||
| Mat hidden; | |||||
| cast_float16_to_float32(bottom_blobs[1], hidden, opt); | |||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| @@ -870,11 +979,46 @@ int GRU_riscv::forward_fp16sa(const std::vector<Mat>& bottom_blobs, std::vector< | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 2u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = gru_fp16sa(bottom_blob, top_blob_forward, 0, weight_xc_data_fp16sa.channel(0), bias_c_data_fp16sa.channel(0), weight_hc_data_fp16sa.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = gru_fp16sa(bottom_blob, top_blob_reverse, 1, weight_xc_data_fp16sa.channel(1), bias_c_data_fp16sa.channel(1), weight_hc_data_fp16sa.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const __fp16* pf = top_blob_forward.row<const __fp16>(i); | |||||
| const __fp16* pr = top_blob_reverse.row<const __fp16>(i); | |||||
| __fp16* ptr = top_blob.row<__fp16>(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(__fp16)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(__fp16)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| cast_float32_to_float16(hidden, top_blobs[1], opt); | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| #endif | #endif | ||||
| } // namespace ncnn | |||||
| } // namespace ncnn | |||||
| @@ -29,8 +29,6 @@ int RNN::load_param(const ParamDict& pd) | |||||
| num_output = pd.get(0, 0); | num_output = pd.get(0, 0); | ||||
| weight_data_size = pd.get(1, 0); | weight_data_size = pd.get(1, 0); | ||||
| direction = pd.get(2, 0); | direction = pd.get(2, 0); | ||||
| if (direction == 2) | |||||
| one_blob_only = true; | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -172,30 +170,74 @@ int RNN::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const | |||||
| int RNN::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | int RNN::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | ||||
| { | { | ||||
| if (bottom_blobs.size() != 2 || top_blobs.size() != 2) | |||||
| { | |||||
| return forward(bottom_blobs[0], top_blobs[0], opt); | |||||
| } | |||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| Mat& hidden_state = top_blobs[1]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| //Copy previous states | |||||
| hidden_state = bottom_blobs[1].clone(opt.blob_allocator); | |||||
| Mat hidden; | |||||
| Allocator* hidden_allocator = top_blobs.size() == 2 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 2) | |||||
| { | |||||
| hidden = bottom_blobs[1].clone(hidden_allocator); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| } | |||||
| top_blob.create(num_output, T, 4u, opt.blob_allocator); | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 4u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| // Uni directional | // Uni directional | ||||
| if (direction == 0 || direction == 1) | if (direction == 0 || direction == 1) | ||||
| { | { | ||||
| int ret = rnn(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden_state, opt); | |||||
| int ret = rnn(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden, opt); | |||||
| if (ret != 0) | if (ret != 0) | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (direction == 2) | |||||
| { | |||||
| Mat top_blob_forward(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| int ret0 = rnn(bottom_blob, top_blob_forward, 0, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| int ret1 = rnn(bottom_blob, top_blob_reverse, 1, weight_xc_data.channel(1), bias_c_data.channel(1), weight_hc_data.channel(1), hidden1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const float* pf = top_blob_forward.row(i); | |||||
| const float* pr = top_blob_reverse.row(i); | |||||
| float* ptr = top_blob.row(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(float)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(float)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 2) | |||||
| { | |||||
| top_blobs[1] = hidden; | |||||
| } | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -910,42 +910,123 @@ int LSTM_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) | |||||
| int LSTM_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | int LSTM_x86::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const | ||||
| { | { | ||||
| #if __AVX__ | #if __AVX__ | ||||
| if (bottom_blobs.size() != 3 || top_blobs.size() != 3) | |||||
| { | |||||
| return forward(bottom_blobs[0], top_blobs[0], opt); | |||||
| } | |||||
| const Mat& bottom_blob = bottom_blobs[0]; | const Mat& bottom_blob = bottom_blobs[0]; | ||||
| int T = bottom_blob.h; | int T = bottom_blob.h; | ||||
| Mat& top_blob = top_blobs[0]; | |||||
| Mat& hidden_state = top_blobs[1]; | |||||
| Mat& cell_state = top_blobs[2]; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| //Copy previous states | |||||
| hidden_state = bottom_blobs[1].clone(opt.blob_allocator); | |||||
| cell_state = bottom_blobs[2].clone(opt.blob_allocator); | |||||
| Mat hidden; | |||||
| Mat cell; | |||||
| Allocator* hidden_cell_allocator = top_blobs.size() == 3 ? opt.blob_allocator : opt.workspace_allocator; | |||||
| if (bottom_blobs.size() == 3) | |||||
| { | |||||
| hidden = bottom_blobs[1].clone(hidden_cell_allocator); | |||||
| cell = bottom_blobs[2].clone(hidden_cell_allocator); | |||||
| } | |||||
| else | |||||
| { | |||||
| hidden.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (hidden.empty()) | |||||
| return -100; | |||||
| hidden.fill(0.f); | |||||
| top_blob.create(num_output, T, 4u, opt.blob_allocator); | |||||
| cell.create(num_output, num_directions, 4u, hidden_cell_allocator); | |||||
| if (cell.empty()) | |||||
| return -100; | |||||
| cell.fill(0.f); | |||||
| } | |||||
| Mat& top_blob = top_blobs[0]; | |||||
| top_blob.create(num_output * num_directions, T, 4u, opt.blob_allocator); | |||||
| if (top_blob.empty()) | if (top_blob.empty()) | ||||
| return -100; | return -100; | ||||
| #if __AVX2__ | |||||
| if (opt.use_weight_fp16_storage) | |||||
| // Uni directional | |||||
| if (direction == 0 || direction == 1) | |||||
| { | { | ||||
| // Uni directional | |||||
| int ret = lstm_fp16(bottom_blob, top_blob, direction, weight_xc_data_fp16.channel(0), bias_c_data.channel(0), weight_hc_data_fp16.channel(0), hidden_state, cell_state, opt); | |||||
| if (ret != 0) | |||||
| return ret; | |||||
| #if __AVX2__ | |||||
| if (opt.use_weight_fp16_storage) | |||||
| { | |||||
| int ret = lstm_fp16(bottom_blob, top_blob, direction, weight_xc_data_fp16.channel(0), bias_c_data.channel(0), weight_hc_data_fp16.channel(0), hidden, cell, opt); | |||||
| if (ret != 0) | |||||
| return ret; | |||||
| } | |||||
| else | |||||
| { | |||||
| #endif | |||||
| int ret = lstm(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden, cell, opt); | |||||
| if (ret != 0) | |||||
| return ret; | |||||
| #if __AVX2__ | |||||
| } | |||||
| #endif | |||||
| } | } | ||||
| else | |||||
| if (direction == 2) | |||||
| { | { | ||||
| Mat top_blob_forward(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_forward.empty()) | |||||
| return -100; | |||||
| Mat top_blob_reverse(num_output, T, 4u, opt.workspace_allocator); | |||||
| if (top_blob_reverse.empty()) | |||||
| return -100; | |||||
| Mat hidden0 = hidden.row_range(0, 1); | |||||
| Mat cell0 = cell.row_range(0, 1); | |||||
| #if __AVX2__ | |||||
| if (opt.use_weight_fp16_storage) | |||||
| { | |||||
| int ret = lstm_fp16(bottom_blob, top_blob_forward, 0, weight_xc_data_fp16.channel(0), bias_c_data.channel(0), weight_hc_data_fp16.channel(0), hidden0, cell0, opt); | |||||
| if (ret != 0) | |||||
| return ret; | |||||
| } | |||||
| else | |||||
| { | |||||
| #endif | #endif | ||||
| // Uni directional | |||||
| int ret = lstm(bottom_blob, top_blob, direction, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden_state, cell_state, opt); | |||||
| if (ret != 0) | |||||
| return ret; | |||||
| int ret0 = lstm(bottom_blob, top_blob_forward, 0, weight_xc_data.channel(0), bias_c_data.channel(0), weight_hc_data.channel(0), hidden0, cell0, opt); | |||||
| if (ret0 != 0) | |||||
| return ret0; | |||||
| #if __AVX2__ | #if __AVX2__ | ||||
| } | |||||
| } | |||||
| #endif | #endif | ||||
| Mat hidden1 = hidden.row_range(1, 1); | |||||
| Mat cell1 = cell.row_range(1, 1); | |||||
| #if __AVX2__ | |||||
| if (opt.use_weight_fp16_storage) | |||||
| { | |||||
| int ret = lstm_fp16(bottom_blob, top_blob_reverse, 1, weight_xc_data_fp16.channel(1), bias_c_data.channel(1), weight_hc_data_fp16.channel(1), hidden1, cell1, opt); | |||||
| if (ret != 0) | |||||
| return ret; | |||||
| } | |||||
| else | |||||
| { | |||||
| #endif | |||||
| int ret1 = lstm(bottom_blob, top_blob_reverse, 1, weight_xc_data.channel(1), bias_c_data.channel(1), weight_hc_data.channel(1), hidden1, cell1, opt); | |||||
| if (ret1 != 0) | |||||
| return ret1; | |||||
| #if __AVX2__ | |||||
| } | |||||
| #endif | |||||
| // concat w | |||||
| for (int i = 0; i < T; i++) | |||||
| { | |||||
| const float* pf = top_blob_forward.row(i); | |||||
| const float* pr = top_blob_reverse.row(i); | |||||
| float* ptr = top_blob.row(i); | |||||
| memcpy(ptr, pf, num_output * sizeof(float)); | |||||
| memcpy(ptr + num_output, pr, num_output * sizeof(float)); | |||||
| } | |||||
| } | |||||
| if (top_blobs.size() == 3) | |||||
| { | |||||
| top_blobs[1] = hidden; | |||||
| top_blobs[2] = cell; | |||||
| } | |||||
| return 0; | return 0; | ||||
| #else | #else | ||||
| return LSTM::forward(bottom_blobs, top_blobs, opt); | return LSTM::forward(bottom_blobs, top_blobs, opt); | ||||
| @@ -42,19 +42,20 @@ static int test_gru(const ncnn::Mat& a, int outch, int direction) | |||||
| int test_gru_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | int test_gru_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | ||||
| { | { | ||||
| int input_size = a.w; | int input_size = a.w; | ||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | ncnn::ParamDict pd; | ||||
| pd.set(0, outch); | pd.set(0, outch); | ||||
| pd.set(1, outch * input_size * 3); | |||||
| pd.set(1, outch * input_size * 3 * num_directions); | |||||
| pd.set(2, direction); | pd.set(2, direction); | ||||
| std::vector<ncnn::Mat> weights(3); | std::vector<ncnn::Mat> weights(3); | ||||
| weights[0] = RandomMat(outch * input_size * 3); | |||||
| weights[1] = RandomMat(outch * 4); | |||||
| weights[2] = RandomMat(outch * outch * 3); | |||||
| weights[0] = RandomMat(outch * input_size * 3 * num_directions); | |||||
| weights[1] = RandomMat(outch * 4 * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * 3 * num_directions); | |||||
| // initial hidden state | // initial hidden state | ||||
| ncnn::Mat hidden = RandomMat(outch); | |||||
| ncnn::Mat hidden = RandomMat(outch, num_directions); | |||||
| std::vector<ncnn::Mat> as(2); | std::vector<ncnn::Mat> as(2); | ||||
| as[0] = a; | as[0] = a; | ||||
| @@ -69,6 +70,64 @@ int test_gru_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| int test_gru_layer_with_hidden_input(const ncnn::Mat& a, int outch, int direction) | |||||
| { | |||||
| int input_size = a.w; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | |||||
| pd.set(0, outch); | |||||
| pd.set(1, outch * input_size * 3 * num_directions); | |||||
| pd.set(2, direction); | |||||
| std::vector<ncnn::Mat> weights(3); | |||||
| weights[0] = RandomMat(outch * input_size * 3 * num_directions); | |||||
| weights[1] = RandomMat(outch * 4 * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * 3 * num_directions); | |||||
| // initial hidden state | |||||
| ncnn::Mat hidden = RandomMat(outch, num_directions); | |||||
| std::vector<ncnn::Mat> as(2); | |||||
| as[0] = a; | |||||
| as[1] = hidden; | |||||
| int ret = test_layer<ncnn::GRU>("GRU", pd, weights, as, 1); | |||||
| if (ret != 0) | |||||
| { | |||||
| fprintf(stderr, "test_gru_layer_with_hidden_input failed a.dims=%d a=(%d %d %d) outch=%d, direction = %d \n", a.dims, a.w, a.h, a.c, outch, direction); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| int test_gru_layer_with_hidden_output(const ncnn::Mat& a, int outch, int direction) | |||||
| { | |||||
| int input_size = a.w; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | |||||
| pd.set(0, outch); | |||||
| pd.set(1, outch * input_size * 3 * num_directions); | |||||
| pd.set(2, direction); | |||||
| std::vector<ncnn::Mat> weights(3); | |||||
| weights[0] = RandomMat(outch * input_size * 3 * num_directions); | |||||
| weights[1] = RandomMat(outch * 4 * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * 3 * num_directions); | |||||
| std::vector<ncnn::Mat> as(1); | |||||
| as[0] = a; | |||||
| int ret = test_layer<ncnn::GRU>("GRU", pd, weights, as, 2); | |||||
| if (ret != 0) | |||||
| { | |||||
| fprintf(stderr, "test_gru_layer_with_hidden_output failed a.dims=%d a=(%d %d %d) outch=%d, direction = %d \n", a.dims, a.w, a.h, a.c, outch, direction); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| static int test_gru_0() | static int test_gru_0() | ||||
| { | { | ||||
| return 0 | return 0 | ||||
| @@ -86,6 +145,14 @@ static int test_gru_0() | |||||
| static int test_gru_1() | static int test_gru_1() | ||||
| { | { | ||||
| return 0 | return 0 | ||||
| || test_gru_layer_with_hidden(RandomMat(4, 4), 1, 2) | |||||
| || test_gru_layer_with_hidden(RandomMat(8, 2), 2, 2) | |||||
| || test_gru_layer_with_hidden(RandomMat(16, 8), 7, 2) | |||||
| || test_gru_layer_with_hidden(RandomMat(17, 8), 8, 2) | |||||
| || test_gru_layer_with_hidden(RandomMat(19, 15), 8, 2) | |||||
| || test_gru_layer_with_hidden(RandomMat(5, 16), 16, 2) | |||||
| || test_gru_layer_with_hidden(RandomMat(3, 16), 8, 2) | |||||
| || test_gru_layer_with_hidden(RandomMat(2, 5), 99, 2) | |||||
| || test_gru_layer_with_hidden(RandomMat(4, 4), 1, 1) | || test_gru_layer_with_hidden(RandomMat(4, 4), 1, 1) | ||||
| || test_gru_layer_with_hidden(RandomMat(8, 2), 2, 1) | || test_gru_layer_with_hidden(RandomMat(8, 2), 2, 1) | ||||
| || test_gru_layer_with_hidden(RandomMat(16, 8), 7, 1) | || test_gru_layer_with_hidden(RandomMat(16, 8), 7, 1) | ||||
| @@ -101,7 +168,57 @@ static int test_gru_1() | |||||
| || test_gru_layer_with_hidden(RandomMat(19, 15), 8, 0) | || test_gru_layer_with_hidden(RandomMat(19, 15), 8, 0) | ||||
| || test_gru_layer_with_hidden(RandomMat(5, 16), 16, 0) | || test_gru_layer_with_hidden(RandomMat(5, 16), 16, 0) | ||||
| || test_gru_layer_with_hidden(RandomMat(3, 16), 8, 0) | || test_gru_layer_with_hidden(RandomMat(3, 16), 8, 0) | ||||
| || test_gru_layer_with_hidden(RandomMat(2, 5), 17, 0); | |||||
| || test_gru_layer_with_hidden(RandomMat(2, 5), 17, 0) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(4, 4), 1, 2) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(8, 2), 2, 2) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(16, 8), 7, 2) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(17, 8), 8, 2) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(19, 15), 8, 2) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(5, 16), 16, 2) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(3, 16), 8, 2) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(2, 5), 99, 2) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(4, 4), 1, 1) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(8, 2), 2, 1) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(16, 8), 7, 1) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(17, 8), 8, 1) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(19, 15), 8, 1) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(5, 16), 16, 1) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(3, 16), 8, 1) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(2, 5), 99, 1) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(4, 2), 1, 0) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(8, 2), 2, 0) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(16, 8), 7, 0) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(17, 8), 8, 0) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(19, 15), 8, 0) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(5, 16), 16, 0) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(3, 16), 8, 0) | |||||
| || test_gru_layer_with_hidden_input(RandomMat(2, 5), 17, 0) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(4, 4), 1, 2) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(8, 2), 2, 2) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(16, 8), 7, 2) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(17, 8), 8, 2) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(19, 15), 8, 2) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(5, 16), 16, 2) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(3, 16), 8, 2) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(2, 5), 99, 2) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(4, 4), 1, 1) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(8, 2), 2, 1) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(16, 8), 7, 1) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(17, 8), 8, 1) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(19, 15), 8, 1) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(5, 16), 16, 1) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(3, 16), 8, 1) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(2, 5), 99, 1) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(4, 2), 1, 0) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(8, 2), 2, 0) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(16, 8), 7, 0) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(17, 8), 8, 0) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(19, 15), 8, 0) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(5, 16), 16, 0) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(3, 16), 8, 0) | |||||
| || test_gru_layer_with_hidden_output(RandomMat(2, 5), 17, 0); | |||||
| } | } | ||||
| static int test_gru_2() | static int test_gru_2() | ||||
| @@ -42,22 +42,23 @@ static int test_lstm(const ncnn::Mat& a, int outch, int direction) | |||||
| int test_lstm_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | int test_lstm_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | ||||
| { | { | ||||
| int input_size = a.w; | int input_size = a.w; | ||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | ncnn::ParamDict pd; | ||||
| pd.set(0, outch); | pd.set(0, outch); | ||||
| pd.set(1, outch * input_size * 4); | |||||
| pd.set(1, outch * input_size * 4 * num_directions); | |||||
| pd.set(2, direction); | pd.set(2, direction); | ||||
| std::vector<ncnn::Mat> weights(3); | std::vector<ncnn::Mat> weights(3); | ||||
| weights[0] = RandomMat(outch * input_size * 4); | |||||
| weights[1] = RandomMat(outch * 4); | |||||
| weights[2] = RandomMat(outch * outch * 4); | |||||
| weights[0] = RandomMat(outch * input_size * 4 * num_directions); | |||||
| weights[1] = RandomMat(outch * 4 * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * 4 * num_directions); | |||||
| // initial hidden state | // initial hidden state | ||||
| ncnn::Mat hidden = RandomMat(outch); | |||||
| ncnn::Mat hidden = RandomMat(outch, num_directions); | |||||
| // initial cell state | // initial cell state | ||||
| ncnn::Mat cell = RandomMat(outch); | |||||
| ncnn::Mat cell = RandomMat(outch, num_directions); | |||||
| std::vector<ncnn::Mat> as(3); | std::vector<ncnn::Mat> as(3); | ||||
| as[0] = a; | as[0] = a; | ||||
| @@ -73,6 +74,68 @@ int test_lstm_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| int test_lstm_layer_with_hidden_input(const ncnn::Mat& a, int outch, int direction) | |||||
| { | |||||
| int input_size = a.w; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | |||||
| pd.set(0, outch); | |||||
| pd.set(1, outch * input_size * 4 * num_directions); | |||||
| pd.set(2, direction); | |||||
| std::vector<ncnn::Mat> weights(3); | |||||
| weights[0] = RandomMat(outch * input_size * 4 * num_directions); | |||||
| weights[1] = RandomMat(outch * 4 * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * 4 * num_directions); | |||||
| // initial hidden state | |||||
| ncnn::Mat hidden = RandomMat(outch, num_directions); | |||||
| // initial cell state | |||||
| ncnn::Mat cell = RandomMat(outch, num_directions); | |||||
| std::vector<ncnn::Mat> as(3); | |||||
| as[0] = a; | |||||
| as[1] = hidden; | |||||
| as[2] = cell; | |||||
| int ret = test_layer<ncnn::LSTM>("LSTM", pd, weights, as, 1); | |||||
| if (ret != 0) | |||||
| { | |||||
| fprintf(stderr, "test_lstm_layer_with_hidden_input failed a.dims=%d a=(%d %d %d) outch=%d, direction = %d \n", a.dims, a.w, a.h, a.c, outch, direction); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| int test_lstm_layer_with_hidden_output(const ncnn::Mat& a, int outch, int direction) | |||||
| { | |||||
| int input_size = a.w; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | |||||
| pd.set(0, outch); | |||||
| pd.set(1, outch * input_size * 4 * num_directions); | |||||
| pd.set(2, direction); | |||||
| std::vector<ncnn::Mat> weights(3); | |||||
| weights[0] = RandomMat(outch * input_size * 4 * num_directions); | |||||
| weights[1] = RandomMat(outch * 4 * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * 4 * num_directions); | |||||
| std::vector<ncnn::Mat> as(1); | |||||
| as[0] = a; | |||||
| int ret = test_layer<ncnn::LSTM>("LSTM", pd, weights, as, 3); | |||||
| if (ret != 0) | |||||
| { | |||||
| fprintf(stderr, "test_lstm_layer_with_hidden_output failed a.dims=%d a=(%d %d %d) outch=%d, direction = %d \n", a.dims, a.w, a.h, a.c, outch, direction); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| static int test_lstm_0() | static int test_lstm_0() | ||||
| { | { | ||||
| return 0 | return 0 | ||||
| @@ -90,6 +153,14 @@ static int test_lstm_0() | |||||
| static int test_lstm_1() | static int test_lstm_1() | ||||
| { | { | ||||
| return 0 | return 0 | ||||
| || test_lstm_layer_with_hidden(RandomMat(4, 4), 1, 2) | |||||
| || test_lstm_layer_with_hidden(RandomMat(8, 2), 2, 2) | |||||
| || test_lstm_layer_with_hidden(RandomMat(16, 8), 7, 2) | |||||
| || test_lstm_layer_with_hidden(RandomMat(17, 8), 8, 2) | |||||
| || test_lstm_layer_with_hidden(RandomMat(19, 15), 8, 2) | |||||
| || test_lstm_layer_with_hidden(RandomMat(5, 16), 16, 2) | |||||
| || test_lstm_layer_with_hidden(RandomMat(3, 16), 8, 2) | |||||
| || test_lstm_layer_with_hidden(RandomMat(2, 5), 99, 2) | |||||
| || test_lstm_layer_with_hidden(RandomMat(4, 4), 1, 1) | || test_lstm_layer_with_hidden(RandomMat(4, 4), 1, 1) | ||||
| || test_lstm_layer_with_hidden(RandomMat(8, 2), 2, 1) | || test_lstm_layer_with_hidden(RandomMat(8, 2), 2, 1) | ||||
| || test_lstm_layer_with_hidden(RandomMat(16, 8), 7, 1) | || test_lstm_layer_with_hidden(RandomMat(16, 8), 7, 1) | ||||
| @@ -105,7 +176,57 @@ static int test_lstm_1() | |||||
| || test_lstm_layer_with_hidden(RandomMat(19, 15), 8, 0) | || test_lstm_layer_with_hidden(RandomMat(19, 15), 8, 0) | ||||
| || test_lstm_layer_with_hidden(RandomMat(5, 16), 16, 0) | || test_lstm_layer_with_hidden(RandomMat(5, 16), 16, 0) | ||||
| || test_lstm_layer_with_hidden(RandomMat(3, 16), 8, 0) | || test_lstm_layer_with_hidden(RandomMat(3, 16), 8, 0) | ||||
| || test_lstm_layer_with_hidden(RandomMat(2, 5), 17, 0); | |||||
| || test_lstm_layer_with_hidden(RandomMat(2, 5), 17, 0) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(4, 4), 1, 2) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(8, 2), 2, 2) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(16, 8), 7, 2) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(17, 8), 8, 2) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(19, 15), 8, 2) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(5, 16), 16, 2) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(3, 16), 8, 2) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(2, 5), 99, 2) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(4, 4), 1, 1) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(8, 2), 2, 1) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(16, 8), 7, 1) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(17, 8), 8, 1) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(19, 15), 8, 1) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(5, 16), 16, 1) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(3, 16), 8, 1) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(2, 5), 99, 1) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(4, 2), 1, 0) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(8, 2), 2, 0) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(16, 8), 7, 0) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(17, 8), 8, 0) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(19, 15), 8, 0) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(5, 16), 16, 0) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(3, 16), 8, 0) | |||||
| || test_lstm_layer_with_hidden_input(RandomMat(2, 5), 17, 0) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(4, 4), 1, 2) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(8, 2), 2, 2) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(16, 8), 7, 2) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(17, 8), 8, 2) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(19, 15), 8, 2) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(5, 16), 16, 2) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(3, 16), 8, 2) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(2, 5), 99, 2) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(4, 4), 1, 1) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(8, 2), 2, 1) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(16, 8), 7, 1) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(17, 8), 8, 1) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(19, 15), 8, 1) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(5, 16), 16, 1) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(3, 16), 8, 1) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(2, 5), 99, 1) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(4, 2), 1, 0) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(8, 2), 2, 0) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(16, 8), 7, 0) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(17, 8), 8, 0) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(19, 15), 8, 0) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(5, 16), 16, 0) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(3, 16), 8, 0) | |||||
| || test_lstm_layer_with_hidden_output(RandomMat(2, 5), 17, 0); | |||||
| } | } | ||||
| static int test_lstm_2() | static int test_lstm_2() | ||||
| @@ -42,19 +42,20 @@ static int test_rnn(const ncnn::Mat& a, int outch, int direction) | |||||
| int test_rnn_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | int test_rnn_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | ||||
| { | { | ||||
| int input_size = a.w; | int input_size = a.w; | ||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | ncnn::ParamDict pd; | ||||
| pd.set(0, outch); | pd.set(0, outch); | ||||
| pd.set(1, outch * input_size); | |||||
| pd.set(1, outch * input_size * num_directions); | |||||
| pd.set(2, direction); | pd.set(2, direction); | ||||
| std::vector<ncnn::Mat> weights(3); | std::vector<ncnn::Mat> weights(3); | ||||
| weights[0] = RandomMat(outch * input_size); | |||||
| weights[1] = RandomMat(outch); | |||||
| weights[2] = RandomMat(outch * outch); | |||||
| weights[0] = RandomMat(outch * input_size * num_directions); | |||||
| weights[1] = RandomMat(outch * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * num_directions); | |||||
| // initial hidden state | // initial hidden state | ||||
| ncnn::Mat hidden = RandomMat(outch); | |||||
| ncnn::Mat hidden = RandomMat(outch, num_directions); | |||||
| std::vector<ncnn::Mat> as(2); | std::vector<ncnn::Mat> as(2); | ||||
| as[0] = a; | as[0] = a; | ||||
| @@ -69,6 +70,64 @@ int test_rnn_layer_with_hidden(const ncnn::Mat& a, int outch, int direction) | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| int test_rnn_layer_with_hidden_input(const ncnn::Mat& a, int outch, int direction) | |||||
| { | |||||
| int input_size = a.w; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | |||||
| pd.set(0, outch); | |||||
| pd.set(1, outch * input_size * num_directions); | |||||
| pd.set(2, direction); | |||||
| std::vector<ncnn::Mat> weights(3); | |||||
| weights[0] = RandomMat(outch * input_size * num_directions); | |||||
| weights[1] = RandomMat(outch * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * num_directions); | |||||
| // initial hidden state | |||||
| ncnn::Mat hidden = RandomMat(outch, num_directions); | |||||
| std::vector<ncnn::Mat> as(2); | |||||
| as[0] = a; | |||||
| as[1] = hidden; | |||||
| int ret = test_layer<ncnn::RNN>("RNN", pd, weights, as, 1); | |||||
| if (ret != 0) | |||||
| { | |||||
| fprintf(stderr, "test_rnn_layer_with_hidden_input failed a.dims=%d a=(%d %d %d) outch=%d, direction = %d \n", a.dims, a.w, a.h, a.c, outch, direction); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| int test_rnn_layer_with_hidden_output(const ncnn::Mat& a, int outch, int direction) | |||||
| { | |||||
| int input_size = a.w; | |||||
| int num_directions = direction == 2 ? 2 : 1; | |||||
| ncnn::ParamDict pd; | |||||
| pd.set(0, outch); | |||||
| pd.set(1, outch * input_size * num_directions); | |||||
| pd.set(2, direction); | |||||
| std::vector<ncnn::Mat> weights(3); | |||||
| weights[0] = RandomMat(outch * input_size * num_directions); | |||||
| weights[1] = RandomMat(outch * num_directions); | |||||
| weights[2] = RandomMat(outch * outch * num_directions); | |||||
| std::vector<ncnn::Mat> as(1); | |||||
| as[0] = a; | |||||
| int ret = test_layer<ncnn::RNN>("RNN", pd, weights, as, 2); | |||||
| if (ret != 0) | |||||
| { | |||||
| fprintf(stderr, "test_rnn_layer_with_hidden_output failed a.dims=%d a=(%d %d %d) outch=%d, direction = %d \n", a.dims, a.w, a.h, a.c, outch, direction); | |||||
| } | |||||
| return ret; | |||||
| } | |||||
| static int test_rnn_0() | static int test_rnn_0() | ||||
| { | { | ||||
| return 0 | return 0 | ||||
| @@ -86,6 +145,14 @@ static int test_rnn_0() | |||||
| static int test_rnn_1() | static int test_rnn_1() | ||||
| { | { | ||||
| return 0 | return 0 | ||||
| || test_rnn_layer_with_hidden(RandomMat(4, 4), 1, 2) | |||||
| || test_rnn_layer_with_hidden(RandomMat(8, 2), 2, 2) | |||||
| || test_rnn_layer_with_hidden(RandomMat(16, 8), 7, 2) | |||||
| || test_rnn_layer_with_hidden(RandomMat(17, 8), 8, 2) | |||||
| || test_rnn_layer_with_hidden(RandomMat(19, 15), 8, 2) | |||||
| || test_rnn_layer_with_hidden(RandomMat(5, 16), 16, 2) | |||||
| || test_rnn_layer_with_hidden(RandomMat(3, 16), 8, 2) | |||||
| || test_rnn_layer_with_hidden(RandomMat(2, 5), 99, 2) | |||||
| || test_rnn_layer_with_hidden(RandomMat(4, 4), 1, 1) | || test_rnn_layer_with_hidden(RandomMat(4, 4), 1, 1) | ||||
| || test_rnn_layer_with_hidden(RandomMat(8, 2), 2, 1) | || test_rnn_layer_with_hidden(RandomMat(8, 2), 2, 1) | ||||
| || test_rnn_layer_with_hidden(RandomMat(16, 8), 7, 1) | || test_rnn_layer_with_hidden(RandomMat(16, 8), 7, 1) | ||||
| @@ -101,7 +168,57 @@ static int test_rnn_1() | |||||
| || test_rnn_layer_with_hidden(RandomMat(19, 15), 8, 0) | || test_rnn_layer_with_hidden(RandomMat(19, 15), 8, 0) | ||||
| || test_rnn_layer_with_hidden(RandomMat(5, 16), 16, 0) | || test_rnn_layer_with_hidden(RandomMat(5, 16), 16, 0) | ||||
| || test_rnn_layer_with_hidden(RandomMat(3, 16), 8, 0) | || test_rnn_layer_with_hidden(RandomMat(3, 16), 8, 0) | ||||
| || test_rnn_layer_with_hidden(RandomMat(2, 5), 17, 0); | |||||
| || test_rnn_layer_with_hidden(RandomMat(2, 5), 17, 0) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(4, 4), 1, 2) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(8, 2), 2, 2) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(16, 8), 7, 2) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(17, 8), 8, 2) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(19, 15), 8, 2) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(5, 16), 16, 2) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(3, 16), 8, 2) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(2, 5), 99, 2) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(4, 4), 1, 1) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(8, 2), 2, 1) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(16, 8), 7, 1) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(17, 8), 8, 1) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(19, 15), 8, 1) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(5, 16), 16, 1) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(3, 16), 8, 1) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(2, 5), 99, 1) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(4, 2), 1, 0) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(8, 2), 2, 0) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(16, 8), 7, 0) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(17, 8), 8, 0) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(19, 15), 8, 0) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(5, 16), 16, 0) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(3, 16), 8, 0) | |||||
| || test_rnn_layer_with_hidden_input(RandomMat(2, 5), 17, 0) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(4, 4), 1, 2) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(8, 2), 2, 2) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(16, 8), 7, 2) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(17, 8), 8, 2) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(19, 15), 8, 2) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(5, 16), 16, 2) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(3, 16), 8, 2) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(2, 5), 99, 2) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(4, 4), 1, 1) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(8, 2), 2, 1) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(16, 8), 7, 1) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(17, 8), 8, 1) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(19, 15), 8, 1) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(5, 16), 16, 1) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(3, 16), 8, 1) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(2, 5), 99, 1) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(4, 2), 1, 0) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(8, 2), 2, 0) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(16, 8), 7, 0) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(17, 8), 8, 0) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(19, 15), 8, 0) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(5, 16), 16, 0) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(3, 16), 8, 0) | |||||
| || test_rnn_layer_with_hidden_output(RandomMat(2, 5), 17, 0); | |||||
| } | } | ||||
| static int test_rnn_2() | static int test_rnn_2() | ||||