Merge pull request !7688 from zhanghaibo/master
@@ -227,17 +227,17 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s
   size_t src_step = src_stride * sizeof(float);
   size_t dst_step = dst_stride * sizeof(float);
   asm volatile(
-    "mov r7, %[src_ptr]\n"
+    "mov r11, %[src_ptr]\n"
     "mov r8, %[dst_ptr]\n"
     "mov r10, r8\n"
-    "vld1.32 {q0}, [r7], %[src_step]\n"
+    "vld1.32 {q0}, [r11], %[src_step]\n"
     "vld1.32 {q1}, [r8], %[dst_step]\n"
-    "vld1.32 {q2}, [r7], %[src_step]\n"
+    "vld1.32 {q2}, [r11], %[src_step]\n"
     "vld1.32 {q3}, [r8], %[dst_step]\n"
     "vadd.f32 q0, q0, q1\n"
-    "vld1.32 {q8}, [r7], %[src_step]\n"
+    "vld1.32 {q8}, [r11], %[src_step]\n"
     "vadd.f32 q2, q2, q3\n"
     "vst1.32 {q0}, [r10], %[dst_step]\n"
@@ -245,19 +245,19 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s
     "vld1.32 {q9}, [r8], %[dst_step]\n"
-    "vld1.32 {q10}, [r7], %[src_step]\n"
+    "vld1.32 {q10}, [r11], %[src_step]\n"
     "vadd.f32 q8, q8, q9\n"
     "vld1.32 {q11}, [r8], %[dst_step]\n"
     "vadd.f32 q10, q10, q11\n"
-    "vld1.32 {q0}, [r7], %[src_step]\n"
+    "vld1.32 {q0}, [r11], %[src_step]\n"
     "vst1.32 {q8}, [r10], %[dst_step]\n"
     "vst1.32 {q10}, [r10], %[dst_step]\n"
     "vld1.32 {q1}, [r8], %[dst_step]\n"
-    "vld1.32 {q2}, [r7], %[src_step]\n"
+    "vld1.32 {q2}, [r11], %[src_step]\n"
     "vld1.32 {q3}, [r8], %[dst_step]\n"
     "vadd.f32 q0, q0, q1\n"
@@ -266,10 +266,10 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s
     "vst1.32 {q0}, [r10], %[dst_step]\n"
     "vst1.32 {q2}, [r10], %[dst_step]\n"
-    "vld1.32 {q8}, [r7], %[src_step]\n"
+    "vld1.32 {q8}, [r11], %[src_step]\n"
     "vld1.32 {q9}, [r8], %[dst_step]\n"
-    "vld1.32 {q10}, [r7], %[src_step]\n"
+    "vld1.32 {q10}, [r11], %[src_step]\n"
     "vld1.32 {q11}, [r8], %[dst_step]\n"
     "vadd.f32 q8, q8, q9\n"
@@ -280,7 +280,7 @@ void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_s
     :
     : [ src_ptr ] "r"(src_ptr), [ dst_ptr ] "r"(dst_ptr), [ src_step ] "r"(src_step), [ dst_step ] "r"(dst_step)
-    : "r7", "r8", "r10", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+    : "r8", "r10", "r11", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
 #else
   for (int j = 0; j < 8; j++) {
     const float *s = src_ptr + j * src_stride;
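
Note: the swap from r7 to r11 (in both the instructions and the clobber list) is most likely because r7 serves as the frame pointer in Thumb mode, so clobbering it can fail to compile or corrupt the stack in builds that keep frame pointers; r11 is an ordinary callee-saved scratch register in that configuration. A minimal sketch of the same pattern, using a hypothetical add4 helper that is not part of this patch:

    // Hedged sketch: keep r7 out of the inline-asm clobbers by using r11
    // as the scratch pointer, mirroring the change above.
    static void add4(const float *src, float *dst) {
    #if defined(__arm__) && defined(__ARM_NEON)
      asm volatile(
          "mov r11, %[src_ptr]\n"
          "vld1.32 {q0}, [r11]\n"
          "vld1.32 {q1}, [%[dst_ptr]]\n"
          "vadd.f32 q0, q0, q1\n"
          "vst1.32 {q0}, [%[dst_ptr]]\n"
          :
          : [ src_ptr ] "r"(src), [ dst_ptr ] "r"(dst)
          : "r11", "q0", "q1", "memory");
    #else
      for (int i = 0; i < 4; i++) {
        dst[i] += src[i];  // scalar fallback, like the #else branch above
      }
    #endif
    }
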
@@ -28,7 +28,9 @@ class SoftmaxOpenCLKernel : public OpenCLKernel {
  public:
   SoftmaxOpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                       const std::vector<lite::Tensor *> &outputs)
-      : OpenCLKernel(parameter, inputs, outputs), parameter_(reinterpret_cast<SoftmaxParameter *>(parameter)) {}
+      : OpenCLKernel(parameter, inputs, outputs) {
+    parameter_ = reinterpret_cast<SoftmaxParameter *>(parameter);
+  }
   ~SoftmaxOpenCLKernel() override = default;
   int Init() override;
@@ -6,8 +6,6 @@ include_directories(${TOP_DIR})
 include_directories(${TEST_DIR})
 include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/dependency_gtest.cmake)
-string(REPLACE " -Werror " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
-string(REPLACE " -Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 STRING(REPLACE " -fvisibility=hidden " " -fvisibility=default " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
 STRING(REPLACE " -fvisibility=hidden " " -fvisibility=default " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
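
Note: deleting the two string(REPLACE " -Werror " ...) lines stops stripping -Werror from the test build flags, and that appears to be what drives the loop-index changes throughout the rest of this PR: any signed/unsigned mismatch in a loop bound now fails the build via -Wsign-compare. The fix goes in whichever direction matches the bound's type (int when the bound is an int, unsigned when it is a size_t, a container size, or a sizeof expression). A self-contained sketch of the warning and the fix pattern:

    #include <cstdio>
    #include <vector>

    // Minimal repro of the -Wsign-compare pattern fixed below. With
    // -Werror active, the commented-out form is a hard build error.
    int main() {
      std::vector<int> v{1, 2, 3};
      // for (int i = 0; i < v.size(); i++) {}       // int vs size_t -> error
      for (unsigned int i = 0; i < v.size(); i++) {  // index matches the bound
        printf("%d\n", v[i]);
      }
      return 0;
    }
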
@@ -50,7 +50,7 @@ class CommonTest : public testing::Test {
   template <typename T>
   static void CompareOutputData(T *output_data, T *correct_data, int size, float err_bound) {
-    for (size_t i = 0; i < size; i++) {
+    for (int i = 0; i < size; i++) {
       T abs = fabs(output_data[i] - correct_data[i]);
       ASSERT_LE(abs, err_bound);
     }
@@ -6,8 +6,6 @@ include_directories(${TOP_DIR})
 include_directories(${TEST_DIR})
 add_compile_definitions(ENABLE_NNACL_INFER_SHAPE)
-string(REPLACE " -Werror " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
-string(REPLACE " -Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 STRING(REPLACE " -fvisibility=hidden " " -fvisibility=default " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
 STRING(REPLACE " -fvisibility=hidden " " -fvisibility=default " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
@@ -149,7 +149,7 @@ TEST_F(InferTest, TestConvNode) {
   ASSERT_NE(nullptr, output_data);
   //===================================================
   ASSERT_EQ(output_size, outTensor->Size());
-  for (size_t i = 0; i < outTensor->ElementsNum(); i++) {
+  for (int i = 0; i < outTensor->ElementsNum(); i++) {
     ASSERT_LE((output_data[i] - outData[i]), 0.001);
   }
   MS_LOG(INFO) << "Passed";
@@ -184,7 +184,7 @@ TEST_F(TestPack, PackWeightUint8) {
   std::string weight_path = "./test_data/conv/convuint8_weight_32_3_3_3.bin";
   auto weight_data = reinterpret_cast<uint8_t *>(mindspore::lite::ReadFile(weight_path.c_str(), &weight_size));
   auto int8_weight = reinterpret_cast<int8_t *>(malloc(weight_size));
-  for (int i = 0; i < weight_size; i++) {
+  for (unsigned int i = 0; i < weight_size; i++) {
     int8_weight[i] = (int8_t)(weight_data[i] - 128);
   }
   int32_t filter_zp = 20;
@@ -117,7 +117,7 @@ TEST_F(TestStridedSlice, StridedSliceInt8) {
   EXPECT_EQ(0, ret);
   int8_t expect[4] = {-6, -5, 7, 8};
-  for (int i = 0; i < sizeof(expect); ++i) {
+  for (unsigned int i = 0; i < sizeof(expect); ++i) {
     EXPECT_EQ(output_data[i], expect[i]);
   }
@@ -33,7 +33,7 @@ int ConstantOfShapeTestInit(std::vector<lite::Tensor *> *inputs_, std::vector<li
   inputs_->push_back(in_t);
   std::vector<int> c_shape(in_t->ElementsNum());
-  for (int i = 0; i < c_shape.size(); ++i) {
+  for (unsigned int i = 0; i < c_shape.size(); ++i) {
     c_shape[i] = a_ptr[i];
   }
   auto out_t = new lite::Tensor(kNumberTypeFloat, c_shape, schema::Format_NHWC, lite::Tensor::Category::CONST_TENSOR);
@@ -138,10 +138,10 @@ TEST_F(TestConvolutionDwFp32, ConvDwFp32Accuracy) {
   CompareOutputData(output_ptr, correct_data, outputs[0]->ElementsNum(), 0.0001);
   delete conv_param;
-  for (int i = 0; i < inputs.size(); i++) {
+  for (unsigned int i = 0; i < inputs.size(); i++) {
     delete inputs[i];
   }
-  for (int i = 0; i < outputs.size(); i++) {
+  for (unsigned int i = 0; i < outputs.size(); i++) {
     delete outputs[i];
   }
   delete kernel;
@@ -189,10 +189,10 @@ TEST_F(TestConvolutionDwFp32, ConvDwFp32Performance) {
   printf("Convolution_depthwise fp32 average time : %f ms\n", time_avg / 1000.0f);
   delete conv_param;
-  for (int i = 0; i < inputs.size(); i++) {
+  for (unsigned int i = 0; i < inputs.size(); i++) {
     delete inputs[i];
   }
-  for (int i = 0; i < outputs.size(); i++) {
+  for (unsigned int i = 0; i < outputs.size(); i++) {
     delete outputs[i];
   }
   delete kernel;
@@ -60,7 +60,7 @@ TEST_F(TestEluFp32, EluTest) {
   elu->Run();
   std::cout << "output shape:" << std::endl;
-  for (int i = 0; i < outputs_.front()->shape().size(); ++i) {
+  for (unsigned int i = 0; i < outputs_.front()->shape().size(); ++i) {
     std::cout << outputs_.front()->shape()[i] << ' ';
   }
   std::cout << std::endl;
@@ -75,7 +75,7 @@ TEST_F(TestEmbeddingLookupFp32, ElTest) {
   el->Run();
   std::cout << "output shape:" << std::endl;
-  for (int i = 0; i < outputs_.front()->shape().size(); ++i) {
+  for (unsigned int i = 0; i < outputs_.front()->shape().size(); ++i) {
     std::cout << outputs_.front()->shape()[i] << ' ';
   }
   std::cout << std::endl;
@@ -146,8 +146,8 @@ TEST_F(TestFcFp32, FcTest2) {
   CompareOutputData(reinterpret_cast<float *>(outputs_[0]->MutableData()), correct, total_size, 0.0001);
 }
-int FcTestInit3(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *> *outputs_,
-                MatMulParameter *matmal_param, float **correct) {
+void FcTestInit3(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *> *outputs_,
+                 MatMulParameter *matmal_param, float **correct) {
   Tensor *in_t = new Tensor(kNumberTypeFloat, {1, 1, 1, 20}, schema::Format_NHWC, lite::Tensor::Category::CONST_TENSOR);
   in_t->MallocData();
   float in[] = {1, 0, 3, 0, 4, 5, 2, 5, 2, 5, 1, 5, 0, 1, 2, 0, 2, 1, 0, 5};
@@ -177,7 +177,6 @@ int FcTestInit3(std::vector<lite::Tensor *> *inputs_, std::vector<lite::Tensor *
   matmal_param->a_transpose_ = false;
   matmal_param->has_bias_ = false;
   matmal_param->act_type_ = ActType_No;
-  return out_t->ElementsNum();
 }
 TEST_F(TestFcFp32, FcTest3) {
@@ -185,7 +184,7 @@ TEST_F(TestFcFp32, FcTest3) {
   std::vector<lite::Tensor *> outputs_;
   auto matmul_param = new MatMulParameter();
   float *correct;
-  int total_size = FcTestInit3(&inputs_, &outputs_, matmul_param, &correct);
+  FcTestInit3(&inputs_, &outputs_, matmul_param, &correct);
   lite::InnerContext *ctx = new lite::InnerContext;
   ctx->thread_num_ = 1;
   ASSERT_EQ(lite::RET_OK, ctx->Init());
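
Note: with FcTestInit3 now returning void, FcTest3 no longer receives total_size; presumably (an assumption, since the call site is outside this diff) the later comparison sizes itself from the output tensor, e.g. via outputs_[0]->ElementsNum(). A self-contained sketch of the same refactor pattern, with illustrative names:

    #include <cassert>
    #include <vector>

    // Sketch: the init helper stops returning a size; the caller derives
    // it from the container the helper filled.
    static void InitData(std::vector<float> *out) { out->assign({1, 64, 2187, 65536}); }

    int main() {
      std::vector<float> out;
      InitData(&out);
      int total_size = static_cast<int>(out.size());  // recovered at the call site
      assert(total_size == 4);
      return 0;
    }
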
@@ -171,10 +171,10 @@ TEST_F(LstmFp32, LstmForwardFp32Accuracy) {
   CompareOutput(outputs[2], output2_data);
   delete lstm_param;
-  for (int i = 0; i < inputs.size() - 1; i++) {
+  for (unsigned int i = 0; i < inputs.size() - 1; i++) {
     delete inputs[i];
   }
-  for (int i = 0; i < outputs.size(); i++) {
+  for (unsigned int i = 0; i < outputs.size(); i++) {
     delete outputs[i];
   }
   delete kernel;
@@ -321,10 +321,10 @@ TEST_F(LstmFp32, LstmBackwardFp32Accuracy) {
   CompareOutput(outputs[2], output2_data);
   delete lstm_param;
-  for (int i = 0; i < inputs.size() - 1; i++) {
+  for (unsigned int i = 0; i < inputs.size() - 1; i++) {
     delete inputs[i];
   }
-  for (int i = 0; i < outputs.size(); i++) {
+  for (unsigned int i = 0; i < outputs.size(); i++) {
     delete outputs[i];
   }
   delete kernel;
@@ -79,7 +79,6 @@ TEST_F(TestPowerFp32, Simple) {
   op->Init();
   op->Run();
   float correct[] = {1, 64, 2187, 65536};
-  float *output = reinterpret_cast<float *>(outputs_[0]->MutableData());
   CompareOutputData(reinterpret_cast<float *>(outputs_[0]->MutableData()), correct, total_size, 0.0001);
   delete op;
   for (auto t : inputs_) delete t;
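
Note: the deleted line here is a local (output) that was initialized but never read; with -Werror restored, -Wunused-variable makes such leftovers fatal. The same cleanup recurs in the OpenCL activation tests further down, where never-used op_format locals are dropped. Self-contained repro of the pattern:

    // An initialized-but-unread local trips -Wunused-variable, which
    // -Werror promotes to a build error.
    int main() {
      float unused_output = 0.0f;  // warning: unused variable -> error
      return 0;
    }
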
@@ -67,7 +67,7 @@ TEST_F(TestSkipGramFp32, ElTest) {
   el->Run();
   std::vector<StringPack> output = mindspore::lite::ParseTensorBuffer(outputs_[0]);
-  for (int i = 0; i < output.size(); i++) {
+  for (unsigned int i = 0; i < output.size(); i++) {
     for (int j = 0; j < output[i].len; j++) {
       printf("%c", output[i].data[j]);
     }
@@ -39,7 +39,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest4) {
   param.block_sizes_[0] = 2;
   param.block_sizes_[1] = 1;
   DoSpaceToBatchNHWC(input.data(), out, param.block_sizes_, in_shape.data(), out_shape.data());
-  for (int i = 0; i < kOutSize; ++i) {
+  for (unsigned int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
   std::cout << "\n";
@@ -57,7 +57,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest5) {
   param.block_sizes_[0] = 1;
   param.block_sizes_[1] = 2;
   DoSpaceToBatchNHWC(input.data(), out, param.block_sizes_, in_shape.data(), out_shape.data());
-  for (int i = 0; i < kOutSize; ++i) {
+  for (unsigned int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
   std::cout << "\n";
@@ -75,7 +75,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest6) {
   param.block_sizes_[0] = 2;
   param.block_sizes_[1] = 2;
   DoSpaceToBatchNHWC(input.data(), out, param.block_sizes_, in_shape.data(), out_shape.data());
-  for (int i = 0; i < kOutSize; ++i) {
+  for (unsigned int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
   std::cout << "\n";
@@ -97,7 +97,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest7) {
   param.block_sizes_[0] = 2;
   param.block_sizes_[1] = 2;
   DoSpaceToBatchNHWC(input.data(), out, param.block_sizes_, in_shape.data(), out_shape.data());
-  for (int i = 0; i < kOutSize; ++i) {
+  for (unsigned int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
   std::cout << "\n";
@@ -116,7 +116,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest8) {
   std::vector<int> out_shape = {1, 5, 5, 2};
   std::vector<int> padding = {0, 1, 0, 1};
   DoSpaceToBatchPaddingNHWC(input.data(), out, in_shape.data(), padding.data(), out_shape.data());
-  for (int i = 0; i < kOutSize; ++i) {
+  for (unsigned int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
   std::cout << "\n";
@@ -136,7 +136,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest9) {
   std::vector<int> out_shape = {1, 6, 6, 2};
   std::vector<int> padding = {1, 1, 1, 1};
   DoSpaceToBatchPaddingNHWC(input.data(), out, in_shape.data(), padding.data(), out_shape.data());
-  for (int i = 0; i < kOutSize; ++i) {
+  for (unsigned int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
   std::cout << "\n";
@@ -162,7 +162,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest10) {
   param.block_sizes_[0] = 2;
   param.block_sizes_[1] = 2;
   DoSpaceToBatchNHWC(pedding_out, out, param.block_sizes_, pedding_out_shape.data(), out_shape.data());
-  for (int i = 0; i < kOutSize; ++i) {
+  for (unsigned int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
   std::cout << "\n";
@@ -276,10 +276,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) {
   kernel->Init();
-  auto time_start = mindspore::lite::GetTimeUs();
   kernel->Run();
-  auto time_end = mindspore::lite::GetTimeUs();
-  printf("single thread running time : %lu ms\n", time_end - time_start);
   std::string output_path = "./test_data/pooling/avgpoolgradfp32_s2_dx_3_28_28_3.bin";
   auto res = lite::CompareRelativeOutput(out_data, output_path);
@@ -340,10 +337,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) {
   kernel->Init();
-  auto time_start = mindspore::lite::GetTimeUs();
   kernel->Run();
-  auto time_end = mindspore::lite::GetTimeUs();
-  printf("single thread running time : %lu ms\n", time_end - time_start);
   std::string output_path = "./test_data/pooling/avgpoolgradfp32_s3_dx_3_28_28_3.bin";
   auto res = lite::CompareRelativeOutput(out_data, output_path);
@@ -461,10 +455,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) {
   kernel->Init();
-  auto time_start = mindspore::lite::GetTimeUs();
   kernel->Run();
-  auto time_end = mindspore::lite::GetTimeUs();
-  printf("single thread running time : %lu ms\n", time_end - time_start);
   std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_xgrad_3_28_28_3.bin";
   auto res = lite::CompareRelativeOutput(out_data, output_path);
@@ -535,10 +526,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) {
   kernel->Init();
-  auto time_start = mindspore::lite::GetTimeUs();
   kernel->Run();
-  auto time_end = mindspore::lite::GetTimeUs();
-  printf("single thread running time : %lu ms\n", time_end - time_start);
   std::string output_path = "./test_data/pooling/maxpoolgradfp32_s2_xgrad_3_28_28_3.bin";
   auto res = lite::CompareRelativeOutput(out_data, output_path);
@@ -609,10 +597,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) {
   kernel->Init();
-  auto time_start = mindspore::lite::GetTimeUs();
   kernel->Run();
-  auto time_end = mindspore::lite::GetTimeUs();
-  printf("single thread running time : %lu ms\n", time_end - time_start);
   std::string output_path = "./test_data/pooling/maxpoolgradfp32_s3_xgrad_3_28_28_3.bin";
   auto res = lite::CompareRelativeOutput(out_data, output_path);
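
Note: all five pooling-grad tests drop their timing scaffolding (time_start, time_end, and the printf), keeping the accuracy check as the sole purpose of the test and leaving no half-used locals for -Werror to reject. If timing is ever wanted again, a hedged, self-contained sketch using std::chrono (hypothetical TimedRun helper, not part of this patch):

    #include <chrono>
    #include <cstdio>

    // Wraps a callable and prints its wall time; both time points are
    // always consumed, so -Wunused-variable has nothing to flag.
    template <typename F>
    void TimedRun(F &&run) {
      auto start = std::chrono::steady_clock::now();
      run();
      auto end = std::chrono::steady_clock::now();
      auto us = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
      printf("single thread running time : %lld us\n", static_cast<long long>(us));
    }

    int main() {
      TimedRun([] { /* kernel->Run() would go here */ });
      return 0;
    }
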
@@ -108,7 +108,7 @@ TEST_F(TestReluXInt8, Relu6) {
   // 0.0f, 0.0f, 1.25f, 3.0f, 4.5f, 6.0f, 6.0f, 6.0f
   int8_t expect[8] = {-128, -128, -96, -52, -14, 25, 25, 25};
-  for (int i = 0; i < sizeof(expect); ++i) {
+  for (unsigned int i = 0; i < sizeof(expect); ++i) {
     EXPECT_EQ(output_data[i], expect[i]);
   }
@@ -68,12 +68,12 @@ TEST_F(TestNormalize, TestSentence) {
   kernel_ = creator_(inputs_, outputs_, &parameter_, &ctx_, desc_, nullptr);
   ASSERT_NE(kernel_, nullptr);
   auto ret = kernel_->Init();
-  MS_ASSERT(ret == 0);
+  ASSERT_EQ(ret, 0);
   ret = kernel_->Run();
-  MS_ASSERT(ret == 0);
+  ASSERT_EQ(ret, 0);
   std::vector<StringPack> output = mindspore::lite::ParseTensorBuffer(outputs_[0]);
-  for (int i = 0; i < output.size(); i++) {
+  for (unsigned int i = 0; i < output.size(); i++) {
     for (int j = 0; j < output[i].len; j++) {
       printf("%c", output[i].data[j]);
     }
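
Note: replacing MS_ASSERT with ASSERT_EQ matters because MS_ASSERT can compile to a no-op outside debug builds, so a failing Init() or Run() was silently ignored; the gtest macro always evaluates and fails the test on mismatch. Sketch of the difference, where NOOP_ASSERT is an illustrative stand-in for a compiled-out MS_ASSERT:

    #include <gtest/gtest.h>

    #define NOOP_ASSERT(cond) ((void)0)  // stand-in for a compiled-out assert

    TEST(AssertSketch, NoOpVersusGtest) {
      int ret = 1;            // pretend Init() failed
      NOOP_ASSERT(ret == 0);  // passes silently: the check never runs
      EXPECT_NE(ret, 0);      // gtest macros always evaluate and report
    }
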
@@ -91,7 +91,6 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) {
   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
   schema::Format format = schema::Format_NC;
-  schema::Format op_format = schema::Format_NC4;
   auto tensor_type = lite::Tensor::CONST_TENSOR;
   auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
@@ -198,7 +197,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) {
   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
   schema::Format format = schema::Format_NC;
-  schema::Format op_format = schema::Format_NC4;
   auto tensor_type = lite::Tensor::CONST_TENSOR;
   auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
@@ -308,7 +306,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) {
   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 9};
   schema::Format format = schema::Format_NC;
-  schema::Format op_format = schema::Format_NC4;
   auto tensor_type = lite::Tensor::CONST_TENSOR;
   auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
@@ -411,15 +408,14 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) {
   MS_LOG(INFO) << "Leaky relu Begin test!";
   auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance();
   ocl_runtime->Init();
-  auto data_type = kNumberTypeFloat16;  // need modify
+  auto data_type = kNumberTypeFloat16;
   ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
   bool enable_fp16 = ocl_runtime->GetFp16Enable();
   MS_LOG(INFO) << "Init tensors.";
-  std::vector<int> input_shape = {1, 9};  // need modify
+  std::vector<int> input_shape = {1, 9};
   auto tensor_type = lite::Tensor::CONST_TENSOR;
-  schema::Format format = schema::Format_NC;        // need modify
-  schema::Format op_format = schema::Format_NHWC4;  // need modify
+  schema::Format format = schema::Format_NC;
   auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
     MS_LOG(ERROR) << "new input tensor error!";
@@ -527,7 +523,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) {
   MS_LOG(INFO) << "Init tensors.";
   std::vector<int> input_shape = {1, 2, 3, 9};
   schema::Format format = schema::Format_NHWC;
-  schema::Format op_format = schema::Format_NC4HW4;
   auto tensor_type = lite::Tensor::CONST_TENSOR;
   auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {
@@ -77,13 +77,12 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) {
   MS_LOG(INFO) << "BiasAdd Begin test:";
   auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance();
   ocl_runtime->Init();
-  auto data_type = kNumberTypeFloat16;  // need modify
+  auto data_type = kNumberTypeFloat16;
   ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
-  std::vector<int> input_shape = {1, 9};   // need modify
-  std::vector<int> output_shape = {1, 9};  // need modify
+  std::vector<int> input_shape = {1, 9};
+  std::vector<int> output_shape = {1, 9};
   auto tensor_type = lite::Tensor::CONST_TENSOR;
-  schema::Format type = schema::Format_NC;        // need modify
-  schema::Format op_format = schema::Format_NC4;  // need modify
+  schema::Format type = schema::Format_NC;
   int weight_shape = 0;
   if (input_shape.size() == 4) {
     weight_shape = input_shape[3];
@@ -86,7 +86,6 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
   auto data_type = kNumberTypeFloat16;
   ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16);
   schema::Format format = schema::Format_NHWC;
-  schema::Format op_format = schema::Format_NC4HW4;
   auto tensor_type = lite::Tensor::CONST_TENSOR;
   auto input_tensor = new (std::nothrow) lite::Tensor(data_type, input_shape, format, tensor_type);
   if (input_tensor == nullptr) {