Merge pull request !6070 from pengyongrong/concatAnyDimensiontags/v1.0.0
| @@ -58,3 +58,15 @@ __kernel void Sigmoid(__read_only image2d_t input, __write_only image2d_t output | |||||
| tmp.w = 1.0f / (1.0f + exp(-in_c4.w)); | tmp.w = 1.0f / (1.0f + exp(-in_c4.w)); | ||||
| WRITE_IMAGE(output, (int2)(X, Y), tmp); | WRITE_IMAGE(output, (int2)(X, Y), tmp); | ||||
| } | } | ||||
// Element-wise hyperbolic tangent over a 2D image.
// Bounds: only input_shape.y (rows) and input_shape.z (columns) are read here;
// NOTE(review): presumably Y indexes N*H and X indexes W*C4, matching the
// Sigmoid kernel above — confirm against the host-side global size setup.
__kernel void Tanh(__read_only image2d_t input, __write_only image2d_t output, int4 input_shape) {
  int Y = get_global_id(0);  // row index
  int X = get_global_id(1);  // column index
  if (X >= input_shape.z || Y >= input_shape.y) return;
  FLT4 in_c4 = READ_IMAGE(input, smp_zero, (int2)(X, Y));
  // Use the built-in tanh() instead of (exp(x)-exp(-x))/(exp(x)+exp(-x)):
  // the manual form overflows to inf/inf = NaN for moderately large |x|
  // (especially with FLT4 == half4), while the built-in saturates cleanly.
  // This also matches the ArithmeticSelf_ElementTanh_* kernels in this file.
  in_c4.x = tanh(in_c4.x);
  in_c4.y = tanh(in_c4.y);
  in_c4.z = tanh(in_c4.z);
  in_c4.w = tanh(in_c4.w);
  WRITE_IMAGE(output, (int2)(X, Y), in_c4);
}
| @@ -97,38 +97,6 @@ __kernel void ArithmeticSelf_ElementSin_NC4HW4(__read_only image2d_t input0, __w | |||||
| WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result); | WRITE_IMAGE(output, (int2)((Y), (Z * output_shape.y + X)), result); | ||||
| } | } | ||||
// Element-wise tanh, NHWC4 image layout: x = W * C4 + c4, y = N * H.
__kernel void ArithmeticSelf_ElementTanh_NHWC4(__read_only image2d_t input0, __write_only image2d_t output,
                                               int4 output_shape) {
  const int nh = get_global_id(0);  // N*H
  const int w = get_global_id(1);   // W
  const int c4 = get_global_id(2);  // C/4
  // Guard against the padded global range exceeding the tensor extents.
  const bool outside = nh >= output_shape.x * output_shape.y || w >= output_shape.z || c4 >= output_shape.w;
  if (outside) {
    return;
  }
  // Read and write share the same pixel coordinate; compute it once.
  const int2 pos = (int2)(w * output_shape.w + c4, nh);
  FLT4 v = READ_IMAGE(input0, smp_none, pos);
  v.x = tanh(v.x);
  v.y = tanh(v.y);
  v.z = tanh(v.z);
  v.w = tanh(v.w);
  WRITE_IMAGE(output, pos, v);
}
// Element-wise tanh, NC4HW4 image layout: x = W, y = c4 * H + (N * H row).
__kernel void ArithmeticSelf_ElementTanh_NC4HW4(__read_only image2d_t input0, __write_only image2d_t output,
                                                int4 output_shape) {
  const int nh = get_global_id(0);  // N*H
  const int w = get_global_id(1);   // W
  const int c4 = get_global_id(2);  // C/4
  // Guard against the padded global range exceeding the tensor extents.
  const bool outside = nh >= output_shape.x * output_shape.y || w >= output_shape.z || c4 >= output_shape.w;
  if (outside) {
    return;
  }
  // Read and write share the same pixel coordinate; compute it once.
  const int2 pos = (int2)(w, c4 * output_shape.y + nh);
  FLT4 v = READ_IMAGE(input0, smp_none, pos);
  v.x = tanh(v.x);
  v.y = tanh(v.y);
  v.z = tanh(v.z);
  v.w = tanh(v.w);
  WRITE_IMAGE(output, pos, v);
}
| __kernel void ArithmeticSelf_ElementNeg_NHWC4(__read_only image2d_t input0, __write_only image2d_t output, | __kernel void ArithmeticSelf_ElementNeg_NHWC4(__read_only image2d_t input0, __write_only image2d_t output, | ||||
| int4 output_shape) { | int4 output_shape) { | ||||
| int X = get_global_id(0); // N*H | int X = get_global_id(0); // N*H | ||||
| @@ -35,6 +35,7 @@ using mindspore::schema::ActivationType_LEAKY_RELU; | |||||
| using mindspore::schema::ActivationType_RELU; | using mindspore::schema::ActivationType_RELU; | ||||
| using mindspore::schema::ActivationType_RELU6; | using mindspore::schema::ActivationType_RELU6; | ||||
| using mindspore::schema::ActivationType_SIGMOID; | using mindspore::schema::ActivationType_SIGMOID; | ||||
| using mindspore::schema::ActivationType_TANH; | |||||
| using mindspore::schema::PrimitiveType_Activation; | using mindspore::schema::PrimitiveType_Activation; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| @@ -67,7 +68,8 @@ int ActivationOpenClKernel::Init() { | |||||
| {ActivationType_LEAKY_RELU, std::vector<std::string>{"LEAKY_RELU", "LeakyRelu"}}, | {ActivationType_LEAKY_RELU, std::vector<std::string>{"LEAKY_RELU", "LeakyRelu"}}, | ||||
| {ActivationType_RELU, std::vector<std::string>{"RELU", "Relu"}}, | {ActivationType_RELU, std::vector<std::string>{"RELU", "Relu"}}, | ||||
| {ActivationType_SIGMOID, std::vector<std::string>{"SIGMOID", "Sigmoid"}}, | {ActivationType_SIGMOID, std::vector<std::string>{"SIGMOID", "Sigmoid"}}, | ||||
| {ActivationType_RELU6, std::vector<std::string>{"RELU6", "Relu6"}}}; | |||||
| {ActivationType_RELU6, std::vector<std::string>{"RELU6", "Relu6"}}, | |||||
| {ActivationType_TANH, std::vector<std::string>{"TANH", "Tanh"}}}; | |||||
| if (Program_Kernel.count(type_) == 0) { | if (Program_Kernel.count(type_) == 0) { | ||||
| MS_LOG(ERROR) << "schema::ActivationType:" << type_ << "not found"; | MS_LOG(ERROR) << "schema::ActivationType:" << type_ << "not found"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| @@ -98,6 +100,7 @@ int ActivationOpenClKernel::Run() { | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_); | ||||
| } | } | ||||
| std::vector<size_t> local = {}; | std::vector<size_t> local = {}; | ||||
| std::cout << img2d_shape.s[1] << " " << img2d_shape.s[2] << std::endl; | |||||
| std::vector<size_t> global = {static_cast<size_t>(img2d_shape.s[1]), static_cast<size_t>(img2d_shape.s[2])}; | std::vector<size_t> global = {static_cast<size_t>(img2d_shape.s[1]), static_cast<size_t>(img2d_shape.s[2])}; | ||||
| auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr); | auto ret = ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -132,7 +132,7 @@ int SliceOpenCLKernel::Run() { | |||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ||||
| return RET_OK; | return RET_OK; | ||||
| } // namespace mindspore::kernel | |||||
| } | |||||
| kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, | ||||
| @@ -32,10 +32,12 @@ using mindspore::schema::ActivationType_LEAKY_RELU; | |||||
| using mindspore::schema::ActivationType_RELU; | using mindspore::schema::ActivationType_RELU; | ||||
| using mindspore::schema::ActivationType_RELU6; | using mindspore::schema::ActivationType_RELU6; | ||||
| using mindspore::schema::ActivationType_SIGMOID; | using mindspore::schema::ActivationType_SIGMOID; | ||||
| using mindspore::schema::ActivationType_TANH; | |||||
| using mindspore::schema::PrimitiveType_Activation; | using mindspore::schema::PrimitiveType_Activation; | ||||
| namespace mindspore { | namespace mindspore { | ||||
| class TestActivationOpenCL : public mindspore::CommonTest {}; | class TestActivationOpenCL : public mindspore::CommonTest {}; | ||||
| class TestActivationOpenCLTanh : public mindspore::CommonTest {}; | |||||
| void LoadActivationData(void *dst, size_t dst_size, const std::string &file_path) { | void LoadActivationData(void *dst, size_t dst_size, const std::string &file_path) { | ||||
| if (file_path.empty()) { | if (file_path.empty()) { | ||||
| @@ -532,4 +534,119 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | |||||
| delete sub_graph; | delete sub_graph; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | lite::opencl::OpenCLRuntime::DeleteInstance(); | ||||
| } | } | ||||
| TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | |||||
| std::string in_file = "/data/local/tmp/test_data/in_tanh.bin"; | |||||
| std::string out_file = "/data/local/tmp/test_data/out_tanh.bin"; | |||||
| MS_LOG(INFO) << "Tanh Begin test!"; | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| ocl_runtime->Init(); | |||||
| auto data_type = kNumberTypeFloat32; | |||||
| ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | |||||
| bool enable_fp16 = ocl_runtime->GetFp16Enable(); | |||||
| MS_LOG(INFO) << "Init tensors."; | |||||
| std::vector<int> input_shape = {1, 2, 3, 9}; | |||||
| schema::Format format = schema::Format_NHWC; | |||||
| schema::Format op_format = schema::Format_NC4HW4; | |||||
| auto tensor_type = lite::TensorCategory(schema::NodeType_ValueNode); | |||||
| auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, input_shape, format, tensor_type); | |||||
| if (input_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "new input tensor error!"; | |||||
| return; | |||||
| } | |||||
| auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, input_shape, format, tensor_type); | |||||
| if (output_tensor == nullptr) { | |||||
| MS_LOG(ERROR) << "new output tensor error!"; | |||||
| delete input_tensor; | |||||
| return; | |||||
| } | |||||
| std::vector<lite::Tensor *> inputs{input_tensor}; | |||||
| std::vector<lite::Tensor *> outputs{output_tensor}; | |||||
| auto allocator = ocl_runtime->GetAllocator(); | |||||
| inputs[0]->MallocData(allocator); | |||||
| MS_LOG(INFO) << "Initialize input data"; | |||||
| LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); | |||||
| if (enable_fp16) { | |||||
| printf_tensor<float16_t>("Tanh:FP16--input data--", inputs[0]); | |||||
| } else { | |||||
| printf_tensor<float>("Tanh:FP32--input data--", inputs[0]); | |||||
| } | |||||
| auto *param = new (std::nothrow) ActivationParameter(); | |||||
| if (param == nullptr) { | |||||
| MS_LOG(ERROR) << "New ActivationParameter fail."; | |||||
| delete input_tensor; | |||||
| delete output_tensor; | |||||
| return; | |||||
| } | |||||
| param->type_ = ActivationType_TANH; | |||||
| auto *kernel = | |||||
| new (std::nothrow) kernel::ActivationOpenClKernel(reinterpret_cast<OpParameter *>(param), inputs, outputs); | |||||
| if (kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "Kernel:Tanh create fail."; | |||||
| delete param; | |||||
| delete input_tensor; | |||||
| delete output_tensor; | |||||
| return; | |||||
| } | |||||
| kernel->SetFormatType(op_format); | |||||
| auto ret = kernel->Init(); | |||||
| if (ret != RET_OK) { | |||||
| delete param; | |||||
| delete kernel; | |||||
| delete input_tensor; | |||||
| delete output_tensor; | |||||
| MS_LOG(ERROR) << "Init tanh fail."; | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "Create kernel SubGraphOpenCLKernel."; | |||||
| std::vector<kernel::LiteKernel *> kernels{kernel}; | |||||
| auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); | |||||
| if (sub_graph == nullptr) { | |||||
| delete kernel; | |||||
| delete param; | |||||
| delete input_tensor; | |||||
| delete output_tensor; | |||||
| MS_LOG(ERROR) << "Kernel SubGraphOpenCLKernel create fail."; | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "Initialize sub_graph."; | |||||
| ret = sub_graph->Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init sub_graph error."; | |||||
| delete kernel; | |||||
| delete param; | |||||
| delete input_tensor; | |||||
| delete output_tensor; | |||||
| delete sub_graph; | |||||
| return; | |||||
| } | |||||
| MS_LOG(INFO) << "Run SubGraphOpenCLKernel."; | |||||
| ret = sub_graph->Run(); | |||||
| if (ret != RET_OK) { | |||||
| delete kernel; | |||||
| delete param; | |||||
| delete input_tensor; | |||||
| delete output_tensor; | |||||
| delete sub_graph; | |||||
| MS_LOG(ERROR) << "Run SubGraphOpenCLKernel error."; | |||||
| return; | |||||
| } | |||||
| if (enable_fp16) { | |||||
| printf_tensor<float16_t>("Tanh:FP16--output data---", outputs[0]); | |||||
| CompareRes<float16_t>(output_tensor, out_file); | |||||
| } else { | |||||
| printf_tensor<float>("Tanh:FP32--output data---", outputs[0]); | |||||
| CompareRes<float>(output_tensor, out_file); | |||||
| } | |||||
| delete kernel; | |||||
| delete param; | |||||
| delete input_tensor; | |||||
| delete output_tensor; | |||||
| delete sub_graph; | |||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | |||||
| } // namespace mindspore | } // namespace mindspore | ||||