From: @pengyongrong Reviewed-by: @ddwsky Signed-off-by: @ddwsky tags/v1.1.0
| @@ -39,12 +39,11 @@ int GatherOpenCLKernel::CheckSpecs() { | |||
| MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1 output Tensor but get " << out_tensors_.size(); | |||
| return RET_ERROR; | |||
| } | |||
| if (in_tensors_.at(1)->category() == lite::Tensor::VAR) { | |||
| MS_LOG(ERROR) << "GatherOpenCLKernel only supports indices Tensor is weight."; | |||
| enable_fp16_ = ocl_runtime_->GetFp16Enable(); | |||
| if (!in_tensors_.at(1)->IsConst() && enable_fp16_) { | |||
| MS_LOG(ERROR) << "GatherOpenCLKernel Unsupportted intensor1 = tensor and datatype = fp16 "; | |||
| return RET_ERROR; | |||
| } | |||
| int input_ndim = in_tensors_.front()->shape().size(); | |||
| if (input_ndim < 0 || input_ndim > 4) { | |||
| MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1-4D input Tensor but get " << input_ndim << "D."; | |||
| @@ -59,7 +58,7 @@ int GatherOpenCLKernel::CheckSpecs() { | |||
| TypeId data_type = in_tensors_.at(1)->data_type(); | |||
| if (data_type != kNumberTypeInt32 && data_type != kNumberTypeInt64 && data_type != kNumberTypeFloat32 && | |||
| data_type != kNumberTypeFloat16) { | |||
| MS_LOG(ERROR) << "Conv2D only supports Int32/Int64/Float32/Float16 indices Tensor."; | |||
| MS_LOG(ERROR) << "GatherOpenCLKernel only supports Int32/Int64/Float32/Float16 indices Tensor."; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -107,17 +106,51 @@ int GatherOpenCLKernel::Prepare() { | |||
| ocl_runtime_->LoadSource(program_name, gather_source); | |||
| ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); | |||
| #endif | |||
| if (!in_tensors_.at(1)->IsConst()) { | |||
| intensor1_is_tensor = true; | |||
| } | |||
| int ret = InitWeights(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| if (!intensor1_is_tensor) { | |||
| int ret = InitWeights(); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| } | |||
| SetGlobalLocal(); | |||
| SetConstArgs(); | |||
| MS_LOG(DEBUG) << kernel_name << " Init Done!"; | |||
| return RET_OK; | |||
| } | |||
| int GatherOpenCLKernel::ConvertTensorToweight() { | |||
| auto allocator = ocl_runtime_->GetAllocator(); | |||
| GpuTensorInfo img_info(in_tensors_[1]); | |||
| size_t dtype = sizeof(cl_int); | |||
| stride_w = img_info.RowPitch() / dtype; | |||
| auto indices_tensor = in_tensors_.at(1); | |||
| auto indices_num = indices_tensor->ElementsNum(); | |||
| indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num)); | |||
| allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true); | |||
| if (indices_data_ == nullptr) { | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| return RET_ERROR; | |||
| } | |||
| auto data_type = indices_tensor->data_type(); | |||
| auto data = indices_tensor->data_c(); | |||
| if (data_type == kNumberTypeInt32) { | |||
| for (int i = 0; i < indices_num; i++) { | |||
| indices_data_[i] = reinterpret_cast<int32_t *>(data)[i * stride_w]; | |||
| } | |||
| } else { | |||
| MS_LOG(ERROR) << "Gather Only supported The DataType Of Intensor1 is Int32 " | |||
| << " But Your Type is :" << data_type; | |||
| return RET_ERROR; | |||
| } | |||
| allocator->UnmapBuffer(indices_data_); | |||
| return RET_OK; | |||
| } | |||
| int GatherOpenCLKernel::InitWeights() { | |||
| auto indices_tensor = in_tensors_.at(1); | |||
| auto indices_num = indices_tensor->ElementsNum(); | |||
| @@ -152,6 +185,9 @@ int GatherOpenCLKernel::InitWeights() { | |||
| int GatherOpenCLKernel::Run() { | |||
| MS_LOG(DEBUG) << this->name() << " Running! "; | |||
| if (intensor1_is_tensor) { | |||
| ConvertTensorToweight(); | |||
| } | |||
| ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()); | |||
| ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()); | |||
| ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF); | |||
| @@ -39,6 +39,7 @@ class GatherOpenCLKernel : public OpenCLKernel { | |||
| void SetConstArgs() override; | |||
| void SetGlobalLocal() override; | |||
| int Tune() override { return lite::RET_OK; } | |||
| int ConvertTensorToweight(); | |||
| protected: | |||
| int UpdateWeights(); | |||
| @@ -46,6 +47,9 @@ class GatherOpenCLKernel : public OpenCLKernel { | |||
| private: | |||
| int32_t *indices_data_{nullptr}; | |||
| int axis_ = {0}; | |||
| bool intensor1_is_tensor{false}; | |||
| bool enable_fp16_{false}; | |||
| cl_int stride_w{1}; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif | |||
| @@ -34,7 +34,7 @@ namespace mindspore::kernel { | |||
| int ToFormatOpenCLKernel::CheckSpecs() { | |||
| auto data_type = in_tensors_.front()->data_type(); | |||
| if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) { | |||
| if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16 && data_type != kNumberTypeInt32) { | |||
| MS_LOG(ERROR) << "Unsupported data type " << data_type; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -61,7 +61,8 @@ void ToFormatOpenCLKernel::SetGlobalLocal() { | |||
| } | |||
| int ToFormatOpenCLKernel::Prepare() { | |||
| std::map<TypeId, std::string> dtype_str{{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}}; | |||
| std::map<TypeId, std::string> dtype_str{ | |||
| {kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}, {kNumberTypeInt32, "float"}}; | |||
| std::string kernel_name; | |||
| if (out_mem_type_ == MemType::IMG) { | |||
| kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_.front()->data_type()]; | |||
| @@ -68,7 +68,7 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std | |||
| memcpy(tensor->MutableData(), input_data, tensor->Size()); | |||
| } | |||
| } else { | |||
| EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32); | |||
| EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32); | |||
| subgraph_inputs.push_back(tensor); | |||
| subgraph_inputs_data[tensor] = reinterpret_cast<float *>(input_data); | |||
| } | |||
| @@ -46,6 +46,22 @@ TEST_F(TestOpenCL_Gather, Axis0) { | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Gather, Axis0_Tensor) { | |||
| int axis = 0; | |||
| std::vector<int> input_shape = {10}; | |||
| std::vector<int> indices_shape = {2}; | |||
| std::vector<int> output_shape = {2}; | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; | |||
| int32_t indices[] = {1, 3}; | |||
| float output_data[] = {1, 3}; | |||
| for (auto fp16_enable : {false}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, | |||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Gather, Axis1) { | |||
| int axis = 1; | |||
| std::vector<int> input_shape = {1, 5, 4, 4}; | |||
| @@ -75,6 +91,35 @@ TEST_F(TestOpenCL_Gather, Axis1) { | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Gather, Axis1_intensor1) { | |||
| int axis = 1; | |||
| std::vector<int> input_shape = {1, 5, 4, 4}; | |||
| std::vector<int> indices_shape = {2}; | |||
| std::vector<int> output_shape = {1, 2, 4, 4}; | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; | |||
| float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |||
| 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; | |||
| int32_t indices_int32[] = {1, 3}; | |||
| int64_t indices_int64[] = {1, 3}; | |||
| float32_t indices_fp32[] = {1, 3}; | |||
| float16_t indices_fp16[] = {1, 3}; | |||
| TypeId data_types[] = {kNumberTypeInt32, kNumberTypeInt64, kNumberTypeFloat32, kNumberTypeFloat16}; | |||
| void *indices_datas[] = {indices_int32, indices_int64, indices_fp32, indices_fp16}; | |||
| for (int i = 0; i < 1; ++i) { | |||
| for (auto fp16_enable : {false}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain( | |||
| {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices_datas[i], VAR, data_types[i]}}, | |||
| {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9); | |||
| } | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Gather, Axis2) { | |||
| int axis = 2; | |||
| std::vector<int> input_shape = {1, 5, 4, 4}; | |||
| @@ -96,6 +141,26 @@ TEST_F(TestOpenCL_Gather, Axis2) { | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Gather, Axis2_intensor1) { | |||
| int axis = 2; | |||
| std::vector<int> input_shape = {1, 5, 4, 4}; | |||
| std::vector<int> indices_shape = {2}; | |||
| std::vector<int> output_shape = {1, 5, 2, 4}; | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; | |||
| int32_t indices[] = {1, 3}; | |||
| float output_data[] = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 36, 37, 38, 39, | |||
| 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79}; | |||
| for (auto fp16_enable : {false}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, | |||
| {output_shape, output_data}, param, fp16_enable); | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Gather, Axis3) { | |||
| int axis = 3; | |||
| std::vector<int> input_shape = {1, 5, 4, 4}; | |||
| @@ -117,4 +182,24 @@ TEST_F(TestOpenCL_Gather, Axis3) { | |||
| } | |||
| } | |||
| TEST_F(TestOpenCL_Gather, Axis3_intensor1) { | |||
| int axis = 3; | |||
| std::vector<int> input_shape = {1, 5, 4, 4}; | |||
| std::vector<int> indices_shape = {2}; | |||
| std::vector<int> output_shape = {1, 5, 4, 2}; | |||
| float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |||
| 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | |||
| 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, | |||
| 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; | |||
| int32_t indices[] = {1, 3}; | |||
| float output_data[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, | |||
| 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79}; | |||
| for (auto fp16_enable : {false}) { | |||
| auto *param = CreateParameter(axis); | |||
| TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}}, | |||
| {output_shape, output_data}, param, fp16_enable); | |||
| } | |||
| } | |||
| } // namespace mindspore::lite::opencl::test | |||