@@ -39,12 +39,11 @@ int GatherOpenCLKernel::CheckSpecs() {
     MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1 output Tensor but get " << out_tensors_.size();
     return RET_ERROR;
   }
-  if (in_tensors_.at(1)->category() == lite::Tensor::VAR) {
-    MS_LOG(ERROR) << "GatherOpenCLKernel only supports indices Tensor is weight.";
+  enable_fp16_ = ocl_runtime_->GetFp16Enable();
+  if (!in_tensors_.at(1)->IsConst() && enable_fp16_) {
+    MS_LOG(ERROR) << "GatherOpenCLKernel does not support a non-const indices Tensor when fp16 is enabled.";
     return RET_ERROR;
   }
   int input_ndim = in_tensors_.front()->shape().size();
   if (input_ndim < 0 || input_ndim > 4) {
     MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1-4D input Tensor but get " << input_ndim << "D.";
@@ -59,7 +58,7 @@ int GatherOpenCLKernel::CheckSpecs() {
   TypeId data_type = in_tensors_.at(1)->data_type();
   if (data_type != kNumberTypeInt32 && data_type != kNumberTypeInt64 && data_type != kNumberTypeFloat32 &&
       data_type != kNumberTypeFloat16) {
-    MS_LOG(ERROR) << "Conv2D only supports Int32/Int64/Float32/Float16 indices Tensor.";
+    MS_LOG(ERROR) << "GatherOpenCLKernel only supports Int32/Int64/Float32/Float16 indices Tensor.";
     return RET_ERROR;
   }
@@ -107,17 +106,51 @@ int GatherOpenCLKernel::Prepare() {
   ocl_runtime_->LoadSource(program_name, gather_source);
   ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
 #endif
-  int ret = InitWeights();
-  if (ret != RET_OK) {
-    return ret;
+  if (!in_tensors_.at(1)->IsConst()) {
+    intensor1_is_tensor = true;
+  }
+  if (!intensor1_is_tensor) {
+    int ret = InitWeights();
+    if (ret != RET_OK) {
+      return ret;
+    }
   }
   SetGlobalLocal();
   SetConstArgs();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }

+int GatherOpenCLKernel::ConvertTensorToweight() {
+  auto allocator = ocl_runtime_->GetAllocator();
+  GpuTensorInfo img_info(in_tensors_[1]);
+  size_t dtype = sizeof(cl_int);
+  stride_w = img_info.RowPitch() / dtype;
+  auto indices_tensor = in_tensors_.at(1);
+  auto indices_num = indices_tensor->ElementsNum();
+  indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num));
+  if (indices_data_ == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    return RET_ERROR;
+  }
+  allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true);
+  auto data_type = indices_tensor->data_type();
+  auto data = indices_tensor->data_c();
+  if (data_type == kNumberTypeInt32) {
+    for (int i = 0; i < indices_num; i++) {
+      indices_data_[i] = reinterpret_cast<int32_t *>(data)[i * stride_w];
+    }
+  } else {
+    MS_LOG(ERROR) << "ConvertTensorToweight only supports an Int32 indices Tensor but got data type " << data_type;
+    return RET_ERROR;
+  }
+  allocator->UnmapBuffer(indices_data_);
+  return RET_OK;
+}
+
 int GatherOpenCLKernel::InitWeights() {
   auto indices_tensor = in_tensors_.at(1);
   auto indices_num = indices_tensor->ElementsNum();
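Reviewer note on ConvertTensorToweight (illustration only, not part of the patch): the runtime indices live in device memory whose rows may be padded, so the element stride between consecutive indices is RowPitch() / sizeof(cl_int); the function maps that memory and repacks it into the tight int32 buffer the gather kernel reads. A standalone host-side sketch of the repacking, with a made-up pitch and made-up index values:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const size_t row_pitch_bytes = 32;                          // assumed pitch reported by the runtime
      const size_t stride_w = row_pitch_bytes / sizeof(int32_t);  // elements between consecutive indices
      std::vector<int32_t> padded(2 * stride_w, 0);               // padded, image-style layout
      padded[0 * stride_w] = 1;
      padded[1 * stride_w] = 3;
      std::vector<int32_t> packed(2);                             // tight layout, as indices_data_ expects
      for (size_t i = 0; i < packed.size(); ++i) {
        packed[i] = padded[i * stride_w];
      }
      std::printf("%d %d\n", packed[0], packed[1]);               // prints "1 3"
      return 0;
    }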
@@ -152,6 +185,9 @@ int GatherOpenCLKernel::InitWeights() {
 int GatherOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
+  if (intensor1_is_tensor) {
+    ConvertTensorToweight();
+  }
   ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c());
   ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c());
   ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF);
@@ -39,6 +39,7 @@ class GatherOpenCLKernel : public OpenCLKernel {
   void SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override { return lite::RET_OK; }
+  int ConvertTensorToweight();

  protected:
   int UpdateWeights();
@@ -46,6 +47,9 @@ class GatherOpenCLKernel : public OpenCLKernel {
  private:
   int32_t *indices_data_{nullptr};
   int axis_ = {0};
+  bool intensor1_is_tensor{false};
+  bool enable_fp16_{false};
+  cl_int stride_w{1};
 };
 }  // namespace mindspore::kernel
 #endif
@@ -34,7 +34,7 @@ namespace mindspore::kernel {
 int ToFormatOpenCLKernel::CheckSpecs() {
   auto data_type = in_tensors_.front()->data_type();
-  if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) {
+  if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16 && data_type != kNumberTypeInt32) {
     MS_LOG(ERROR) << "Unsupported data type " << data_type;
     return RET_ERROR;
   }
@@ -61,7 +61,8 @@ void ToFormatOpenCLKernel::SetGlobalLocal() {
 }

 int ToFormatOpenCLKernel::Prepare() {
-  std::map<TypeId, std::string> dtype_str{{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}};
+  std::map<TypeId, std::string> dtype_str{
+      {kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}, {kNumberTypeInt32, "float"}};
   std::string kernel_name;
   if (out_mem_type_ == MemType::IMG) {
     kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_.front()->data_type()];
@@ -68,7 +68,7 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
       memcpy(tensor->MutableData(), input_data, tensor->Size());
     }
   } else {
-    EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32);
+    EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32);
     subgraph_inputs.push_back(tensor);
     subgraph_inputs_data[tensor] = reinterpret_cast<float *>(input_data);
   }
@@ -46,6 +46,22 @@ TEST_F(TestOpenCL_Gather, Axis0) {
   }
 }

+TEST_F(TestOpenCL_Gather, Axis0_Tensor) {
+  int axis = 0;
+  std::vector<int> input_shape = {10};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {2};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {1, 3};
+  for (auto fp16_enable : {false}) {
+    auto *param = CreateParameter(axis);
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
+  }
+}
+
 TEST_F(TestOpenCL_Gather, Axis1) {
   int axis = 1;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -75,6 +91,35 @@ TEST_F(TestOpenCL_Gather, Axis1) {
   }
 }

+TEST_F(TestOpenCL_Gather, Axis1_intensor1) {
+  int axis = 1;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 2, 4, 4};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+  float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                         48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+  int32_t indices_int32[] = {1, 3};
+  int64_t indices_int64[] = {1, 3};
+  float32_t indices_fp32[] = {1, 3};
+  float16_t indices_fp16[] = {1, 3};
+  TypeId data_types[] = {kNumberTypeInt32, kNumberTypeInt64, kNumberTypeFloat32, kNumberTypeFloat16};
+  void *indices_datas[] = {indices_int32, indices_int64, indices_fp32, indices_fp16};
+  // Only the Int32 variant (i == 0) runs for now: ConvertTensorToweight handles Int32 runtime indices only.
+  for (int i = 0; i < 1; ++i) {
+    for (auto fp16_enable : {false}) {
+      auto *param = CreateParameter(axis);
+      TestMain(
+        {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices_datas[i], VAR, data_types[i]}},
+        {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
+    }
+  }
+}
+
 TEST_F(TestOpenCL_Gather, Axis2) {
   int axis = 2;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -96,6 +141,26 @@ TEST_F(TestOpenCL_Gather, Axis2) {
   }
 }

+TEST_F(TestOpenCL_Gather, Axis2_intensor1) {
+  int axis = 2;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 5, 2, 4};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 36, 37, 38, 39,
+                         44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79};
+  for (auto fp16_enable : {false}) {
+    auto *param = CreateParameter(axis);
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable);
+  }
+}
+
 TEST_F(TestOpenCL_Gather, Axis3) {
   int axis = 3;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -117,4 +182,24 @@ TEST_F(TestOpenCL_Gather, Axis3) {
   }
 }

+TEST_F(TestOpenCL_Gather, Axis3_intensor1) {
+  int axis = 3;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 5, 4, 2};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39,
+                         41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79};
+  for (auto fp16_enable : {false}) {
+    auto *param = CreateParameter(axis);
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable);
+  }
+}
+
 }  // namespace mindspore::lite::opencl::test