@@ -39,12 +39,11 @@ int GatherOpenCLKernel::CheckSpecs() {
     MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1 output Tensor but get " << out_tensors_.size();
     return RET_ERROR;
   }
-  if (in_tensors_.at(1)->category() == lite::Tensor::VAR) {
-    MS_LOG(ERROR) << "GatherOpenCLKernel only supports indices Tensor is weight.";
+  enable_fp16_ = ocl_runtime_->GetFp16Enable();
+  if (!in_tensors_.at(1)->IsConst() && enable_fp16_) {
+    MS_LOG(ERROR) << "GatherOpenCLKernel does not support a non-const indices Tensor when fp16 is enabled.";
     return RET_ERROR;
   }
   int input_ndim = in_tensors_.front()->shape().size();
   if (input_ndim < 0 || input_ndim > 4) {
     MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1-4D input Tensor but get " << input_ndim << "D.";
@@ -59,7 +58,7 @@ int GatherOpenCLKernel::CheckSpecs() {
   TypeId data_type = in_tensors_.at(1)->data_type();
   if (data_type != kNumberTypeInt32 && data_type != kNumberTypeInt64 && data_type != kNumberTypeFloat32 &&
       data_type != kNumberTypeFloat16) {
-    MS_LOG(ERROR) << "Conv2D only supports Int32/Int64/Float32/Float16 indices Tensor.";
+    MS_LOG(ERROR) << "GatherOpenCLKernel only supports Int32/Int64/Float32/Float16 indices Tensor.";
     return RET_ERROR;
   }
@@ -107,17 +106,51 @@ int GatherOpenCLKernel::Prepare() {
   ocl_runtime_->LoadSource(program_name, gather_source);
   ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
 #endif
-  int ret = InitWeights();
-  if (ret != RET_OK) {
-    return ret;
+  if (!in_tensors_.at(1)->IsConst()) {
+    intensor1_is_tensor = true;
+  }
+  if (!intensor1_is_tensor) {
+    int ret = InitWeights();
+    if (ret != RET_OK) {
+      return ret;
+    }
   }
   SetGlobalLocal();
   SetConstArgs();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }

+int GatherOpenCLKernel::ConvertTensorToweight() {
+  auto allocator = ocl_runtime_->GetAllocator();
+  GpuTensorInfo img_info(in_tensors_[1]);
+  size_t dtype = sizeof(cl_int);
+  stride_w = img_info.RowPitch() / dtype;
+  auto indices_tensor = in_tensors_.at(1);
+  auto indices_num = indices_tensor->ElementsNum();
+  indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num));
+  if (indices_data_ == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    return RET_ERROR;
+  }
+  allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true);
+  auto data_type = indices_tensor->data_type();
+  auto data = indices_tensor->data_c();
+  if (data_type == kNumberTypeInt32) {
+    for (int i = 0; i < indices_num; i++) {
+      indices_data_[i] = reinterpret_cast<int32_t *>(data)[i * stride_w];
+    }
+  } else {
+    MS_LOG(ERROR) << "ConvertTensorToweight only supports an Int32 indices Tensor but got data type " << data_type;
+    return RET_ERROR;
+  }
+  allocator->UnmapBuffer(indices_data_);
+  return RET_OK;
+}
+
 int GatherOpenCLKernel::InitWeights() {
   auto indices_tensor = in_tensors_.at(1);
   auto indices_num = indices_tensor->ElementsNum();
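Reviewer note on ConvertTensorToweight (illustration only, not part of the patch): the runtime indices live in device memory whose rows may be padded, so the element stride between consecutive indices is RowPitch() / sizeof(cl_int); the function maps that memory and repacks it into the tight int32 buffer the gather kernel reads. A standalone host-side sketch of the repacking, with a made-up pitch and made-up index values:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const size_t row_pitch_bytes = 32;                          // assumed pitch reported by the runtime
      const size_t stride_w = row_pitch_bytes / sizeof(int32_t);  // elements between consecutive indices
      std::vector<int32_t> padded(2 * stride_w, 0);               // padded, image-style layout
      padded[0 * stride_w] = 1;
      padded[1 * stride_w] = 3;
      std::vector<int32_t> packed(2);                             // tight layout, as indices_data_ expects
      for (size_t i = 0; i < packed.size(); ++i) {
        packed[i] = padded[i * stride_w];
      }
      std::printf("%d %d\n", packed[0], packed[1]);               // prints "1 3"
      return 0;
    }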
@@ -152,6 +185,9 @@ int GatherOpenCLKernel::InitWeights() {
 int GatherOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
+  if (intensor1_is_tensor) {
+    ConvertTensorToweight();
+  }
   ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c());
   ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c());
   ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF);
@@ -39,6 +39,7 @@ class GatherOpenCLKernel : public OpenCLKernel {
   void SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override { return lite::RET_OK; }
+  int ConvertTensorToweight();

  protected:
   int UpdateWeights();
@@ -46,6 +47,9 @@ class GatherOpenCLKernel : public OpenCLKernel {
  private:
   int32_t *indices_data_{nullptr};
   int axis_ = {0};
+  bool intensor1_is_tensor{false};
+  bool enable_fp16_{false};
+  cl_int stride_w{1};
 };
 }  // namespace mindspore::kernel
 #endif
@@ -34,7 +34,7 @@ namespace mindspore::kernel {
 int ToFormatOpenCLKernel::CheckSpecs() {
   auto data_type = in_tensors_.front()->data_type();
-  if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) {
+  if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16 && data_type != kNumberTypeInt32) {
     MS_LOG(ERROR) << "Unsupported data type " << data_type;
     return RET_ERROR;
   }
@@ -61,7 +61,8 @@ void ToFormatOpenCLKernel::SetGlobalLocal() {
 }

 int ToFormatOpenCLKernel::Prepare() {
-  std::map<TypeId, std::string> dtype_str{{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}};
+  std::map<TypeId, std::string> dtype_str{
+      {kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}, {kNumberTypeInt32, "float"}};
   std::string kernel_name;
   if (out_mem_type_ == MemType::IMG) {
     kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_.front()->data_type()];
@@ -68,7 +68,7 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
       memcpy(tensor->MutableData(), input_data, tensor->Size());
     }
   } else {
-    EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32);
+    EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32);
     subgraph_inputs.push_back(tensor);
     subgraph_inputs_data[tensor] = reinterpret_cast<float *>(input_data);
   }
@@ -46,6 +46,22 @@ TEST_F(TestOpenCL_Gather, Axis0) {
   }
 }

+TEST_F(TestOpenCL_Gather, Axis0_Tensor) {
+  int axis = 0;
+  std::vector<int> input_shape = {10};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {2};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {1, 3};
+  for (auto fp16_enable : {false}) {
+    auto *param = CreateParameter(axis);
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
+  }
+}
+
 TEST_F(TestOpenCL_Gather, Axis1) {
   int axis = 1;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -75,6 +91,35 @@ TEST_F(TestOpenCL_Gather, Axis1) {
   }
 }

+TEST_F(TestOpenCL_Gather, Axis1_intensor1) {
+  int axis = 1;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 2, 4, 4};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+  float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                         48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+  int32_t indices_int32[] = {1, 3};
+  int64_t indices_int64[] = {1, 3};
+  float32_t indices_fp32[] = {1, 3};
+  float16_t indices_fp16[] = {1, 3};
+  TypeId data_types[] = {kNumberTypeInt32, kNumberTypeInt64, kNumberTypeFloat32, kNumberTypeFloat16};
+  void *indices_datas[] = {indices_int32, indices_int64, indices_fp32, indices_fp16};
+  // Only the Int32 variant (i == 0) runs for now: ConvertTensorToweight handles Int32 runtime indices only.
+  for (int i = 0; i < 1; ++i) {
+    for (auto fp16_enable : {false}) {
+      auto *param = CreateParameter(axis);
+      TestMain(
+        {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices_datas[i], VAR, data_types[i]}},
+        {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
+    }
+  }
+}
+
 TEST_F(TestOpenCL_Gather, Axis2) {
   int axis = 2;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -96,6 +141,26 @@ TEST_F(TestOpenCL_Gather, Axis2) {
   }
 }

+TEST_F(TestOpenCL_Gather, Axis2_intensor1) {
+  int axis = 2;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 5, 2, 4};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 36, 37, 38, 39,
+                         44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79};
+  for (auto fp16_enable : {false}) {
+    auto *param = CreateParameter(axis);
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable);
+  }
+}
+
 TEST_F(TestOpenCL_Gather, Axis3) {
   int axis = 3;
   std::vector<int> input_shape = {1, 5, 4, 4};
@@ -117,4 +182,24 @@ TEST_F(TestOpenCL_Gather, Axis3) {
   }
 }

+TEST_F(TestOpenCL_Gather, Axis3_intensor1) {
+  int axis = 3;
+  std::vector<int> input_shape = {1, 5, 4, 4};
+  std::vector<int> indices_shape = {2};
+  std::vector<int> output_shape = {1, 5, 4, 2};
+  float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+                        40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+                        60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79};
+  int32_t indices[] = {1, 3};
+  float output_data[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39,
+                         41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79};
+  for (auto fp16_enable : {false}) {
+    auto *param = CreateParameter(axis);
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable);
+  }
+}
+
 }  // namespace mindspore::lite::opencl::test