From ff4b738c0d5ad597c3fe07250fcd950ce9e2cc6d Mon Sep 17 00:00:00 2001
From: yeyunpeng2020 <yeyunpeng1@huawei.com>
Date: Sun, 18 Apr 2021 21:18:26 +0800
Subject: [PATCH] fix opencl one_hot and support int32 data type.

---
 .../runtime/gpu/opencl/opencl_allocator.cc    |   2 +
 .../src/runtime/gpu/opencl/opencl_executor.cc |  44 ++--
 .../src/runtime/gpu/opencl/opencl_runtime.cc  |   5 +-
 .../src/runtime/gpu/opencl/opencl_runtime.h   |   2 +-
 .../src/runtime/kernel/opencl/cl/gather.cl    |  82 +++----
 .../src/runtime/kernel/opencl/cl/one_hot.cl   | 219 +++++++-----------
 .../src/runtime/kernel/opencl/cl/reshape.cl   | 103 ++++----
 .../src/runtime/kernel/opencl/cl/to_format.cl | 118 +++++-----
 .../runtime/kernel/opencl/kernel/concat.cc    |   2 +-
 .../runtime/kernel/opencl/kernel/gather.cc    |  20 +-
 .../runtime/kernel/opencl/kernel/one_hot.cc   |  18 +-
 .../runtime/kernel/opencl/kernel/one_hot.h    |   1 +
 .../runtime/kernel/opencl/kernel/reshape.cc   |  11 +-
 .../runtime/kernel/opencl/opencl_kernel.cc    |  12 +-
 .../src/runtime/kernel/opencl/opencl_kernel.h |   2 +-
 mindspore/lite/src/scheduler.cc               |   5 +-
 .../src/runtime/kernel/opencl/gather_tests.cc |  98 ++++----
 .../runtime/kernel/opencl/one_hot_tests.cc    |  52 ++---
 .../runtime/kernel/opencl/reshape_tests.cc    |  36 ++-
 19 files changed, 420 insertions(+), 412 deletions(-)
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
index 1837c39785..db7e0398ca 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
@@ -145,6 +145,8 @@ int OpenCLAllocator::GetImgDtypeSize(const ImageSize &img_size) {
     dtype_size = sizeof(cl_half);
   } else if (img_size.dtype == CL_SIGNED_INT8) {
     dtype_size = sizeof(cl_uchar);
+  } else if (img_size.dtype == CL_SIGNED_INT32) {
+    dtype_size = sizeof(cl_int);
   } else {
     MS_LOG(ERROR) << "Unsupported dtype " << img_size.dtype;
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc
index 3786b090c7..f3917e7462 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.cc
@@ -61,23 +61,35 @@ int OpenCLExecutor::RunOrTune(const std::vector<Tensor *> &inputs, const std::ve
         return ret;
       }
     }
-    if (is_tune) {
-      ret = op_kernel->Tune();
-      if (ret != RET_OK) {
-        MS_LOG(ERROR) << "tuning kernel failed, name: " << kernel->name();
-        return ret;
-      }
-    } else {
-      ret = kernel->Run();
-      if (ret != RET_OK) {
-        MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
-        return ret;
+    // Support ZeroShape
+    size_t zero_shape_num = 0;
+    for (auto tensor : kernel->out_tensors()) {
+      for (size_t i = 0; i < tensor->shape().size(); i++) {
+        if (tensor->shape()[i] == 0) {
+          zero_shape_num++;
+          break;
+        }
       }
-      if (profiling_tmp) {
-        auto execute_time = op_kernel->GetProfilingTimeMs();
-        MS_LOG(INFO) << "OpenCl kernel " << kernel->name() << "(" << kernel->type_str()
-                     << ") execute time is: " << op_kernel->GetProfilingTimeMs() << "ms";
-        callbackParam.execute_time = execute_time;
+    }
+    if (zero_shape_num != kernel->out_tensors().size()) {
+      if (is_tune) {
+        ret = op_kernel->Tune();
+        if (ret != RET_OK) {
+          MS_LOG(ERROR) << "tuning kernel failed, name: " << kernel->name();
+          return ret;
+        }
+      } else {
+        ret = kernel->Run();
+        if (ret != RET_OK) {
+          MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
+          return ret;
+        }
+        if (profiling_tmp) {
+          auto execute_time = op_kernel->GetProfilingTimeMs();
+          MS_LOG(INFO) << "OpenCl kernel " << kernel->name() << "(" << kernel->type_str()
+                       << ") execute time is: " << op_kernel->GetProfilingTimeMs() << "ms";
+          callbackParam.execute_time = execute_time;
+        }
       }
     }
     ret = kernel->PostProcess();
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc
index 6b831146ae..df333f7df9 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc
@@ -369,10 +369,9 @@ bool OpenCLRuntime::SetFp16Enable(bool enable) {
 }
 
 int OpenCLRuntime::BuildKernel(const cl::Kernel &kernel, const std::string &program_name,
-                               const std::string &kernel_name, const std::vector<std::string> &build_options_ext,
-                               TypeId data_type) {
+                               const std::string &kernel_name, const std::vector<std::string> &build_options_ext) {
   std::string build_option = default_build_option_;
-  if (fp16_enable_ && data_type != kNumberTypeInt32) {
+  if (fp16_enable_) {
     build_option +=
       " -DFP16_ENABLE=1 -DFLT=half -DFLT4=half4 -DFLT16=half16 -DAS_FLT4=as_half4 -DAS_UINT4=as_ushort4 -DUINT4=ushort4"
       " -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh -DTO_FLT=convert_half -DTO_FLT4=convert_half4";
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
index c642c0b971..d67378c025 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
@@ -120,7 +120,7 @@ class OpenCLRuntime {
   std::vector<unsigned char> GetProgramBinary(const cl::Program &program);
   bool LoadSource(const std::string &program_name, const std::string &source);
   int BuildKernel(const cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name,
-                  const std::vector<std::string> &build_options_ext = {}, TypeId data_type = kNumberTypeFloat32);
+                  const std::vector<std::string> &build_options_ext = {});
   int RunKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local,
                 cl::CommandQueue *command_queue = nullptr, cl::Event *event = nullptr);
   int ReadOrWriteImage(void *buffer, void *data, bool is_read);
diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl
index 97a68da12d..36edf3a38f 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl
@@ -2,43 +2,47 @@
 #define C4NUM 4
 __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
 
-__kernel void gather(__write_only image2d_t dst_data, __read_only image2d_t src_data, __global int *indices,
-                     int4 src_size, int4 dst_size, int indices_num, int axis) {
-  int X = get_global_id(0);  // w
-  int Y = get_global_id(1);  // n*h
-  int Z = get_global_id(2);  // c
-  if (X >= dst_size.x || Y >= dst_size.y * dst_size.w || Z >= dst_size.z || dst_size.y == 0) {
-    return;
+#define GATHER(SUFFIX, READ_IMAGE, WRITE_IMAGE, TYPE)                                                                \
+  __kernel void gather_##SUFFIX(__write_only image2d_t dst_data, __read_only image2d_t src_data,                     \
+                                __global int *indices, int4 src_size, int4 dst_size, int indices_num, int axis) {    \
+    int X = get_global_id(0);                                                                                        \
+    int Y = get_global_id(1);                                                                                        \
+    int Z = get_global_id(2);                                                                                        \
+    if (X >= dst_size.x || Y >= dst_size.y * dst_size.w || Z >= dst_size.z || dst_size.y == 0) {                     \
+      return;                                                                                                        \
+    }                                                                                                                \
+    TYPE##4 res_data = (TYPE##4)(0.0f, 0.0f, 0.0f, 0.0f);                                                            \
+    int batch = Y / dst_size.y;                                                                                      \
+    int height = Y % dst_size.y;                                                                                     \
+    if (axis == 0) {                                                                                                 \
+      res_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + Z, indices[batch] * src_size.y + height));   \
+    } else if (axis == 1) {                                                                                          \
+      res_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + Z, batch * src_size.y + indices[height]));   \
+    } else if (axis == 2) {                                                                                          \
+      res_data = READ_IMAGE(src_data, smp_zero, (int2)(indices[X] * src_size.z + Z, batch * src_size.y + height));   \
+    } else if (axis == 3) {                                                                                          \
+      int offset[4] = {indices[Z * 4] / 4, indices[Z * 4 + 1] / 4, indices[Z * 4 + 2] / 4, indices[Z * 4 + 3] / 4};  \
+      TYPE tmp[4];                                                                                                   \
+      TYPE res_tmp[4];                                                                                               \
+      for (int i = 0; i < indices_num; ++i) {                                                                        \
+        TYPE##4 rd_data = (TYPE##4)(0.0f, 0.0f, 0.0f, 0.0f);                                                         \
+        rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height));   \
+        if (i >= 1 && offset[i] != offset[i - 1]) {                                                                  \
+          rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height)); \
+        }                                                                                                            \
+        tmp[0] = rd_data.x;                                                                                          \
+        tmp[1] = rd_data.y;                                                                                          \
+        tmp[2] = rd_data.z;                                                                                          \
+        tmp[3] = rd_data.w;                                                                                          \
+        res_tmp[i] = tmp[indices[Z * 4 + i] % 4];                                                                    \
+      }                                                                                                              \
+      res_data.x = res_tmp[0];                                                                                       \
+      res_data.y = res_tmp[1];                                                                                       \
+      res_data.z = res_tmp[2];                                                                                       \
+      res_data.w = res_tmp[3];                                                                                       \
+    }                                                                                                                \
+    WRITE_IMAGE(dst_data, (int2)(X * dst_size.z + Z, batch * dst_size.y + height), res_data);                        \
   }
-  FLT4 res_data = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);
-  int batch = Y / dst_size.y;
-  int height = Y % dst_size.y;
-  if (axis == 0) {
-    res_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + Z, indices[batch] * src_size.y + height));
-  } else if (axis == 1) {
-    res_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + Z, batch * src_size.y + indices[height]));
-  } else if (axis == 2) {
-    res_data = READ_IMAGE(src_data, smp_zero, (int2)(indices[X] * src_size.z + Z, batch * src_size.y + height));
-  } else if (axis == 3) {
-    int offset[4] = {indices[Z * 4] / 4, indices[Z * 4 + 1] / 4, indices[Z * 4 + 2] / 4, indices[Z * 4 + 3] / 4};
-    FLT tmp[4];
-    FLT res_tmp[4];
-    for (int i = 0; i < indices_num; ++i) {
-      FLT4 rd_data = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);
-      rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height));
-      if (i >= 1 && offset[i] != offset[i - 1]) {
-        rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X * src_size.z + offset[i], batch * src_size.y + height));
-      }
-      tmp[0] = rd_data.x;
-      tmp[1] = rd_data.y;
-      tmp[2] = rd_data.z;
-      tmp[3] = rd_data.w;
-      res_tmp[i] = tmp[indices[Z * 4 + i] % 4];
-    }
-    res_data.x = res_tmp[0];
-    res_data.y = res_tmp[1];
-    res_data.z = res_tmp[2];
-    res_data.w = res_tmp[3];
-  }
-  WRITE_IMAGE(dst_data, (int2)(X * dst_size.z + Z, batch * dst_size.y + height), res_data);
-}
+// GATHER(SUFFIX, READ_IMAGE, WRITE_IMAGE, TYPE)
+GATHER(float, READ_IMAGE, WRITE_IMAGE, FLT);
+GATHER(int, read_imagei, write_imagei, int);
diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/one_hot.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/one_hot.cl
index 872cd054ea..49ebd72ad8 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/one_hot.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/one_hot.cl
@@ -4,9 +4,17 @@
 
 #define C4NUM 4
 #define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
+
+#define SET_ON_OR_OFF_VALUE(RESULT, POSITION, INDICES, ON_VALUE, OFF_VALUE) \
+  if (POSITION == INDICES) {                                                \
+    RESULT = (float)(ON_VALUE);                                             \
+  } else {                                                                  \
+    RESULT = (float)(OFF_VALUE);                                            \
+  }
+
 __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
 __kernel void OneHotAxis0(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape,
-                          int4 out_shape, int depth, float on_value, float off_value, int C) {
+                          int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) {
   int X = get_global_id(0);  // C4
   int Y = get_global_id(1);  // W
   int Z = get_global_id(2);  // H * N
@@ -14,42 +22,31 @@ __kernel void OneHotAxis0(__read_only image2d_t src_data, __write_only image2d_t
   int N = Z / out_shape.y;
   int H = Z % out_shape.y;
   int in_index = (H * out_shape.z + Y) * out_shape.w + X;
-  FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x));
+  int4 indices = read_imagei(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x));
   int *indices_int = (int *)&indices;
-  FLT4 result = (FLT4)(0.f);
-  if (4 * X < C) {
-    if (indices_int[0] == N) {
-      result.x = (FLT)(on_value);
-    } else {
-      result.x = (FLT)(off_value);
+  for (int i = 0; i < C4NUM; i++) {
+    if (support_neg_index != 0 && indices_int[i] < 0) {
+      indices_int[i] += depth;
     }
   }
+  float4 result = (float4)(0.f);
+  if (4 * X < C) {
+    SET_ON_OR_OFF_VALUE(result.x, N, indices_int[0], on_value, off_value);
+  }
   if (4 * X + 1 < C) {
-    if (indices_int[1] == N) {
-      result.y = (FLT)(on_value);
-    } else {
-      result.y = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.y, N, indices_int[1], on_value, off_value);
   }
   if (4 * X + 2 < C) {
-    if (indices_int[2] == N) {
-      result.z = (FLT)(on_value);
-    } else {
-      result.z = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.z, N, indices_int[2], on_value, off_value);
   }
   if (4 * X + 3 < C) {
-    if (indices_int[3] == N) {
-      result.w = (FLT)(on_value);
-    } else {
-      result.w = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.w, N, indices_int[3], on_value, off_value);
   }
-  WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result);
+  write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result);
 }
 
 __kernel void OneHotAxis1(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape,
-                          int4 out_shape, int depth, float on_value, float off_value, int C) {
+                          int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) {
   int X = get_global_id(0);  // C4
   int Y = get_global_id(1);  // W
   int Z = get_global_id(2);  // H * N
@@ -57,42 +54,31 @@ __kernel void OneHotAxis1(__read_only image2d_t src_data, __write_only image2d_t
   int N = Z / out_shape.y;
   int H = Z % out_shape.y;
   int in_index = (N * out_shape.z + Y) * out_shape.w + X;
-  FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x));
+  int4 indices = read_imagei(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x));
   int *indices_int = (int *)&indices;
-  FLT4 result = (FLT4)(0.f);
-  if (4 * X < C) {
-    if (indices_int[0] == H) {
-      result.x = (FLT)(on_value);
-    } else {
-      result.x = (FLT)(off_value);
+  for (int i = 0; i < C4NUM; i++) {
+    if (support_neg_index != 0 && indices_int[i] < 0) {
+      indices_int[i] += depth;
     }
   }
+  float4 result = (float4)(0.f);
+  if (4 * X < C) {
+    SET_ON_OR_OFF_VALUE(result.x, H, indices_int[0], on_value, off_value);
+  }
   if (4 * X + 1 < C) {
-    if (indices_int[1] == H) {
-      result.y = (FLT)(on_value);
-    } else {
-      result.y = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.y, H, indices_int[1], on_value, off_value);
   }
   if (4 * X + 2 < C) {
-    if (indices_int[2] == H) {
-      result.z = (FLT)(on_value);
-    } else {
-      result.z = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.z, H, indices_int[2], on_value, off_value);
   }
   if (4 * X + 3 < C) {
-    if (indices_int[3] == H) {
-      result.w = (FLT)(on_value);
-    } else {
-      result.w = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.w, H, indices_int[3], on_value, off_value);
   }
-  WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result);
+  write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result);
 }
 
 __kernel void OneHotAxis2(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape,
-                          int4 out_shape, int depth, float on_value, float off_value, int C) {
+                          int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) {
   int X = get_global_id(0);  // C4
   int Y = get_global_id(1);  // W
   int Z = get_global_id(2);  // H * N
@@ -100,42 +86,31 @@ __kernel void OneHotAxis2(__read_only image2d_t src_data, __write_only image2d_t
   int N = Z / out_shape.y;
   int H = Z % out_shape.y;
   int in_index = (N * out_shape.y + H) * out_shape.w + X;
-  FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x));
+  int4 indices = read_imagei(src_data, smp_zero, (int2)(in_index % in_image2d_shape.x, in_index / in_image2d_shape.x));
   int *indices_int = (int *)&indices;
-  FLT4 result = (FLT4)(0.f);
-  if (4 * X < C) {
-    if (indices_int[0] == Y) {
-      result.x = (FLT)(on_value);
-    } else {
-      result.x = (FLT)(off_value);
+  for (int i = 0; i < C4NUM; i++) {
+    if (support_neg_index != 0 && indices_int[i] < 0) {
+      indices_int[i] += depth;
     }
   }
+  float4 result = (float4)(0.f);
+  if (4 * X < C) {
+    SET_ON_OR_OFF_VALUE(result.x, Y, indices_int[0], on_value, off_value);
+  }
   if (4 * X + 1 < C) {
-    if (indices_int[1] == Y) {
-      result.y = (FLT)(on_value);
-    } else {
-      result.y = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.y, Y, indices_int[1], on_value, off_value);
   }
   if (4 * X + 2 < C) {
-    if (indices_int[2] == Y) {
-      result.z = (FLT)(on_value);
-    } else {
-      result.z = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.z, Y, indices_int[2], on_value, off_value);
   }
   if (4 * X + 3 < C) {
-    if (indices_int[3] == Y) {
-      result.w = (FLT)(on_value);
-    } else {
-      result.w = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.w, Y, indices_int[3], on_value, off_value);
   }
-  WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result);
+  write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result);
 }
 
 __kernel void OneHotAxis3(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape,
-                          int4 out_shape, int depth, float on_value, float off_value, int C) {
+                          int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) {
   int X = get_global_id(0);  // C4
   int Y = get_global_id(1);  // W
   int Z = get_global_id(2);  // H * N
@@ -145,88 +120,54 @@ __kernel void OneHotAxis3(__read_only image2d_t src_data, __write_only image2d_t
   int ci4_size = UP_DIV(out_shape.z, C4NUM);
   int in_index_c4 = (N * out_shape.y + H) * ci4_size + Y / 4;
   int in_index_c4_remainder = Y % 4;
-  FLT4 indices =
-    READ_IMAGE(src_data, smp_zero, (int2)(in_index_c4 % in_image2d_shape.x, in_index_c4 / in_image2d_shape.x));
+  int4 indices =
+    read_imagei(src_data, smp_zero, (int2)(in_index_c4 % in_image2d_shape.x, in_index_c4 / in_image2d_shape.x));
   int *indices_int = (int *)&indices;
   int index_one = indices_int[in_index_c4_remainder];
-  FLT4 result = (FLT4)(0.f);
+  if (support_neg_index != 0 && index_one < 0) {
+    index_one += depth;
+  }
+  float4 result = (float4)(0.f);
   if (4 * X < C) {
-    if (index_one == 4 * X) {
-      result.x = (FLT)(on_value);
-    } else {
-      result.x = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.x, 4 * X, index_one, on_value, off_value);
   }
   if (4 * X + 1 < C) {
-    if (index_one == 4 * X + 1) {
-      result.y = (FLT)(on_value);
-    } else {
-      result.y = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.y, 4 * X + 1, index_one, on_value, off_value);
   }
   if (4 * X + 2 < C) {
-    if (index_one == 4 * X + 2) {
-      result.z = (FLT)(on_value);
-    } else {
-      result.z = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.z, 4 * X + 2, index_one, on_value, off_value);
   }
   if (4 * X + 3 < C) {
-    if (index_one == 4 * X + 3) {
-      result.w = (FLT)(on_value);
-    } else {
-      result.w = (FLT)(off_value);
-    }
+    SET_ON_OR_OFF_VALUE(result.w, 4 * X + 3, index_one, on_value, off_value);
   }
-  WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result);
+  write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result);
 }
 
-__kernel void OneHot2DAxis0(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape,
-                            int4 out_shape, int depth, float on_value, float off_value, int C) {
+__kernel void OneHot2DAxis3(__read_only image2d_t src_data, __write_only image2d_t dst_data, int2 in_image2d_shape,
+                            int4 out_shape, int depth, float on_value, float off_value, int C, int support_neg_index) {
   int X = get_global_id(0);  // C4
-  int Y = get_global_id(1);  // W
-  int Z = get_global_id(2);  // N
+  int Y = get_global_id(1);  // W (out_shape.w is 1, Y is always 0)
+  int Z = get_global_id(2);  // H * N (out_shape.h is 1, so N == Z)
   if (X >= out_shape.w || Y >= out_shape.z || Z >= out_shape.x * out_shape.y) return;
-  FLT4 result = (FLT4)(0.f);
-  int channel = 4 * X;
-  if (channel < C) {
-    FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(0, channel));
-    int index = ((int *)&indices)[0];
-    if (index == Z) {
-      result.x = (FLT)(on_value);
-    } else {
-      result.x = (FLT)(off_value);
-    }
+  int in_index_c4_remainder = Z % 4;
+  int4 indices = read_imagei(src_data, smp_zero, (int2)(Z / C4NUM, 0));
+  int *indices_int = (int *)&indices;
+  int index_one = indices_int[in_index_c4_remainder];
+  if (support_neg_index != 0 && index_one < 0) {
+    index_one += depth;
   }
-  channel++;
-  if (channel < C) {
-    FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(0, channel));
-    int index = ((int *)&indices)[0];
-    if (index == Z) {
-      result.y = (FLT)(on_value);
-    } else {
-      result.y = (FLT)(off_value);
-    }
+  float4 result = (float4)(0.f);
+  if (4 * X < C) {
+    SET_ON_OR_OFF_VALUE(result.x, 4 * X, index_one, on_value, off_value);
   }
-  channel++;
-  if (channel < C) {
-    FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(0, channel));
-    int index = ((int *)&indices)[0];
-    if (index == Z) {
-      result.z = (FLT)(on_value);
-    } else {
-      result.z = (FLT)(off_value);
-    }
+  if (4 * X + 1 < C) {
+    SET_ON_OR_OFF_VALUE(result.y, 4 * X + 1, index_one, on_value, off_value);
   }
-  channel++;
-  if (channel < C) {
-    FLT4 indices = READ_IMAGE(src_data, smp_zero, (int2)(0, channel));
-    int index = ((int *)&indices)[0];
-    if (index == Z) {
-      result.w = (FLT)(on_value);
-    } else {
-      result.w = (FLT)(off_value);
-    }
+  if (4 * X + 2 < C) {
+    SET_ON_OR_OFF_VALUE(result.z, 4 * X + 2, index_one, on_value, off_value);
+  }
+  if (4 * X + 3 < C) {
+    SET_ON_OR_OFF_VALUE(result.w, 4 * X + 3, index_one, on_value, off_value);
   }
-  WRITE_IMAGE(dst_data, (int2)(Y * out_shape.w + X, Z), result);
+  write_imagef(dst_data, (int2)(Y * out_shape.w + X, Z), result);
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/reshape.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/reshape.cl
index dfa8e25f8f..2acccbdf9b 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/reshape.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/reshape.cl
@@ -2,60 +2,53 @@
 #define C4NUM 4
 #define UP_DIV(x, y) (((x) + (y) - (1)) / (y))
 __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
-__kernel void reshape_NHWC4(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 src_size,
-                            int4 dst_size) {
-  int X = get_global_id(0);
-  int Y = get_global_id(1);
-  int CO4 = UP_DIV(dst_size.z, C4NUM);
-  int CO4_rem = dst_size.z % C4NUM;
-  if (X >= dst_size.x || Y > dst_size.y) {
-    return;
-  }
-  int CI4 = UP_DIV(src_size.x, C4NUM);
-  int CI4_rem = src_size.x % C4NUM;
-  CI4_rem = (CI4_rem == 0) ? C4NUM : CI4_rem;
-  int in_img_x = CI4 * src_size.y;
-  FLT4 res = (FLT4)(0.0f);
-  FLT tmp[4];
-  FLT res_tmp[4];
-  int gcnt = 0;
-  if (CO4_rem == 0 && ((CI4_rem & 0x3) == 0)) {
-    gcnt = X + dst_size.x * Y;
-    res = READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x));
-    WRITE_IMAGE(dst_data, (int2)(X, Y), res);
-  } else {
-    int start = ((X / CO4 * dst_size.z + min(dst_size.z, (X % CO4) * C4NUM)) + dst_size.w * Y);
-    gcnt = start / src_size.x * CI4 + (start % src_size.x) / C4NUM;
-    start = start % src_size.x % C4NUM;
-    for (int i = 0, n = 0, j = start; i < C4NUM; ++n, j = 0) {
-      int X_src = (gcnt + n) % in_img_x;
-      res = READ_IMAGE(src_data, smp_zero, (int2)(X_src, (gcnt + n) / in_img_x));
-      tmp[0] = res.x;
-      tmp[1] = res.y;
-      tmp[2] = res.z;
-      tmp[3] = res.w;
-      int k = (X_src % CI4) == (CI4 - 1) ? CI4_rem : C4NUM;
-      for (; j < k && i < C4NUM; ++j, ++i) {
-        res_tmp[i] = tmp[j];
-      }
-    }
-    res.x = res_tmp[0];
-    res.y = res_tmp[1];
-    res.z = res_tmp[2];
-    res.w = res_tmp[3];
-    WRITE_IMAGE(dst_data, (int2)(X, Y), res);
-  }
-}
 
-__kernel void reshape_NC4HW4(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 src_size,
-                             int4 dst_size) {
-  int X = get_global_id(0);
-  int Y = get_global_id(1);
-  if (X >= dst_size.x || Y > dst_size.y) {
-    return;
+#define RESHAPE_NHWC4(SUFFIX, READ_IMAGE, WRITE_IMAGE, TYPE)                                                           \
+  __kernel void reshape_NHWC4_##SUFFIX(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 src_size, \
+                                       int4 dst_size) {                                                                \
+    int X = get_global_id(0);                                                                                          \
+    int Y = get_global_id(1);                                                                                          \
+    int CO4 = UP_DIV(dst_size.z, C4NUM);                                                                               \
+    int CO4_rem = dst_size.z % C4NUM;                                                                                  \
+    if (X >= dst_size.x || Y > dst_size.y) {                                                                           \
+      return;                                                                                                          \
+    }                                                                                                                  \
+    int CI4 = UP_DIV(src_size.x, C4NUM);                                                                               \
+    int CI4_rem = src_size.x % C4NUM;                                                                                  \
+    CI4_rem = (CI4_rem == 0) ? C4NUM : CI4_rem;                                                                        \
+    int in_img_x = CI4 * src_size.y;                                                                                   \
+    TYPE##4 res = (TYPE##4)(0.0f);                                                                                     \
+    TYPE tmp[4];                                                                                                       \
+    TYPE res_tmp[4];                                                                                                   \
+    int gcnt = 0;                                                                                                      \
+    if (CO4_rem == 0 && ((CI4_rem & 0x3) == 0)) {                                                                      \
+      gcnt = X + dst_size.x * Y;                                                                                       \
+      res = READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x));                                  \
+      WRITE_IMAGE(dst_data, (int2)(X, Y), res);                                                                        \
+    } else {                                                                                                           \
+      int start = ((X / CO4 * dst_size.z + min(dst_size.z, (X % CO4) * C4NUM)) + dst_size.w * Y);                      \
+      gcnt = start / src_size.x * CI4 + (start % src_size.x) / C4NUM;                                                  \
+      start = start % src_size.x % C4NUM;                                                                              \
+      for (int i = 0, n = 0, j = start; i < C4NUM; ++n, j = 0) {                                                       \
+        int X_src = (gcnt + n) % in_img_x;                                                                             \
+        res = READ_IMAGE(src_data, smp_zero, (int2)(X_src, (gcnt + n) / in_img_x));                                    \
+        tmp[0] = res.x;                                                                                                \
+        tmp[1] = res.y;                                                                                                \
+        tmp[2] = res.z;                                                                                                \
+        tmp[3] = res.w;                                                                                                \
+        int k = (X_src % CI4) == (CI4 - 1) ? CI4_rem : C4NUM;                                                          \
+        for (; j < k && i < C4NUM; ++j, ++i) {                                                                         \
+          res_tmp[i] = tmp[j];                                                                                         \
+        }                                                                                                              \
+      }                                                                                                                \
+      res.x = res_tmp[0];                                                                                              \
+      res.y = res_tmp[1];                                                                                              \
+      res.z = res_tmp[2];                                                                                              \
+      res.w = res_tmp[3];                                                                                              \
+      WRITE_IMAGE(dst_data, (int2)(X, Y), res);                                                                        \
+    }                                                                                                                  \
   }
-  int CI4 = UP_DIV(src_size.x, C4NUM);
-  int in_img_x = CI4 * src_size.y;
-  int gcnt = X + dst_size.x * Y;
-  WRITE_IMAGE(dst_data, (int2)(X, Y), READ_IMAGE(src_data, smp_zero, (int2)(gcnt % in_img_x, gcnt / in_img_x)));
-}
+
+// RESHAPE_NHWC4(SUFFIX, READ_IMAGE, WRITE_IMAGE, TYPE)
+RESHAPE_NHWC4(float, READ_IMAGE, WRITE_IMAGE, FLT);
+RESHAPE_NHWC4(int, read_imagei, write_imagei, int);
diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/to_format.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/to_format.cl
index dff7bc7c29..e59446c7db 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/to_format.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/to_format.cl
@@ -1,82 +1,82 @@
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
 
-#define BUF_to_IMG(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, WRITE_IMAGE_OUT)                                      \
-  __kernel void BUF_to_IMG_##src_dtype##_##dst_dtype(__global SRC_TYPE##4 *src_data, __write_only image2d_t dst_data, \
-                                                     int4 size, int4 shape) {                                    \
-    int X = get_global_id(0);                                                                                    \
-    int Y = get_global_id(1);                                                                                    \
-    int Z = get_global_id(2);                                                                                    \
-    if (X >= size.x || Y >= size.y || Z >= size.z) {                                                             \
-      return;                                                                                                    \
-    }                                                                                                            \
-    DST_TYPE##4 data = (DST_TYPE##4)(0.f);                                                                         \
-    int offset = (X * shape.z + Y) * shape.w + Z * 4;                                                            \
-    __global SRC_TYPE *src_addr = (__global SRC_TYPE *)src_data;                                                   \
-    src_addr += offset;                                                                                          \
-    if ((Z + 1) * 4 <= shape.w) {                                                                                \
-      data = convert_##DST_TYPE##4(((__global SRC_TYPE##4 *)src_addr)[0]);                                         \
-    } else {                                                                                                     \
-      if ((shape.w - Z * 4) >= 1) {                                                                              \
-        data.x = (DST_TYPE)src_addr[0];                                                                           \
-      }                                                                                                          \
-      if ((shape.w - Z * 4) >= 2) {                                                                              \
-        data.y = (DST_TYPE)src_addr[1];                                                                           \
-      }                                                                                                          \
-      if ((shape.w - Z * 4) >= 3) {                                                                              \
-        data.z = (DST_TYPE)src_addr[2];                                                                           \
-      }                                                                                                          \
-    }                                                                                                            \
-    if (size.y * size.z <= MAX_IMAGE2D_WIDTH)                                                                    \
-      WRITE_IMAGE_OUT(dst_data, (int2)(Y * size.z + Z, X), data);                                                \
-    else                                                                                                         \
-      WRITE_IMAGE_OUT(dst_data, (int2)(Z, X * size.y + Y), data);                                                \
+#define BUF_to_IMG(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, WRITE_IMAGE_OUT)                                          \
+  __kernel void BUF_to_IMG_##src_dtype##_##dst_dtype(__global SRC_TYPE##4 * src_data, __write_only image2d_t dst_data, \
+                                                     int4 size, int4 shape) {                                          \
+    int X = get_global_id(0);                                                                                          \
+    int Y = get_global_id(1);                                                                                          \
+    int Z = get_global_id(2);                                                                                          \
+    if (X >= size.x || Y >= size.y || Z >= size.z) {                                                                   \
+      return;                                                                                                          \
+    }                                                                                                                  \
+    DST_TYPE##4 data = (DST_TYPE##4)(0.f);                                                                             \
+    int offset = (X * shape.z + Y) * shape.w + Z * 4;                                                                  \
+    __global SRC_TYPE *src_addr = (__global SRC_TYPE *)src_data;                                                       \
+    src_addr += offset;                                                                                                \
+    if ((Z + 1) * 4 <= shape.w) {                                                                                      \
+      data = convert_##DST_TYPE##4(((__global SRC_TYPE##4 *)src_addr)[0]);                                             \
+    } else {                                                                                                           \
+      if ((shape.w - Z * 4) >= 1) {                                                                                    \
+        data.x = (DST_TYPE)src_addr[0];                                                                                \
+      }                                                                                                                \
+      if ((shape.w - Z * 4) >= 2) {                                                                                    \
+        data.y = (DST_TYPE)src_addr[1];                                                                                \
+      }                                                                                                                \
+      if ((shape.w - Z * 4) >= 3) {                                                                                    \
+        data.z = (DST_TYPE)src_addr[2];                                                                                \
+      }                                                                                                                \
+    }                                                                                                                  \
+    if (size.y * size.z <= MAX_IMAGE2D_WIDTH)                                                                          \
+      WRITE_IMAGE_OUT(dst_data, (int2)(Y * size.z + Z, X), data);                                                      \
+    else                                                                                                               \
+      WRITE_IMAGE_OUT(dst_data, (int2)(Z, X * size.y + Y), data);                                                      \
   }
 
 // BUF_to_IMG(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, WRITE_IMAGE_OUT)
 BUF_to_IMG(float32, float32, float, float, write_imagef);
 BUF_to_IMG(float32, float16, float, half, write_imageh);
 BUF_to_IMG(float16, float16, half, half, write_imageh);
-BUF_to_IMG(int32, int32, float, float, write_imagef);
-BUF_to_IMG(uint32, uint32, float, float, write_imagef);
+BUF_to_IMG(int32, int32, int, int, write_imagei);
+BUF_to_IMG(uint32, uint32, int, int, write_imagei);
 BUF_to_IMG(int8, int8, char, int, write_imagei);
 
-#define IMG_to_BUF(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, READ_IMAGE_IN)                                            \
+#define IMG_to_BUF(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, READ_IMAGE_IN)                                           \
   __kernel void IMG_to_BUF_##src_dtype##_##dst_dtype(__read_only image2d_t src_data, __global DST_TYPE##4 * dst_data, \
-                                                     int4 size, int4 shape) {                                        \
-    int X = get_global_id(0);                                                                                        \
-    int Y = get_global_id(1);                                                                                        \
-    int Z = get_global_id(2);                                                                                        \
-    if (X >= size.x || Y >= size.y || Z >= size.z) {                                                                 \
-      return;                                                                                                        \
-    }                                                                                                                \
+                                                     int4 size, int4 shape) {                                         \
+    int X = get_global_id(0);                                                                                         \
+    int Y = get_global_id(1);                                                                                         \
+    int Z = get_global_id(2);                                                                                         \
+    if (X >= size.x || Y >= size.y || Z >= size.z) {                                                                  \
+      return;                                                                                                         \
+    }                                                                                                                 \
     DST_TYPE##4 data;                                                                                                 \
-    if (size.y * size.z <= MAX_IMAGE2D_WIDTH)                                                                        \
+    if (size.y * size.z <= MAX_IMAGE2D_WIDTH)                                                                         \
       data = convert_##DST_TYPE##4(READ_IMAGE_IN(src_data, smp_zero, (int2)(Y * size.z + Z, X)));                     \
-    else                                                                                                             \
+    else                                                                                                              \
       data = convert_##DST_TYPE##4(READ_IMAGE_IN(src_data, smp_zero, (int2)(Z, X * size.y + Y)));                     \
-    int offset = (X * shape.z + Y) * shape.w + Z * 4;                                                                \
-    __global DST_TYPE *dst_addr = (__global DST_TYPE *)dst_data;                                                       \
-    dst_addr += offset;                                                                                              \
-    if ((Z + 1) * 4 <= shape.w) {                                                                                    \
+    int offset = (X * shape.z + Y) * shape.w + Z * 4;                                                                 \
+    __global DST_TYPE *dst_addr = (__global DST_TYPE *)dst_data;                                                      \
+    dst_addr += offset;                                                                                               \
+    if ((Z + 1) * 4 <= shape.w) {                                                                                     \
       ((__global DST_TYPE##4 *)dst_addr)[0] = data;                                                                   \
-    } else {                                                                                                         \
-      if (shape.w - Z * 4 >= 1) {                                                                                    \
-        dst_addr[0] = data.x;                                                                                        \
-      }                                                                                                              \
-      if (shape.w - Z * 4 >= 2) {                                                                                    \
-        dst_addr[1] = data.y;                                                                                        \
-      }                                                                                                              \
-      if (shape.w - Z * 4 >= 3) {                                                                                    \
-        dst_addr[2] = data.z;                                                                                        \
-      }                                                                                                              \
-    }                                                                                                                \
+    } else {                                                                                                          \
+      if (shape.w - Z * 4 >= 1) {                                                                                     \
+        dst_addr[0] = data.x;                                                                                         \
+      }                                                                                                               \
+      if (shape.w - Z * 4 >= 2) {                                                                                     \
+        dst_addr[1] = data.y;                                                                                         \
+      }                                                                                                               \
+      if (shape.w - Z * 4 >= 3) {                                                                                     \
+        dst_addr[2] = data.z;                                                                                         \
+      }                                                                                                               \
+    }                                                                                                                 \
   }
 
 // IMG_to_BUF(src_dtype, dst_dtype, SRC_TYPE, DST_TYPE, READ_IMAGE_IN)
 IMG_to_BUF(float32, float32, float, float, read_imagef);
 IMG_to_BUF(float16, float32, half, float, read_imageh);
 IMG_to_BUF(float16, float16, half, half, read_imageh);
-IMG_to_BUF(int32, int32, float, float, read_imagef);
-IMG_to_BUF(uint32, uint32, float, float, read_imagef);
+IMG_to_BUF(int32, int32, int, int, read_imagei);
+IMG_to_BUF(uint32, uint32, int, int, read_imagei);
 IMG_to_BUF(int8, int8, char, char, read_imagei);
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
index ef4e3d0cb0..bb76fc7203 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
@@ -207,7 +207,7 @@ int ConcatOpenCLKernel::Prepare() {
   std::string source = concat_source;
   std::string program_name = "Concat";
   ocl_runtime_->LoadSource(program_name, source);
-  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}, out_tensors_[0]->data_type());
+  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {});
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   SetConstArgs();
   SetGlobalLocal();
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
index 82ee6640d8..aea80665ba 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
@@ -32,8 +32,8 @@ using mindspore::schema::PrimitiveType_Gather;
 namespace mindspore::kernel {
 
 int GatherOpenCLKernel::CheckSpecs() {
-  if (in_tensors_.size() != 2) {
-    MS_LOG(ERROR) << "GatherOpenCLKernel only supports 2 input Tensor but get " << in_tensors_.size();
+  if (in_tensors_.size() != 3) {
+    MS_LOG(ERROR) << "GatherOpenCLKernel only supports 3 input Tensor but get " << in_tensors_.size();
     return RET_ERROR;
   }
   if (out_tensors_.size() != 1) {
@@ -67,6 +67,10 @@ int GatherOpenCLKernel::CheckSpecs() {
     return RET_ERROR;
   }
   axis_ = *reinterpret_cast<int32_t *>(in_tensors_.at(2)->data_c());
+  if (in_tensors_.at(2)->data_c() == nullptr) {
+    MS_LOG(ERROR) << "GatherOpenCLKernel need Axis.";
+    return RET_ERROR;
+  }
   if (axis_ < 0) {
     axis_ += input_ndim;
   }
@@ -102,6 +106,11 @@ void GatherOpenCLKernel::SetGlobalLocal() {
 
 int GatherOpenCLKernel::Prepare() {
   std::string kernel_name = "gather";
+  if (desc_.data_type == kNumberTypeInt32) {
+    kernel_name += "_int";
+  } else {
+    kernel_name += "_float";
+  }
   if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) {
     axis_ = 3;
   }
@@ -110,7 +119,7 @@ int GatherOpenCLKernel::Prepare() {
 #else
   std::string program_name = "gather";
   ocl_runtime_->LoadSource(program_name, gather_source);
-  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}, out_tensors_[0]->data_type());
+  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {});
 #endif
   if (in_tensors_.at(1)->IsConst()) {
     intensor1_is_tensor = false;
@@ -130,7 +139,7 @@ int GatherOpenCLKernel::ConvertTensorToweight() {
   auto indices_tensor = in_tensors_.at(1);
   auto indices_num = indices_tensor->ElementsNum();
   indices_data_ =
-    reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num), lite::opencl::MemType::BUF);
+    reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF));
   allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true);
   if (indices_data_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
@@ -156,7 +165,7 @@ int GatherOpenCLKernel::InitWeights() {
   auto indices_num = indices_tensor->ElementsNum();
   auto allocator = ocl_runtime_->GetAllocator();
   indices_data_ =
-    reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num), lite::opencl::MemType::BUF);
+    reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF));
   if (indices_data_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
     return RET_ERROR;
@@ -198,5 +207,6 @@ int GatherOpenCLKernel::Run() {
 
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Gather, OpenCLKernelCreator<GatherOpenCLKernel>);
 REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Gather, OpenCLKernelCreator<GatherOpenCLKernel>);
+REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Gather, OpenCLKernelCreator<GatherOpenCLKernel>);
 
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
index 48b77c72fc..23fef86cf1 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
@@ -39,12 +39,12 @@ int OneHotOpenCLKernel::CheckSpecs() {
 
 int OneHotOpenCLKernel::Prepare() {
   std::string kernel_name = "OneHot";
-  auto param = reinterpret_cast<OneHotParameter *>(op_parameter_);
+  param_ = reinterpret_cast<OneHotParameter *>(op_parameter_);
   in_shape_ = GpuTensorInfo(in_tensors_[0]);
   out_shape_ = GpuTensorInfo(out_tensors_[0]);
-  axis_ = out_shape_.AlignAxis(param->axis_);
-  if (in_tensors_[0]->shape().size() == 1 && axis_ == 0) {
-    kernel_name += "2DAxis0";
+  axis_ = out_shape_.AlignAxis(param_->axis_);
+  if (in_tensors_[0]->shape().size() == 1 && axis_ == 3) {
+    kernel_name += "2DAxis3";
   } else {
     kernel_name += "Axis" + std::to_string(axis_);
   }
@@ -54,7 +54,7 @@ int OneHotOpenCLKernel::Prepare() {
   std::string source = one_hot_source;
   std::string program_name = "OneHot";
   ocl_runtime_->LoadSource(program_name, source);
-  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name);
+  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {});
 #endif
   InitWeights();
   SetConstArgs();
@@ -69,10 +69,12 @@ int OneHotOpenCLKernel::InitWeights() {
   if (in_tensors_.size() == 3) {  // onnx
     off_value_ = static_cast<float *>(in_tensors_[2]->data_c())[0];
     on_value_ = static_cast<float *>(in_tensors_[2]->data_c())[1];
+    param_->support_neg_index_ = true;
   }
   if (in_tensors_.size() == 4) {  // tf
     on_value_ = static_cast<float *>(in_tensors_[2]->data_c())[0];
     off_value_ = static_cast<float *>(in_tensors_[3]->data_c())[0];
+    param_->support_neg_index_ = false;
   }
   return RET_OK;
 }
@@ -87,7 +89,8 @@ void OneHotOpenCLKernel::SetConstArgs() {
   ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_);
   ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_);
   ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(out_shape_.C));
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<int>(out_shape_.C));
+  ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(param_->support_neg_index_));
 }
 void OneHotOpenCLKernel::SetGlobalLocal() {
   local_size_ = {};
@@ -103,6 +106,5 @@ int OneHotOpenCLKernel::Run() {
   return mindspore::lite::RET_OK;
 }
 
-REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_OneHot, OpenCLKernelCreator<OneHotOpenCLKernel>)
-REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_OneHot, OpenCLKernelCreator<OneHotOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_OneHot, OpenCLKernelCreator<OneHotOpenCLKernel>)
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
index 06fa7e8d6a..137721a46a 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
@@ -43,6 +43,7 @@ class OneHotOpenCLKernel : public OpenCLKernel {
   int axis_{0};
   GpuTensorInfo in_shape_;
   GpuTensorInfo out_shape_;
+  OneHotParameter *param_;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
index 0f8eb1b94d..e7e88e6bb5 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
@@ -73,13 +73,18 @@ void ReshapeOpenCLKernel::SetGlobalLocal() {
 
 int ReshapeOpenCLKernel::Prepare() {
   std::string kernel_name = "reshape_NHWC4";
+  if (desc_.data_type == kNumberTypeInt32) {
+    kernel_name += "_int";
+  } else {
+    kernel_name += "_float";
+  }
 #ifdef PROGRAM_WITH_IL
   kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name);
 #else
   std::string source = reshape_source;
   std::string program_name = "reshape";
   ocl_runtime_->LoadSource(program_name, source);
-  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {}, out_tensors_[0]->data_type());
+  ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, {});
 #endif
 
   SetGlobalLocal();
@@ -109,10 +114,14 @@ int ReshapeOpenCLKernel::PreProcess() {
 
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Reshape, OpenCLKernelCreator<ReshapeOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Reshape, OpenCLKernelCreator<ReshapeOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Reshape, OpenCLKernelCreator<ReshapeOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Squeeze, OpenCLKernelCreator<ReshapeOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Squeeze, OpenCLKernelCreator<ReshapeOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Squeeze, OpenCLKernelCreator<ReshapeOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Unsqueeze, OpenCLKernelCreator<ReshapeOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Unsqueeze, OpenCLKernelCreator<ReshapeOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_Unsqueeze, OpenCLKernelCreator<ReshapeOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_ExpandDims, OpenCLKernelCreator<ReshapeOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_ExpandDims, OpenCLKernelCreator<ReshapeOpenCLKernel>)
+REG_KERNEL(kGPU, kNumberTypeInt32, PrimitiveType_ExpandDims, OpenCLKernelCreator<ReshapeOpenCLKernel>)
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc
index 23b5ab3bf4..1d512474ca 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc
@@ -70,15 +70,15 @@ int OpenCLKernel::GetImageSize(size_t idx, lite::opencl::ImageSize *img_size) {
   auto img_info = GpuTensorInfo(out_tensors_[idx]);
   size_t img_dtype = CL_FLOAT;
   switch (out_tensors_[idx]->data_type()) {
-    case kNumberTypeFloat32:
-    case kNumberTypeInt32:
-    case kNumberTypeUInt32: {
+    case kNumberTypeFloat32: {
       img_dtype = CL_FLOAT;
       break;
     }
-    case kNumberTypeFloat16:
-    case kNumberTypeInt16:
-    case kNumberTypeUInt16: {
+    case kNumberTypeInt32: {
+      img_dtype = CL_SIGNED_INT32;
+      break;
+    }
+    case kNumberTypeFloat16: {
       img_dtype = CL_HALF_FLOAT;
       break;
     }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
index 7ec7bcf11f..55c765e4bd 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
@@ -244,7 +244,7 @@ kernel::LiteKernel *OpenCLKernelCreator(const std::vector<lite::Tensor *> &input
     return nullptr;
   }
   if (!opParameter->infer_flag_) {
-    MS_LOG(WARNING) << "kernel don't infer shape yet!";
+    MS_LOG(WARNING) << "kernel " << opParameter->name_ << "don't infer shape yet!";
     return kernel;
   }
   auto ret = kernel->CheckSpecs();
diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index cf16b85eaf..f716530546 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -360,13 +360,10 @@ kernel::LiteKernel *Scheduler::FindGpuKernel(const std::vector<Tensor *> &in_ten
 
   if (context_->IsGpuEnabled()) {
     // support more data type like int32
-    kernel::KernelKey gpu_desc{kGPU, kNumberTypeFloat32, desc.type};
+    kernel::KernelKey gpu_desc{kGPU, desc.data_type, desc.type};
     if (context_->IsGpuFloat16Enabled()) {
       gpu_desc.data_type = kNumberTypeFloat16;
     }
-    if (in_tensors.front()->data_type() == kNumberTypeInt8) {
-      gpu_desc.data_type = kNumberTypeInt8;
-    }
 
     // weight dequant
     auto ret = WeightDecoder::DequantNode(op_parameter, in_tensors, kNumberTypeFloat32);
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc
index 258205be5e..50412ce3bd 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc
@@ -22,68 +22,75 @@ class TestOpenCL_Gather : public CommonTest {};
 
 namespace {
 // PrimitiveType_Gather: src/ops/populate/gather_populate.cc
-OpParameter *CreateParameter(int axis) {
+OpParameter *CreateParameter() {
   auto *param = test::CreateParameter<GatherParameter>(schema::PrimitiveType_Gather);
-  param->axis_ = axis;
   return reinterpret_cast<OpParameter *>(param);
 }
 }  // namespace
 
 TEST_F(TestOpenCL_Gather, Axis0) {
-  int axis = 0;
   std::vector<int> input_shape = {10};
   std::vector<int> indices_shape = {5};
   std::vector<int> output_shape = {5};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {0};
   float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
   int32_t indices[] = {1, 3, 5, 7, 9};
   float output_data[] = {1, 3, 5, 7, 9};
 
   for (auto fp16_enable : {false}) {
-    auto *param = CreateParameter(axis);
-    TestMain(
-      {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}},
-      {output_shape, output_data}, param, fp16_enable);
+    auto *param = CreateParameter();
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+              {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32},
+              {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable);
   }
 }
 
 TEST_F(TestOpenCL_Gather, Axis0ConstTensor) {
-  int axis = 0;
   std::vector<int> input_shape = {10};
   std::vector<int> indices_shape = {1};
   std::vector<int> output_shape = {1};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {0};
   float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
   int32_t indices[] = {1};
   float output_data[] = {1};
 
   for (auto fp16_enable : {false}) {
-    auto *param = CreateParameter(axis);
-    TestMain(
-      {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}},
-      {output_shape, output_data}, param, fp16_enable);
+    auto *param = CreateParameter();
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+              {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32},
+              {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable);
   }
 }
 
 TEST_F(TestOpenCL_Gather, Axis0_Tensor) {
-  int axis = 0;
   std::vector<int> input_shape = {10};
   std::vector<int> indices_shape = {1};
   std::vector<int> output_shape = {1};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {0};
   float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
   int32_t indices[] = {1};
   float output_data[] = {1};
 
   for (auto fp16_enable : {false}) {
-    auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+    auto *param = CreateParameter();
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+              {indices_shape, indices, VAR, kNumberTypeInt32},
+              {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
              {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
   }
 }
 
 TEST_F(TestOpenCL_Gather, Axis1) {
-  int axis = 1;
   std::vector<int> input_shape = {1, 5, 4, 4};
   std::vector<int> indices_shape = {2};
   std::vector<int> output_shape = {1, 2, 4, 4};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {1};
   float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
@@ -100,19 +107,21 @@ TEST_F(TestOpenCL_Gather, Axis1) {
 
   for (int i = 0; i < 1; ++i) {
     for (auto fp16_enable : {false, true}) {
-      auto *param = CreateParameter(axis);
+      auto *param = CreateParameter();
       TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
-                {indices_shape, indices_datas[i], CONST_TENSOR, data_types[i]}},
+                {indices_shape, indices_datas[i], CONST_TENSOR, data_types[i]},
+                {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
                {output_shape, output_data}, param, fp16_enable);
     }
   }
 }
 
 TEST_F(TestOpenCL_Gather, Axis1_intensor1) {
-  int axis = 1;
   std::vector<int> input_shape = {1, 5, 4, 4};
   std::vector<int> indices_shape = {2};
   std::vector<int> output_shape = {1, 2, 4, 4};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {1};
   float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
@@ -129,19 +138,21 @@ TEST_F(TestOpenCL_Gather, Axis1_intensor1) {
 
   for (int i = 0; i < 1; ++i) {
     for (auto fp16_enable : {false}) {
-      auto *param = CreateParameter(axis);
-      TestMain(
-        {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices_datas[i], VAR, data_types[i]}},
-        {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
+      auto *param = CreateParameter();
+      TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+                {indices_shape, indices_datas[i], VAR, data_types[i]},
+                {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
+               {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-9);
     }
   }
 }
 
 TEST_F(TestOpenCL_Gather, Axis2) {
-  int axis = 2;
   std::vector<int> input_shape = {1, 5, 4, 4};
   std::vector<int> indices_shape = {2};
   std::vector<int> output_shape = {1, 5, 2, 4};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {2};
   float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
@@ -151,18 +162,20 @@ TEST_F(TestOpenCL_Gather, Axis2) {
                          44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79};
 
   for (auto fp16_enable : {false, true}) {
-    auto *param = CreateParameter(axis);
-    TestMain(
-      {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}},
-      {output_shape, output_data}, param, fp16_enable);
+    auto *param = CreateParameter();
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+              {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32},
+              {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable);
   }
 }
 
 TEST_F(TestOpenCL_Gather, Axis2_intensor1) {
-  int axis = 2;
   std::vector<int> input_shape = {1, 5, 4, 4};
   std::vector<int> indices_shape = {2};
   std::vector<int> output_shape = {1, 5, 2, 4};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {2};
   float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
@@ -172,17 +185,20 @@ TEST_F(TestOpenCL_Gather, Axis2_intensor1) {
                          44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79};
 
   for (auto fp16_enable : {false}) {
-    auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+    auto *param = CreateParameter();
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+              {indices_shape, indices, VAR, kNumberTypeInt32},
+              {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
              {output_shape, output_data}, param, fp16_enable);
   }
 }
 
 TEST_F(TestOpenCL_Gather, Axis3) {
-  int axis = 3;
   std::vector<int> input_shape = {1, 5, 4, 4};
   std::vector<int> indices_shape = {2};
   std::vector<int> output_shape = {1, 5, 4, 2};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {3};
   float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
@@ -192,18 +208,20 @@ TEST_F(TestOpenCL_Gather, Axis3) {
                          41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79};
 
   for (auto fp16_enable : {false, true}) {
-    auto *param = CreateParameter(axis);
-    TestMain(
-      {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}},
-      {output_shape, output_data}, param, fp16_enable);
+    auto *param = CreateParameter();
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+              {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32},
+              {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
+             {output_shape, output_data}, param, fp16_enable);
   }
 }
 
 TEST_F(TestOpenCL_Gather, Axis3_intensor1) {
-  int axis = 3;
   std::vector<int> input_shape = {1, 5, 4, 4};
   std::vector<int> indices_shape = {2};
   std::vector<int> output_shape = {1, 5, 4, 2};
+  std::vector<int> axis_shape = {1};
+  int32_t axis_data[] = {3};
   float input_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
                         20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
                         40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
@@ -213,8 +231,10 @@ TEST_F(TestOpenCL_Gather, Axis3_intensor1) {
                          41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79};
 
   for (auto fp16_enable : {false}) {
-    auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, VAR, kNumberTypeInt32}},
+    auto *param = CreateParameter();
+    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+              {indices_shape, indices, VAR, kNumberTypeInt32},
+              {axis_shape, axis_data, CONST_TENSOR, kNumberTypeInt32}},
              {output_shape, output_data}, param, fp16_enable);
   }
 }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/one_hot_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/one_hot_tests.cc
index ed0fc24c1a..d741989a28 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/one_hot_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/one_hot_tests.cc
@@ -42,7 +42,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis3Fp32) {
   float output_data[] = {-1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1};
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -63,7 +63,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -86,7 +86,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T3Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -109,7 +109,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T4Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -130,7 +130,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -152,7 +152,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -173,7 +173,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T3Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -196,7 +196,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T4Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -219,7 +219,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T1Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -240,7 +240,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -263,7 +263,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T3Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -284,7 +284,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis0Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -307,7 +307,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis0T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -332,7 +332,7 @@ TEST_F(TestOpenCL_OneHot, OneHot4DAxis0T3Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -354,7 +354,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis0Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -377,7 +377,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis0T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -399,7 +399,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis1Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -422,7 +422,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis1T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -443,7 +443,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -466,7 +466,7 @@ TEST_F(TestOpenCL_OneHot, OneHot3DAxis2T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -487,7 +487,7 @@ TEST_F(TestOpenCL_OneHot, OneHot2DAxis0Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -508,7 +508,7 @@ TEST_F(TestOpenCL_OneHot, OneHot2DAxis0T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -529,7 +529,7 @@ TEST_F(TestOpenCL_OneHot, OneHot2DAxis1Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -551,7 +551,7 @@ TEST_F(TestOpenCL_OneHot, OneHot2DAxis1T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -572,7 +572,7 @@ TEST_F(TestOpenCL_OneHot, OneHot1DAxis0Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
@@ -593,7 +593,7 @@ TEST_F(TestOpenCL_OneHot, OneHot1DAxis0T2Fp32) {
 
   for (auto fp16_enable : {false}) {
     auto *param = CreateParameter(axis);
-    TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32},
+    TestMain({{input_shape, input_data, VAR, kNumberTypeInt32},
               {{}, &depth, CONST_SCALAR, kNumberTypeInt32},
               {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32},
               {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}},
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc
index f5b950497b..38bf78c03d 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc
@@ -34,7 +34,9 @@ TEST_F(TestOpenCL_Reshape, 4D_2D_test0) {
   float input_data[] = {0, 1, 2, 3, 4, 5, 6};
   float output_data[] = {0, 1, 2, 3, 4, 5, 6};
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 
@@ -44,7 +46,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test0) {
   float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
   float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 
@@ -54,7 +58,9 @@ TEST_F(TestOpenCL_Reshape, 4D_2D_test1) {
   float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
   float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 
@@ -64,7 +70,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test1) {
   float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
   float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 
@@ -74,7 +82,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test2) {
   float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
   float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 
@@ -90,7 +100,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test3) {
     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
   };
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 
@@ -119,7 +131,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test4) {
     184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199};
 
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 
@@ -131,7 +145,9 @@ TEST_F(TestOpenCL_Reshape, 4D_4D_test5) {
   float output_data[] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
                          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29};
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 
@@ -152,7 +168,9 @@ TEST_F(TestOpenCL_Reshape, 3D_2D_test6) {
     96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119};
 
   for (auto fp16_enable : {false, true}) {
-    TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable);
+    TestMain({{shape_in, input_data, VAR, kNumberTypeFloat32},
+              {{static_cast<int>(shape_out.size())}, shape_out.data(), CONST_TENSOR, kNumberTypeInt32}},
+             {shape_out, output_data}, CreateParameter(), fp16_enable);
   }
 }
 }  // namespace mindspore::lite::opencl::test