!4553 add ori_format and ocr2/mv2 opencl perf testcase

Merge pull request !4553 from wandongdong/up
5 years ago · 4ddbe4c44f
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
@@ -44,6 +44,7 @@ int ActivationOpenClKernel::Init() {
    MS_LOG(ERROR) << "Activate fun only support dim=4, but your dim=" << in_tensors_[0]->shape().size();
    return RET_ERROR;
  }
  ori_format_ = out_tensors_[0]->GetFormat();
  std::string program_name = "";
  std::string kernel_name = "";
  std::string source = activation_source_fp32;
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
@@ -111,6 +111,7 @@ int ArithmeticOpenCLKernel::Init() {
  runtime_->LoadSource(program_name, source);
  runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
 #endif
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  Image2dGetWorkGroupSize();
  return 0;
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
@@ -56,6 +56,7 @@ int Conv2dTransposeOpenCLKernel::Init() {
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
 #endif
  PadWeight();
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc
@@ -40,6 +40,7 @@ int ConvolutionOpenCLKernel::Init() {
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
  this->InitBuffer();
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
@@ -43,6 +43,7 @@ int DepthwiseConv2dOpenCLKernel::Init() {
  auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
  std::string kernel_name = "DepthwiseConv2d";
  auto in_format = in_tensors_[0]->GetFormat();
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(in_format);
  if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
    MS_LOG(ERROR) << "input format(" << in_format << ") "
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
@@ -68,6 +68,7 @@ int MatMulOpenCLKernel::Init() {
  PadWeight();
  allocator->UnmapBuffer(padWeight_);
  allocator->UnmapBuffer(bias_);
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return 0;
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
@@ -19,7 +19,6 @@
 #include <vector>
 #include "src/runtime/kernel/opencl/opencl_kernel.h"
 #include "src/runtime/kernel/arm/nnacl/conv_parameter.h"
 #include "src/runtime/opencl/opencl_runtime.h"
@@ -32,11 +31,11 @@ using FLOAT_T = float;
 namespace mindspore::kernel {
 class MatMulOpenCLKernel : public LiteKernel {
 class MatMulOpenCLKernel : public OpenCLKernel {
 public:
  explicit MatMulOpenCLKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                              const std::vector<lite::tensor::Tensor *> &outputs, bool hasBias)
      : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {
      : OpenCLKernel(parameter, inputs, outputs) {
    hasBias_ = hasBias;
  }
  ~MatMulOpenCLKernel() override{};
@@ -57,4 +56,3 @@ class MatMulOpenCLKernel : public LiteKernel {
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_BACKEND_OPENCL_MATMUL_H_
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
@@ -73,6 +73,7 @@ int PoolingOpenCLKernel::Init() {
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
 #endif
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
@@ -48,6 +48,7 @@ int ReshapeOpenCLKernel::Init() {
  ocl_runtime->LoadSource(program_name, source);
  ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options);
 #endif
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
@@ -115,9 +115,10 @@ int SoftmaxOpenCLKernel::Init() {
  }
  std::set<std::string> build_options;
  runtime_->LoadSource(program_name, source);
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options);
 #endif
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NHWC4);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return lite::RET_OK;
 }
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
@@ -60,6 +60,7 @@ int TransposeOpenCLKernel::Init() {
    MS_LOG(ERROR) << "input H * W % 4 != 0 not support!";
    return RET_ERROR;
  }
  ori_format_ = out_tensors_[0]->GetFormat();
  out_tensors_[0]->SetFormat(schema::Format_NCHW);
  MS_LOG(DEBUG) << kernel_name << " Init Done!";
  return RET_OK;
--- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
@@ -49,9 +49,11 @@ class OpenCLKernel : public LiteKernel {
  }
  // Returns the memory type currently recorded in out_mem_type_.
  OpenCLMemType GetMemType() { return out_mem_type_; }
  // Overwrites out_mem_type_ with the given memory type.
  void SetMemType(OpenCLMemType mem_type) { out_mem_type_ = mem_type; }
  // Returns the format stored in ori_format_.
  // NOTE(review): the name suggests this is the output tensor's format as it was
  // before the kernel changed it — confirm against each kernel's Init().
  schema::Format GetOriFormat() { return ori_format_;}
 protected:
  // Memory type reported by GetMemType(); defaults to OpenCLMemType::IMG.
  OpenCLMemType out_mem_type_{OpenCLMemType::IMG};
  // Format reported by GetOriFormat(); defaults to schema::Format_NHWC4 until a
  // derived kernel assigns it.
  schema::Format ori_format_{schema::Format_NHWC4};
 };
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc
@@ -34,6 +34,15 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
  out_parameters->clear();
  out_convert_ops->clear();
  for (size_t i = 0; i < in_tensors.size(); ++i) {
    OpenCLKernel* cur_opencl_op = reinterpret_cast<OpenCLKernel*>(in_kernels[i]);
    schema::Format ori_format = cur_opencl_op->GetOriFormat();
    if (mem_type == cur_opencl_op->GetMemType() && in_tensors[i]->GetFormat() == ori_format) {
      continue;
    }
    auto dst_format =
      (mem_type == OpenCLMemType::IMG) ? in_kernels[i]->out_tensors()[0]->GetFormat() : ori_format;
    auto src_format =
      (mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels[i]->out_tensors()[0]->GetFormat();
    lite::tensor::Tensor *new_tensor = new (std::nothrow) lite::tensor::Tensor();
    MS_ASSERT(new_tensor);
    if (new_tensor == nullptr) {
@@ -41,10 +50,6 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::tensor::Tensor *
      return RET_ERROR;
    }
    new_tensor->CopyTensor(*in_tensors[i]);
    auto dst_format =
      (mem_type == OpenCLMemType::IMG) ? in_kernels.back()->out_tensors()[0]->GetFormat() : in_tensors[i]->GetFormat();
    auto src_format =
      (mem_type == OpenCLMemType::IMG) ? in_tensors[i]->GetFormat() : in_kernels.front()->out_tensors()[0]->GetFormat();
    if ((dst_format == schema::Format_NCHW || dst_format == schema::Format_NC4HW4) &&
        (src_format == schema::Format_NHWC || src_format == schema::Format_NHWC4)) {
      auto &shape = new_tensor->shape();
--- a/mindspore/lite/test/st/benchmark_test.cc
+++ b/mindspore/lite/test/st/benchmark_test.cc
@@ -49,6 +49,13 @@ const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_02.ms",
 auto status = RunBenchmark(5, argv);
 ASSERT_EQ(status, RET_OK);
 }
 // Runs the benchmark on ./hiai/model_02.ms with input ./hiai/model_02_in.bin
 // and --device=GPU. Four arguments are passed (program name, model path,
 // input data path, device); the test passes when RunBenchmark returns RET_OK.
 // NOTE(review): the "PERF" suffix suggests a timing-only run with no output
 // comparison — confirm against RunBenchmark's argument handling.
 TEST_F(BenchmarkTest, TestOCR_02_GPU_PERF) {
 const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_02.ms",
                                     "--inDataPath=./hiai/model_02_in.bin",
                                     "--device=GPU"};
 // The first argument must equal the number of entries in argv above.
 auto status = RunBenchmark(4, argv);
 ASSERT_EQ(status, RET_OK);
 }
 TEST_F(BenchmarkTest, Test_MV2_GPU) {
 const char *argv[] = {"./benchmark", "--modelPath=./hiai/mobilenet_v2.ms",
@@ -59,6 +66,14 @@ auto status = RunBenchmark(5, argv);
 ASSERT_EQ(status, RET_OK);
 }
 // Runs the benchmark on ./hiai/mobilenet_v2.ms with input
 // ./hiai/mobilenet_v2_in.bin and --device=GPU. Four arguments are passed
 // (program name, model path, input data path, device); the test passes when
 // RunBenchmark returns RET_OK.
 // NOTE(review): the "PERF" suffix suggests a timing-only run with no output
 // comparison — confirm against RunBenchmark's argument handling.
 TEST_F(BenchmarkTest, Test_MV2_GPU_PERF) {
  const char *argv[] = {"./benchmark", "--modelPath=./hiai/mobilenet_v2.ms",
                        "--inDataPath=./hiai/mobilenet_v2_in.bin",
                        "--device=GPU"};
  // The first argument must equal the number of entries in argv above.
  auto status = RunBenchmark(4, argv);
  ASSERT_EQ(status, RET_OK);
 }
 TEST_F(BenchmarkTest, TestHebing) {
  const char *argv[] = {"./benchmark", "--modelPath=./hiai/model_hebing_3branch.ms",
                                       "--inDataPath=./hiai/model_hebing_3branch.bin",