From aaa543e9276078f556a0a3f737018b381bd17b7d Mon Sep 17 00:00:00 2001 From: wandongdong Date: Wed, 16 Sep 2020 01:38:20 -0700 Subject: [PATCH] fix opencl segment bug for P8 --- .../kernel/opencl/kernel/activation.cc | 4 +- .../kernel/opencl/kernel/arithmetic.cc | 22 ++++----- .../kernel/opencl/kernel/arithmetic_self.cc | 4 +- .../runtime/kernel/opencl/kernel/batchnorm.cc | 12 ++--- .../runtime/kernel/opencl/kernel/biasadd.cc | 6 +-- .../runtime/kernel/opencl/kernel/concat.cc | 28 +++++------ .../kernel/opencl/kernel/conv2d_transpose.cc | 12 ++--- .../kernel/opencl/kernel/depthwise_conv2d.cc | 8 ++-- .../runtime/kernel/opencl/kernel/matmul.cc | 14 +++--- .../runtime/kernel/opencl/kernel/pooling2d.cc | 4 +- .../src/runtime/kernel/opencl/kernel/prelu.cc | 12 ++--- .../runtime/kernel/opencl/kernel/reduce.cc | 4 +- .../runtime/kernel/opencl/kernel/reshape.cc | 9 ++-- .../src/runtime/kernel/opencl/kernel/scale.cc | 46 +++++++++---------- .../src/runtime/kernel/opencl/kernel/slice.cc | 4 +- .../runtime/kernel/opencl/kernel/softmax.cc | 12 ++--- .../runtime/kernel/opencl/kernel/to_format.cc | 12 ++++- .../runtime/kernel/opencl/kernel/transpose.cc | 6 +-- .../kernel/opencl/subgraph_opencl_kernel.cc | 2 +- .../lite/src/runtime/opencl/opencl_runtime.h | 1 + mindspore/lite/src/scheduler.cc | 6 ++- .../runtime/kernel/opencl/activation_tests.cc | 14 +++--- .../kernel/opencl/arithmetic_self_tests.cc | 4 +- .../runtime/kernel/opencl/arithmetic_tests.cc | 10 ++-- .../kernel/opencl/avg_pooling_tests.cc | 6 +-- .../runtime/kernel/opencl/batchnorm_tests.cc | 24 +++++----- .../runtime/kernel/opencl/biasadd_tests.cc | 8 ++-- .../src/runtime/kernel/opencl/concat_tests.cc | 32 ++++++------- .../kernel/opencl/conv2d_transpose_tests.cc | 6 +-- .../kernel/opencl/depthwise_conv2d_tests.cc | 4 +- .../src/runtime/kernel/opencl/gather_tests.cc | 2 +- .../src/runtime/kernel/opencl/matmul_tests.cc | 6 +-- .../kernel/opencl/max_pooling_tests.cc | 6 +-- .../src/runtime/kernel/opencl/prelu_tests.cc | 8 ++-- .../src/runtime/kernel/opencl/reduce_tests.cc | 6 +-- .../runtime/kernel/opencl/reshape_tests.cc | 6 +-- .../src/runtime/kernel/opencl/scale_tests.cc | 14 +++--- .../src/runtime/kernel/opencl/slice_tests.cc | 8 ++-- .../runtime/kernel/opencl/softmax_tests.cc | 6 +-- .../runtime/kernel/opencl/to_format_tests.cc | 4 +- .../runtime/kernel/opencl/transpose_tests.cc | 6 +-- .../src/runtime/kernel/opencl/utils_tests.h | 2 +- 42 files changed, 212 insertions(+), 198 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index d617972399..e402f9e84e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -91,8 +91,8 @@ int ActivationOpenClKernel::Run() { cl_int4 img2d_shape = GetImg2dShape(); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, img2d_shape); if (type_ == ActivationType_LEAKY_RELU) { ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index 24ad00b463..1040dca9c1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -105,7 +105,7 @@ int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector *img_si int ArithmeticOpenCLKernel::InitBuffer() { const ArithmeticParameter *arithmetic_parameter = reinterpret_cast(op_parameter_); if (!arithmetic_parameter->broadcasting_) { - if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->MutableData() != nullptr) { + if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) { auto allocator = runtime_->GetAllocator(); std::vector img_size; GetImageSize(0, &img_size); @@ -117,7 +117,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) { if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) { weight_ptr_ = - allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size); + allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size); } else { MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " << in_tensors_[0]->data_type(); @@ -132,7 +132,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { return RET_ERROR; } std::function to_dtype = [](float x) -> float { return x; }; - PackNHWCToNC4HW4(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype); + PackNHWCToNC4HW4(in_tensors_[1]->data_c(), weight, batch, plane, channel, to_dtype); weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); delete[] weight; } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { @@ -142,7 +142,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { return RET_ERROR; } std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; - PackNHWCToNC4HW4(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype); + PackNHWCToNC4HW4(in_tensors_[1]->data_c(), weight, batch, plane, channel, to_dtype); weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); delete[] weight; } else { @@ -164,7 +164,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { return RET_ERROR; } std::function to_dtype = [](float x) -> float { return x; }; - PackNHWCToNHWC4(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype); + PackNHWCToNHWC4(in_tensors_[1]->data_c(), weight, batch, plane, channel, to_dtype); weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); delete[] weight; } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { @@ -174,7 +174,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { return RET_ERROR; } std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; - PackNHWCToNHWC4(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype); + PackNHWCToNHWC4(in_tensors_[1]->data_c(), weight, batch, plane, channel, to_dtype); weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); delete[] weight; } else { @@ -302,23 +302,23 @@ int ArithmeticOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); + runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (element_flag_) { - void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->MutableData() : weight_ptr_; + void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->data_c() : weight_ptr_; runtime_->SetKernelArg(kernel_, arg_idx++, weight); } else { float weight = 0.f; if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { - weight = static_cast(in_tensors_[1]->MutableData())[0]; + weight = static_cast(in_tensors_[1]->data_c())[0]; } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { - weight = static_cast(static_cast(in_tensors_[1]->MutableData())[0]); + weight = static_cast(static_cast(in_tensors_[1]->data_c())[0]); } else { MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type(); return RET_ERROR; } runtime_->SetKernelArg(kernel_, arg_idx++, weight); } - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); int H = 0; int W = 0; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index 4f95798c75..5ac1616556 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -176,8 +176,8 @@ int ArithmeticSelfOpenCLKernel::Run() { ArithmeticSelfGetWorkGroup(global, &local, max_global[0]); int arg_cn = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); ocl_runtime->RunKernel(kernel_, global, local, nullptr); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index e07ea96fc5..44825acae0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -112,12 +112,12 @@ int BatchNormOpenCLKernel::Run() { std::vector global = {OH, OW, OC}; BatchNormGetWorkGroup(global, &local, max_global[0]); int arg_cn = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); // input tensor - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->MutableData()); // scale - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->MutableData()); // offest - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->MutableData()); // mean - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->MutableData()); // variance - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); // out tensor + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offest + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance + ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->epsilon_); ocl_runtime->RunKernel(kernel_, global, local, nullptr); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc index 19433da577..7bc5cd4d40 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc @@ -47,7 +47,7 @@ void BiasAddOpenCLKernel::InitBuffer() { BiasAdd_ = allocator->Malloc(div_ci * C4NUM * fp_size, img_size); BiasAdd_ = allocator->MapBuffer(BiasAdd_, CL_MAP_WRITE, nullptr, true); memset(BiasAdd_, 0x00, div_ci * C4NUM * fp_size); - memcpy(BiasAdd_, in_tensors_[1]->MutableData(), C * fp_size); + memcpy(BiasAdd_, in_tensors_[1]->data_c(), C * fp_size); allocator->UnmapBuffer(BiasAdd_); } @@ -93,8 +93,8 @@ int BiasAddOpenCLKernel::Run() { int arg_idx = 0; std::map data_type{ {schema::Format::Format_NC4, 1}, {schema::Format::Format_NHWC4, 2}, {schema::Format::Format_NC4HW4, 3}}; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); ocl_runtime->SetKernelArg(kernel_, arg_idx++, BiasAdd_); ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index d1776b18de..b7e79ec745 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -55,11 +55,11 @@ int ConcatOpenCLKernel::RunAxis0() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto allocator_ = ocl_runtime->GetAllocator(); std::vector img_size; - auto dst_data = out_tensors_[0]->MutableData(); + auto dst_data = out_tensors_[0]->data_c(); auto dst_origin = cl::array{0, 0, 0}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); for (int i = 0; i < in_tensors_.size(); i++) { - auto src_data = in_tensors_[i]->MutableData(); + auto src_data = in_tensors_[i]->data_c(); allocator_->GetImageSize(src_data, &img_size); auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size[0], img_size[1], 1}; @@ -176,9 +176,9 @@ int ConcatOpenCLKernel::Run() { int arg_cn = 0; if (in_tensors_.size() == 2) { - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); @@ -187,10 +187,10 @@ int ConcatOpenCLKernel::Run() { auto input3_shape = in_tensors_[2]->shape(); cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); @@ -202,11 +202,11 @@ int ConcatOpenCLKernel::Run() { cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; cl_int4 input_shape4_ = {input4_shape[0], input4_shape[1], input4_shape[2], UP_DIV(input4_shape[3], C4NUM)}; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index be238473a3..8b3d023e7c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -79,7 +79,7 @@ void Conv2dTransposeOpenCLKernel::PadWeight() { padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); - auto origin_weight = in_tensors_.at(kWeightIndex)->MutableData(); + auto origin_weight = in_tensors_.at(kWeightIndex)->data_c(); auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type(); int index = 0; for (int co_i = 0; co_i < div_co; co_i++) { @@ -136,14 +136,14 @@ void Conv2dTransposeOpenCLKernel::PadWeight() { auto bias_dtype = in_tensors_[2]->data_type(); if (bias_dtype == kNumberTypeFloat32 && enable_fp16_) { for (int i = 0; i < co; i++) { - reinterpret_cast(bias_)[i] = reinterpret_cast(in_tensors_[2]->MutableData())[i]; + reinterpret_cast(bias_)[i] = reinterpret_cast(in_tensors_[2]->data_c())[i]; } } else if (bias_dtype == kNumberTypeFloat16 && !enable_fp16_) { for (int i = 0; i < co; i++) { - reinterpret_cast(bias_)[i] = reinterpret_cast(in_tensors_[2]->MutableData())[i]; + reinterpret_cast(bias_)[i] = reinterpret_cast(in_tensors_[2]->data_c())[i]; } } else { - memcpy(bias_, in_tensors_[2]->MutableData(), co * data_size); + memcpy(bias_, in_tensors_[2]->data_c(), co * data_size); } } allocator->UnmapBuffer(bias_); @@ -200,10 +200,10 @@ int Conv2dTransposeOpenCLKernel::Run() { cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), 1}; cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), 1}; int arg_cnt = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 0082243c41..f3e47e8e42 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -89,7 +89,7 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { bool is_fp16 = ocl_runtime->GetFp16Enable(); // weight: o, h, w, i; o == group, i == 1 - void *origin_weight = in_tensors_.at(kWeightIndex)->MutableData(); + void *origin_weight = in_tensors_.at(kWeightIndex)->data_c(); int CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); int pack_weight_size = C4NUM * CO4 * parameter->kernel_h_ * parameter->kernel_w_; @@ -133,7 +133,7 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { bias_data_ = allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true); size_t up_co_size = C4NUM * CO4 * dtype_size; memset(bias_data_, 0, up_co_size); - auto ori_bias = in_tensors_.at(kBiasIndex)->MutableData(); + auto ori_bias = in_tensors_.at(kBiasIndex)->data_c(); if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) { float16_t *bias_ptr = static_cast(bias_data_); for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) { @@ -207,10 +207,10 @@ int DepthwiseConv2dOpenCLKernel::Run() { (cl_int)out_tensors_[0]->Batch()}; int arg_cnt = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); - ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index 91949e0730..4782c0f377 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -93,8 +93,8 @@ void MatMulOpenCLKernel::PadWeight() { auto padWeightFp32 = reinterpret_cast(padWeight_); auto padWeightFp16 = reinterpret_cast(padWeight_); memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size); - auto originWeightFp32 = reinterpret_cast(in_tensors_.at(kWeightIndex)->MutableData()); - auto originWeightFp16 = reinterpret_cast(in_tensors_.at(kWeightIndex)->MutableData()); + auto originWeightFp32 = reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()); + auto originWeightFp16 = reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()); bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16; // pad weight @@ -153,14 +153,14 @@ void MatMulOpenCLKernel::PadWeight() { if (in_tensors_.size() >= 3) { if (in_tensors_[2]->data_type() == kNumberTypeFloat32 && enable_fp16_) { for (int i = 0; i < co; i++) { - reinterpret_cast(bias_)[i] = reinterpret_cast(in_tensors_[2]->MutableData())[i]; + reinterpret_cast(bias_)[i] = reinterpret_cast(in_tensors_[2]->data_c())[i]; } } else if (in_tensors_[2]->data_type() == kNumberTypeFloat16 && !enable_fp16_) { for (int i = 0; i < co; i++) { - reinterpret_cast(bias_)[i] = reinterpret_cast(in_tensors_[2]->MutableData())[i]; + reinterpret_cast(bias_)[i] = reinterpret_cast(in_tensors_[2]->data_c())[i]; } } else { - memcpy(bias_, in_tensors_[2]->MutableData(), co * dtype_size); + memcpy(bias_, in_tensors_[2]->data_c(), co * dtype_size); } } allocator->UnmapBuffer(bias_); @@ -210,10 +210,10 @@ int MatMulOpenCLKernel::Run() { int arg_count = 0; cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; - ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_); - ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_count++, in_shape); ocl_runtime->SetKernelArg(kernel_, arg_count++, out_shape); ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index fa1095dd8c..39711cec9f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -135,8 +135,8 @@ int PoolingOpenCLKernel::Run() { cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape); ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape); ocl_runtime->SetKernelArg(kernel_, arg_idx++, stride); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index f345330207..1af36eeefa 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -50,22 +50,22 @@ void PReluOpenCLKernel::InitBuffer() { if (enable_fp16_) { if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { auto PReluWeight_fp16 = reinterpret_cast(PReluWeight_); - auto in_tensor_data_fp32 = reinterpret_cast(in_tensors_[1]->MutableData()); + auto in_tensor_data_fp32 = reinterpret_cast(in_tensors_[1]->data_c()); for (int i = 0; i < elem_num; i++) { PReluWeight_fp16[i] = static_cast(in_tensor_data_fp32[i]); } } else { - memcpy(PReluWeight_, in_tensors_[1]->MutableData(), elem_num * fp_size); + memcpy(PReluWeight_, in_tensors_[1]->data_c(), elem_num * fp_size); } } else { if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { auto PReluWeight_fp32 = reinterpret_cast(PReluWeight_); - auto in_tensor_data_fp16 = reinterpret_cast(in_tensors_[1]->MutableData()); + auto in_tensor_data_fp16 = reinterpret_cast(in_tensors_[1]->data_c()); for (int i = 0; i < elem_num; i++) { PReluWeight_fp32[i] = static_cast(in_tensor_data_fp16[i]); } } else { - memcpy(PReluWeight_, in_tensors_[1]->MutableData(), elem_num * fp_size); + memcpy(PReluWeight_, in_tensors_[1]->data_c(), elem_num * fp_size); } } allocator->UnmapBuffer(PReluWeight_); @@ -110,8 +110,8 @@ int PReluOpenCLKernel::Run() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); std::map data_type{{schema::Format::Format_NHWC4, 1}, {schema::Format::Format_NC4HW4, 2}}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); ocl_runtime->SetKernelArg(kernel_, arg_idx++, PReluWeight_); ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc index 7a7b64c88b..682cbc7daa 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc @@ -135,8 +135,8 @@ int ReduceOpenCLKernel::Run() { std::vector global = {static_cast(c4)}; cl_int4 size = {h, w, c4, 1}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); ocl_runtime->RunKernel(kernel_, global, local, nullptr); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index 6beba68518..54f0b24d7f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -36,7 +36,10 @@ int ReshapeOpenCLKernel::Init() { kernel_name += "_" + std::string(EnumNameFormat(op_format_)); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); enable_fp16_ = ocl_runtime->GetFp16Enable(); - + if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) { + MS_LOG(ERROR) << "Reshape output size should in 2,4"; + return RET_ERROR; + } if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) { MS_LOG(ERROR) << "Reshape input channel " << in_tensors_[0]->shape().back() << " should equal output channel" << out_tensors_[0]->shape().back(); @@ -115,8 +118,8 @@ int ReshapeOpenCLKernel::Run() { cl_int4 size = {h, w, c4, 1}; cl_int4 size_out = {oh, ow, c4, 1}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); ocl_runtime->SetKernelArg(kernel_, arg_idx++, size_out); ocl_runtime->RunKernel(kernel_, global, local, nullptr); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index bd8724871c..1dfc076380 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -110,7 +110,7 @@ int ScaleOpenCLKernel::InitBuffer() { if (!element_flag_) { return RET_OK; } - if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->MutableData() != nullptr) { + if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) { auto allocator = ocl_runtime_->GetAllocator(); std::vector img_size; GetImageSize(0, &img_size); @@ -118,9 +118,9 @@ int ScaleOpenCLKernel::InitBuffer() { img_size[0] = 1; img_size[1] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM); scale_ptr_ = - allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size); + allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size); offset_ptr_ = - allocator->CreateImageFromHost(in_tensors_[2]->MutableData(), in_tensors_[2]->ElementsNum(), img_size); + allocator->CreateImageFromHost(in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum(), img_size); return RET_OK; } int pack_weight_size = in_tensors_[1]->ElementsC4Num(); @@ -130,9 +130,9 @@ int ScaleOpenCLKernel::InitBuffer() { if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) { if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) { scale_ptr_ = - allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size); + allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size); offset_ptr_ = - allocator->CreateImageFromHost(in_tensors_[2]->MutableData(), in_tensors_[2]->ElementsNum(), img_size); + allocator->CreateImageFromHost(in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum(), img_size); } else { MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " << in_tensors_[0]->data_type(); @@ -153,8 +153,8 @@ int ScaleOpenCLKernel::InitBuffer() { return RET_ERROR; } std::function to_dtype = [](float x) -> float { return x; }; - PackNHWCToNC4HW4(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); - PackNHWCToNC4HW4(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); + PackNHWCToNC4HW4(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); + PackNHWCToNC4HW4(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); delete[] scale; @@ -172,8 +172,8 @@ int ScaleOpenCLKernel::InitBuffer() { return RET_ERROR; } std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; - PackNHWCToNC4HW4(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); - PackNHWCToNC4HW4(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); + PackNHWCToNC4HW4(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); + PackNHWCToNC4HW4(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); delete[] scale; @@ -203,8 +203,8 @@ int ScaleOpenCLKernel::InitBuffer() { return RET_ERROR; } std::function to_dtype = [](float x) -> float { return x; }; - PackNHWCToNHWC4(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); - PackNHWCToNHWC4(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); + PackNHWCToNHWC4(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); + PackNHWCToNHWC4(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); delete[] scale; @@ -222,8 +222,8 @@ int ScaleOpenCLKernel::InitBuffer() { return RET_ERROR; } std::function to_dtype = [](float x) -> float16_t { return static_cast(x); }; - PackNHWCToNHWC4(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); - PackNHWCToNHWC4(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); + PackNHWCToNHWC4(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); + PackNHWCToNHWC4(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); delete[] scale; @@ -309,27 +309,27 @@ int ScaleOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (element_flag_) { - void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->MutableData() : scale_ptr_; - void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->MutableData() : offset_ptr_; + void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_; + void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->data_c() : offset_ptr_; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); } else { if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { - float scale = static_cast(in_tensors_[1]->MutableData())[0]; - float offset = static_cast(in_tensors_[2]->MutableData())[0]; + float scale = static_cast(in_tensors_[1]->data_c())[0]; + float offset = static_cast(in_tensors_[2]->data_c())[0]; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { - float scale = static_cast(in_tensors_[1]->MutableData())[0]; - float offset = static_cast(in_tensors_[2]->MutableData())[0]; + float scale = static_cast(in_tensors_[1]->data_c())[0]; + float offset = static_cast(in_tensors_[2]->data_c())[0]; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { - float16_t scale = static_cast(in_tensors_[1]->MutableData())[0]; - float16_t offset = static_cast(in_tensors_[2]->MutableData())[0]; + float16_t scale = static_cast(in_tensors_[1]->data_c())[0]; + float16_t offset = static_cast(in_tensors_[2]->data_c())[0]; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); } else { @@ -338,7 +338,7 @@ int ScaleOpenCLKernel::Run() { } } } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); int H = 0; int W = 0; if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc index 7fb58b9e9f..f55131046b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc @@ -110,8 +110,8 @@ int SliceOpenCLKernel::Run() { std::vector global = {1, OH, OW}; SlcieGetWorkGroup(global, &local, max_global[0]); int arg_cn = 0; - ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); // input tensor - ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); // out tensor + ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor + ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, size_); ocl_runtime->SetKernelArg(kernel_, arg_cn++, begin_); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index 9defeb9e80..e774c78118 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -158,11 +158,11 @@ int SoftmaxOpenCLKernel::Run() { auto mask_ = GetMaskForLastChannel(channel_size); cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; - runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); + runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (is_image_out_) { - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); } else { - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData(), lite::opencl::MemType::BUF); + runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } runtime_->SetKernelArg(kernel_, arg_idx++, mask); runtime_->SetKernelArg(kernel_, arg_idx++, slices); @@ -172,11 +172,11 @@ int SoftmaxOpenCLKernel::Run() { int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; - runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); + runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (is_image_out_) { - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); } else { - runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData(), lite::opencl::MemType::BUF); + runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } runtime_->SetKernelArg(kernel_, arg_idx, input_shape); SetWorkGroupSize(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 719ddbe777..65d045a39f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -76,6 +76,14 @@ int ToFormatOpenCLKernel::InitNHWCShape() { nhwc_shape_ = {n, h, w, c}; return RET_OK; } + if (shapex.size() == 3) { + n = 1; + h = 1; + w = 1; + c = 1; + nhwc_shape_ = {n, h, w, c}; + return RET_OK; + } if (out_tensors_[0]->GetFormat() == schema::Format::Format_NC4HW4 || out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4 || out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC) { @@ -159,8 +167,8 @@ int ToFormatOpenCLKernel::Run() { cl_int4 gsize{(cl_int)global[0], (cl_int)global[1], (cl_int)global[2], 1}; auto src_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; auto dst_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::IMG : lite::opencl::MemType::BUF; - ocl_runtime->SetKernelArg(kernel_, 0, in_tensors_[0]->MutableData(), src_mem_type); - ocl_runtime->SetKernelArg(kernel_, 1, out_tensors_[0]->MutableData(), dst_mem_type); + ocl_runtime->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type); + ocl_runtime->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type); ocl_runtime->SetKernelArg(kernel_, 2, gsize); ocl_runtime->SetKernelArg(kernel_, 3, shape); ocl_runtime->RunKernel(kernel_, global, local, nullptr); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index 9241caf827..29ae999015 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -121,11 +121,11 @@ int TransposeOpenCLKernel::Run() { cl_int2 HW = {h * w, hw4}; cl_int2 C = {c, c4}; int arg_idx = 0; - ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); if (out_mem_type_ == OpenCLMemType::BUF) { - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData(), lite::opencl::MemType::BUF); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } else { - ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); + ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); } ocl_runtime->SetKernelArg(kernel_, arg_idx++, HW); ocl_runtime->SetKernelArg(kernel_, arg_idx++, C); diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index d77797a23e..844aa04bfe 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -312,7 +312,7 @@ int SubGraphOpenCLKernel::ReSize() { return RET_OK; } int SubGraphOpenCLKernel::Run() { auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); for (auto &tensor : in_tensors_) { - allocator_->UnmapBuffer(tensor->MutableData()); + allocator_->UnmapBuffer(tensor->data_c()); } lite::opencl::OpenCLExecutor executor; diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/opencl/opencl_runtime.h index 80a14b0512..157400c308 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_runtime.h +++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.h @@ -127,6 +127,7 @@ class OpenCLRuntime { int UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; int UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; bool SyncCommandQueue(cl::CommandQueue *command_queue = nullptr); + bool IsInitOK() {return init_done_;} /** * Get kernel max worker group size. diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index a243f9f543..14de6fe173 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -24,6 +24,7 @@ #include "src/common/utils.h" #if SUPPORT_GPU #include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" +#include "src/runtime/opencl/opencl_runtime.h" #endif namespace mindspore::lite { @@ -242,7 +243,8 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector &in_tens MS_ASSERT(primitive != nullptr); TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors); kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast(primitive->Type())}; - if (context_->device_type_ == DT_GPU) { +#if SUPPORT_GPU + if (context_->device_type_ == DT_GPU && lite::opencl::OpenCLRuntime::GetInstance()->IsInitOK()) { desc.arch = kernel::KERNEL_ARCH::kGPU; auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc); if (kernel != nullptr) { @@ -254,7 +256,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector &in_tens << node->name_; } } - +#endif desc.arch = kernel::KERNEL_ARCH::kCPU; kernel::LiteKernel *kernel = nullptr; if ((context_->float16_priority && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc index d4bf33f6b4..9152b21219 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc @@ -50,7 +50,7 @@ void LoadActivationData(void *dst, size_t dst_size, const std::string &file_path template void CompareRes(lite::Tensor *output_tensor, const std::string &standard_answer_file) { - auto *output_data = reinterpret_cast(output_tensor->MutableData()); + auto *output_data = reinterpret_cast(output_tensor->data_c()); size_t output_size = output_tensor->Size(); auto expect_data = reinterpret_cast(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); constexpr float atol = 0.001; @@ -70,7 +70,7 @@ void CompareRes(lite::Tensor *output_tensor, const std::string &standard_answer_ template void printf_tensor(const std::string &str, mindspore::lite::Tensor *in_data) { MS_LOG(INFO) << str; - auto input_data = reinterpret_cast(in_data->MutableData()); + auto input_data = reinterpret_cast(in_data->data_c()); for (int i = 0; i < in_data->ElementsNum(); ++i) { printf("%f ", input_data[i]); } @@ -107,7 +107,7 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { std::vector inputs{input_tensor}; std::vector outputs{output_tensor}; inputs[0]->MallocData(allocator); - LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); + LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); if (enable_fp16) { printf_tensor("ReluFp16:--input data---", inputs[0]); } else { @@ -221,7 +221,7 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { auto allocator = ocl_runtime->GetAllocator(); inputs[0]->MallocData(allocator); MS_LOG(INFO) << "Initialize input data"; - LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); + LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); if (enable_fp16) { printf_tensor("Relu6:FP16--input data--", inputs[0]); } else { @@ -336,7 +336,7 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { auto allocator = ocl_runtime->GetAllocator(); inputs[0]->MallocData(allocator); MS_LOG(INFO) << "Initialize input data"; - LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); + LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); if (enable_fp16) { printf_tensor("Sigmoid:FP16--input data--", inputs[0]); } else { @@ -451,7 +451,7 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { auto allocator = ocl_runtime->GetAllocator(); inputs[0]->MallocData(allocator); MS_LOG(INFO) << "Initialize input data"; - LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); + LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); if (enable_fp16) { printf_tensor("Leaky Relu:FP16--input data--", inputs[0]); } else { @@ -566,7 +566,7 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { auto allocator = ocl_runtime->GetAllocator(); inputs[0]->MallocData(allocator); MS_LOG(INFO) << "Initialize input data"; - LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); + LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); if (enable_fp16) { printf_tensor("Tanh:FP16--input data--", inputs[0]); } else { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc index 00f4fa8889..25cbbb29e7 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc @@ -114,11 +114,11 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { } sub_graph->Init(); MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->MutableData(), input_data1, input1_size); + memcpy(inputs[0]->data_c(), input_data1, input1_size); std::cout << "==================output data================" << std::endl; sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); + auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); for (auto tensor : inputs) { delete tensor; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc index 3222cfe5d6..dd443bc683 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc @@ -122,7 +122,7 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh inputs.push_back(tensor_b); } else { tensor_b->MallocData(); - memcpy(tensor_b->MutableData(), data_b, sizeof(T)); + memcpy(tensor_b->data_c(), data_b, sizeof(T)); } std::vector outputs = {tensor_c}; @@ -178,19 +178,19 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh } kernel->Init(); - memcpy(inputs[0]->MutableData(), data_a, sizeof(T) * element_num); + memcpy(inputs[0]->data_c(), data_a, sizeof(T) * element_num); if (!is_bias_add) { - memcpy(inputs[1]->MutableData(), data_b, sizeof(T) * element_num_b); + memcpy(inputs[1]->data_c(), data_b, sizeof(T) * element_num_b); } kernel->Run(); - memcpy(data_c_ocl, outputs[0]->MutableData(), sizeof(T) * element_num); + memcpy(data_c_ocl, outputs[0]->data_c(), sizeof(T) * element_num); LogData(data_a, 10, "Data A : "); LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : "); LogData(data_c_cpu, 10, "Expect compute : "); - LogData(outputs[0]->MutableData(), 10, "OpenCL compute : "); + LogData(outputs[0]->data_c(), 10, "OpenCL compute : "); bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num); MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); EXPECT_EQ(true, cmp); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc index 5dc3a76a7c..2f09536f2c 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/avg_pooling_tests.cc @@ -111,14 +111,14 @@ void RunTestCaseAvgPooling(const std::vector &shape, void *input_data, void return; } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); + memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); pGraph->Run(); if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), 2e-2); } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); } inputs[0]->SetData(nullptr); outputs[0]->SetData(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc index cde15bc740..df75b7bc3d 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc @@ -130,15 +130,15 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { } sub_graph->Init(); MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->MutableData(), input_data, input_size); - memcpy(inputs[1]->MutableData(), scale_data, scale_size); - memcpy(inputs[2]->MutableData(), offset_data, offset_size); - memcpy(inputs[3]->MutableData(), mean_data, mean_size); - memcpy(inputs[4]->MutableData(), var_data, var_size); + memcpy(inputs[0]->data_c(), input_data, input_size); + memcpy(inputs[1]->data_c(), scale_data, scale_size); + memcpy(inputs[2]->data_c(), offset_data, offset_size); + memcpy(inputs[3]->data_c(), mean_data, mean_size); + memcpy(inputs[4]->data_c(), var_data, var_size); std::cout << "==================output data================" << std::endl; sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); + auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.01); for (auto tensor : inputs) { delete tensor; @@ -247,15 +247,15 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { } sub_graph->Init(); MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->MutableData(), input_data, input_size); - memcpy(inputs[1]->MutableData(), scale_data, scale_size); - memcpy(inputs[2]->MutableData(), offset_data, offset_size); - memcpy(inputs[3]->MutableData(), mean_data, mean_size); - memcpy(inputs[4]->MutableData(), var_data, var_size); + memcpy(inputs[0]->data_c(), input_data, input_size); + memcpy(inputs[1]->data_c(), scale_data, scale_size); + memcpy(inputs[2]->data_c(), offset_data, offset_size); + memcpy(inputs[3]->data_c(), mean_data, mean_size); + memcpy(inputs[4]->data_c(), var_data, var_size); std::cout << "==================output data================" << std::endl; sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); + auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); for (auto tensor : inputs) { delete tensor; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc index ee281758b6..339f300a3b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc @@ -43,7 +43,7 @@ void LoadDataBiasAdd(void *dst, size_t dst_size, const std::string &file_path) { template void CompareOutBiasAdd(lite::Tensor *output_tensor, const std::string &standard_answer_file) { size_t output_size = output_tensor->ElementsNum(); - auto output_data = reinterpret_cast(output_tensor->MutableData()); + auto output_data = reinterpret_cast(output_tensor->data_c()); auto expect_data = reinterpret_cast(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); constexpr float atol = 0.0002; for (int i = 0; i < output_tensor->ElementsNum(); ++i) { @@ -62,7 +62,7 @@ void CompareOutBiasAdd(lite::Tensor *output_tensor, const std::string &standard_ template void printf_tensor_BiasAdd(const std::string log, mindspore::lite::Tensor *in_data, int size) { MS_LOG(INFO) << log; - auto input_data = reinterpret_cast(in_data->MutableData()); + auto input_data = reinterpret_cast(in_data->data_c()); for (int i = 0; i < size; ++i) { printf("%f ", input_data[i]); } @@ -114,8 +114,8 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { auto allocator = ocl_runtime->GetAllocator(); inputs[0]->MallocData(allocator); inputs[1]->MallocData(allocator); - LoadDataBiasAdd(input_tensor->MutableData(), input_tensor->Size(), in_file); - LoadDataBiasAdd(weight_tensor->MutableData(), weight_tensor->Size(), weight_file); + LoadDataBiasAdd(input_tensor->data_c(), input_tensor->Size(), in_file); + LoadDataBiasAdd(weight_tensor->data_c(), weight_tensor->Size(), weight_file); if (ocl_runtime->GetFp16Enable()) { printf_tensor_BiasAdd("BiasAdd:FP16--input data", inputs[0], input_tensor->ElementsNum()); printf_tensor_BiasAdd("BiasAdd:FP16--weight data", inputs[1], weight_tensor->ElementsNum()); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc index 4f79dbbcd8..2342b0668f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc @@ -138,24 +138,24 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { sub_graph->Init(); MS_LOG(INFO) << " initialize input data "; if (inputs.size() == 2) { - memcpy(inputs[0]->MutableData(), input_data1, input1_size); - memcpy(inputs[1]->MutableData(), input_data2, input2_size); + memcpy(inputs[0]->data_c(), input_data1, input1_size); + memcpy(inputs[1]->data_c(), input_data2, input2_size); } else if (inputs.size() == 3) { - memcpy(inputs[0]->MutableData(), input_data1, input1_size); - memcpy(inputs[1]->MutableData(), input_data2, input2_size); - memcpy(inputs[2]->MutableData(), input_data3, input3_size); + memcpy(inputs[0]->data_c(), input_data1, input1_size); + memcpy(inputs[1]->data_c(), input_data2, input2_size); + memcpy(inputs[2]->data_c(), input_data3, input3_size); } else if (inputs.size() == 4) { - memcpy(inputs[0]->MutableData(), input_data1, input1_size); - memcpy(inputs[1]->MutableData(), input_data2, input2_size); - memcpy(inputs[2]->MutableData(), input_data3, input3_size); - memcpy(inputs[3]->MutableData(), input_data4, input4_size); + memcpy(inputs[0]->data_c(), input_data1, input1_size); + memcpy(inputs[1]->data_c(), input_data2, input2_size); + memcpy(inputs[2]->data_c(), input_data3, input3_size); + memcpy(inputs[3]->data_c(), input_data4, input4_size); } else { MS_LOG(ERROR) << " input size must be 2 or 3 or 4"; } std::cout << "==================output data================" << std::endl; sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); + auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); for (auto tensor : inputs) { delete tensor; @@ -263,19 +263,19 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { sub_graph->Init(); MS_LOG(INFO) << " initialize input data "; if (inputs.size() == 2) { - memcpy(inputs[0]->MutableData(), input_data1, input1_size); - memcpy(inputs[1]->MutableData(), input_data2, input2_size); + memcpy(inputs[0]->data_c(), input_data1, input1_size); + memcpy(inputs[1]->data_c(), input_data2, input2_size); } else if (inputs.size() == 3) { - memcpy(inputs[0]->MutableData(), input_data1, input1_size); - memcpy(inputs[1]->MutableData(), input_data2, input2_size); - memcpy(inputs[2]->MutableData(), input_data3, input3_size); + memcpy(inputs[0]->data_c(), input_data1, input1_size); + memcpy(inputs[1]->data_c(), input_data2, input2_size); + memcpy(inputs[2]->data_c(), input_data3, input3_size); } else { MS_LOG(ERROR) << " input size must be 2 or 3 "; } std::cout << "==================output data================" << std::endl; sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); + auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001); for (auto tensor : inputs) { delete tensor; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc index 7983820064..f7ab7ce0b3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc @@ -124,12 +124,12 @@ void RunTestCaseConv2dTranspose(const std::vector &shape, void *input_data, } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, n * h * w * ci * dtype_size); + memcpy(inputs[0]->data_c(), input_data, n * h * w * ci * dtype_size); pGraph->Run(); if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, n * oh * ow * co, static_cast(1e-3), 2e-2); + CompareOutput(outputs[0]->data_c(), output_data, n * oh * ow * co, static_cast(1e-3), 2e-2); } else { - CompareOutput(outputs[0]->MutableData(), output_data, n * oh * ow * co, static_cast(1e-5)); + CompareOutput(outputs[0]->data_c(), output_data, n * oh * ow * co, static_cast(1e-5)); } inputs[0]->SetData(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc index 9a09c6fa56..774414cebf 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc @@ -112,11 +112,11 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat // freamework to do!!! inputs[0]->MallocData(allocator); - memcpy(inputs[0]->MutableData(), packed_input, sizeof(T2) * pack_input_size); + memcpy(inputs[0]->data_c(), packed_input, sizeof(T2) * pack_input_size); pGraph->Run(); if (is_compare) { - T2 *packed_output = reinterpret_cast(outputs[0]->MutableData()); + T2 *packed_output = reinterpret_cast(outputs[0]->data_c()); auto packed_correct_data = std::make_unique(packed_output_size); if (packed_correct_data.get() == nullptr) { delete[] packed_input; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc index 5694a703f5..0d1f548930 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc @@ -71,7 +71,7 @@ void test_main_gather(void *input_data, void *correct_data, const std::vectorInit(); MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->MutableData(), input_data, input_size); + memcpy(inputs[0]->data_c(), input_data, input_size); sub_graph->Run(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc index dd46e107d4..b7353499dc 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc @@ -111,13 +111,13 @@ void RunTestCaseMatMul(const std::vector &shape, void *input_data, void *we return; } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, tensor_x->ElementsNum() * dtype_size); + memcpy(inputs[0]->data_c(), input_data, tensor_x->ElementsNum() * dtype_size); pGraph->Run(); if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-3), + CompareOutput(outputs[0]->data_c(), output_data, tensor_out->ElementsNum(), static_cast(1e-3), 2e-2); } else { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-5)); + CompareOutput(outputs[0]->data_c(), output_data, tensor_out->ElementsNum(), static_cast(1e-5)); } tensor_x->SetData(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc index 1994d9b620..cf0d4d4f83 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/max_pooling_tests.cc @@ -111,14 +111,14 @@ void RunTestCaseMaxPooling(const std::vector &shape, void *input_data, void return; } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); + memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); pGraph->Run(); if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), 2e-2); } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); } inputs[0]->SetData(nullptr); outputs[0]->SetData(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc index 0d9263393b..2de2b17a49 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc @@ -44,7 +44,7 @@ void LoadDataPRelu(void *dst, size_t dst_size, const std::string &file_path) { template void CompareOutPRelu(lite::Tensor *output_tensor, const std::string &standard_answer_file) { - auto *output_data = reinterpret_cast(output_tensor->MutableData()); + auto *output_data = reinterpret_cast(output_tensor->data_c()); size_t output_size = output_tensor->Size(); auto expect_data = reinterpret_cast(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); constexpr float atol = 0.0002; @@ -64,7 +64,7 @@ void CompareOutPRelu(lite::Tensor *output_tensor, const std::string &standard_an template void printf_tensor_Prelu(const std::string &log, mindspore::lite::Tensor *in_data, int size) { MS_LOG(INFO) << log; - auto input_data = reinterpret_cast(in_data->MutableData()); + auto input_data = reinterpret_cast(in_data->data_c()); for (int i = 0; i < size; ++i) { printf("%f ", input_data[i]); } @@ -113,8 +113,8 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { inputs[1]->MallocData(allocator); MS_LOG(INFO) << "initialize input data"; - LoadDataPRelu(input_tensor->MutableData(), input_tensor->Size(), in_file); - LoadDataPRelu(weight_tensor->MutableData(), weight_tensor->Size(), weight_file); + LoadDataPRelu(input_tensor->data_c(), input_tensor->Size(), in_file); + LoadDataPRelu(weight_tensor->data_c(), weight_tensor->Size(), weight_file); if (ocl_runtime->GetFp16Enable()) { printf_tensor_Prelu("PRELU:FP16--input data", input_tensor, inputs[0]->ElementsNum()); printf_tensor_Prelu("PRELU:FP16--weight data", weight_tensor, weight_tensor->ElementsNum()); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reduce_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reduce_tests.cc index 54959f14b7..0f038a5259 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reduce_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reduce_tests.cc @@ -90,14 +90,14 @@ void RunTestCaseReduce(const std::vector &shape, void *input_data, void *ou return; } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); + memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); pGraph->Run(); if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), 2e-2); } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); } inputs[0]->SetData(nullptr); outputs[0]->SetData(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc index 0fd237c6c2..ce858d3bb0 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc @@ -85,14 +85,14 @@ void RunTestCaseReshape(const std::vector &shape, void *input_data, void *o return; } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); + memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); pGraph->Run(); if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), 2e-2); } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); } inputs[0]->SetData(nullptr); outputs[0]->SetData(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc index bb3816067a..81cb41f86a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc @@ -132,8 +132,8 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh } else { tensor_scale->MallocData(); tensor_offset->MallocData(); - memcpy(tensor_scale->MutableData(), data_scale, sizeof(T)); - memcpy(tensor_offset->MutableData(), data_offset, sizeof(T)); + memcpy(tensor_scale->data_c(), data_scale, sizeof(T)); + memcpy(tensor_offset->data_c(), data_offset, sizeof(T)); } std::vector outputs = {tensor_out}; @@ -195,21 +195,21 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh } kernel->Init(); - memcpy(inputs[0]->MutableData(), data_in, sizeof(T) * element_num); + memcpy(inputs[0]->data_c(), data_in, sizeof(T) * element_num); if (!is_broadcast) { - memcpy(inputs[1]->MutableData(), data_scale, sizeof(T) * element_num_b); - memcpy(inputs[2]->MutableData(), data_offset, sizeof(T) * element_num_b); + memcpy(inputs[1]->data_c(), data_scale, sizeof(T) * element_num_b); + memcpy(inputs[2]->data_c(), data_offset, sizeof(T) * element_num_b); } kernel->Run(); - memcpy(data_out_ocl, outputs[0]->MutableData(), sizeof(T) * element_num); + memcpy(data_out_ocl, outputs[0]->data_c(), sizeof(T) * element_num); LogData(data_in, 10, "Data input : "); LogData(data_scale, tensor_scale->shape().empty() ? 1 : 10, "Data scale : "); LogData(data_offset, tensor_offset->shape().empty() ? 1 : 10, "Data offset : "); LogData(data_out_cpu, 10, "Expect compute : "); - LogData(outputs[0]->MutableData(), 10, "OpenCL compute : "); + LogData(outputs[0]->data_c(), 10, "OpenCL compute : "); bool cmp = DataCompare(data_out_cpu, data_out_ocl, element_num); MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); EXPECT_EQ(true, cmp); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc index 7a3c7beb35..fcb7cc86c7 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc @@ -130,12 +130,12 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { sub_graph->Init(); MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->MutableData(), input_data, input_size); + memcpy(inputs[0]->data_c(), input_data, input_size); std::cout << "==================output data================" << std::endl; sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); + auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); for (auto tensor : inputs) { delete tensor; @@ -238,12 +238,12 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { sub_graph->Init(); MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->MutableData(), input_data, input_size); + memcpy(inputs[0]->data_c(), input_data, input_size); std::cout << "==================output data================" << std::endl; sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); + auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); for (auto tensor : inputs) { delete tensor; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc index 4edd07b730..630575908c 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc @@ -89,14 +89,14 @@ void RunTestCaseSoftmax(const std::vector &shape, void *input_data, void *o return; } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); + memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); pGraph->Run(); if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), 2e-2); } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); + CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); } inputs[0]->SetData(nullptr); outputs[0]->SetData(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc index 220176ee1f..344400c5b0 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc @@ -76,7 +76,7 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { return; } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, input_size); + memcpy(inputs[0]->data_c(), input_data, input_size); pGraph->Run(); size_t output_size; @@ -87,7 +87,7 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { return; } printf("==================output data=================\n"); - float *output_data = reinterpret_cast(tensor_out->MutableData()); + float *output_data = reinterpret_cast(tensor_out->data_c()); std::cout << std::endl; int size_n = h * w * c; size_n = size_n > 100 ? 100 : size_n; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc index 7f7447a8af..10c430b470 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc @@ -89,13 +89,13 @@ void RunTestTranspose(const std::vector &shape, void *input_data, void *out return; } pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, h * w * c * dtype_size); + memcpy(inputs[0]->data_c(), input_data, h * w * c * dtype_size); pGraph->Run(); if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, h * w * c, static_cast(1e-3), 2e-2); + CompareOutput(outputs[0]->data_c(), output_data, h * w * c, static_cast(1e-3), 2e-2); } else { - CompareOutput(outputs[0]->MutableData(), output_data, h * w * c, static_cast(1e-5)); + CompareOutput(outputs[0]->data_c(), output_data, h * w * c, static_cast(1e-5)); } inputs[0]->SetData(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h index 86eb77e7e7..82d0bc11fb 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h @@ -57,7 +57,7 @@ template void CompareOutput(lite::Tensor *output_tensor, const std::string &file_path, T atol, float rtol = 1e-5) { size_t output_size; auto expect_data = mindspore::lite::ReadFile(file_path.c_str(), &output_size); - CompareOutput(output_tensor->MutableData(), expect_data, output_tensor->ElementsNum(), atol, rtol); + CompareOutput(output_tensor->data_c(), expect_data, output_tensor->ElementsNum(), atol, rtol); } } // namespace mindspore