| @@ -91,8 +91,8 @@ int ActivationOpenClKernel::Run() { | |||||
| cl_int4 img2d_shape = GetImg2dShape(); | cl_int4 img2d_shape = GetImg2dShape(); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, img2d_shape); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, img2d_shape); | ||||
| if (type_ == ActivationType_LEAKY_RELU) { | if (type_ == ActivationType_LEAKY_RELU) { | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, alpha_); | ||||
| @@ -105,7 +105,7 @@ int ArithmeticOpenCLKernel::GetImageSize(size_t idx, std::vector<size_t> *img_si | |||||
| int ArithmeticOpenCLKernel::InitBuffer() { | int ArithmeticOpenCLKernel::InitBuffer() { | ||||
| const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_); | const ArithmeticParameter *arithmetic_parameter = reinterpret_cast<const ArithmeticParameter *>(op_parameter_); | ||||
| if (!arithmetic_parameter->broadcasting_) { | if (!arithmetic_parameter->broadcasting_) { | ||||
| if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->MutableData() != nullptr) { | |||||
| if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) { | |||||
| auto allocator = runtime_->GetAllocator(); | auto allocator = runtime_->GetAllocator(); | ||||
| std::vector<size_t> img_size; | std::vector<size_t> img_size; | ||||
| GetImageSize(0, &img_size); | GetImageSize(0, &img_size); | ||||
| @@ -117,7 +117,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||||
| if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) { | if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) { | ||||
| if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) { | if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) { | ||||
| weight_ptr_ = | weight_ptr_ = | ||||
| allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size); | |||||
| allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " | MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " | ||||
| << in_tensors_[0]->data_type(); | << in_tensors_[0]->data_type(); | ||||
| @@ -132,7 +132,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | ||||
| PackNHWCToNC4HW4<float, float>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNC4HW4<float, float>(in_tensors_[1]->data_c(), weight, batch, plane, channel, to_dtype); | |||||
| weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); | weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); | ||||
| delete[] weight; | delete[] weight; | ||||
| } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | ||||
| @@ -142,7 +142,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | ||||
| PackNHWCToNC4HW4<float, float16_t>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNC4HW4<float, float16_t>(in_tensors_[1]->data_c(), weight, batch, plane, channel, to_dtype); | |||||
| weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); | weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); | ||||
| delete[] weight; | delete[] weight; | ||||
| } else { | } else { | ||||
| @@ -164,7 +164,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | ||||
| PackNHWCToNHWC4<float, float>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNHWC4<float, float>(in_tensors_[1]->data_c(), weight, batch, plane, channel, to_dtype); | |||||
| weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); | weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); | ||||
| delete[] weight; | delete[] weight; | ||||
| } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | ||||
| @@ -174,7 +174,7 @@ int ArithmeticOpenCLKernel::InitBuffer() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | ||||
| PackNHWCToNHWC4<float, float16_t>(in_tensors_[1]->MutableData(), weight, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNHWC4<float, float16_t>(in_tensors_[1]->data_c(), weight, batch, plane, channel, to_dtype); | |||||
| weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); | weight_ptr_ = allocator->CreateImageFromHost(weight, in_tensors_[1]->ElementsNum(), img_size); | ||||
| delete[] weight; | delete[] weight; | ||||
| } else { | } else { | ||||
| @@ -302,23 +302,23 @@ int ArithmeticOpenCLKernel::Run() { | |||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (element_flag_) { | if (element_flag_) { | ||||
| void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->MutableData() : weight_ptr_; | |||||
| void *weight = weight_ptr_ == nullptr ? in_tensors_[1]->data_c() : weight_ptr_; | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, weight); | runtime_->SetKernelArg(kernel_, arg_idx++, weight); | ||||
| } else { | } else { | ||||
| float weight = 0.f; | float weight = 0.f; | ||||
| if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { | if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { | ||||
| weight = static_cast<float *>(in_tensors_[1]->MutableData())[0]; | |||||
| weight = static_cast<float *>(in_tensors_[1]->data_c())[0]; | |||||
| } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { | } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { | ||||
| weight = static_cast<float>(static_cast<float16_t *>(in_tensors_[1]->MutableData())[0]); | |||||
| weight = static_cast<float>(static_cast<float16_t *>(in_tensors_[1]->data_c())[0]); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type(); | MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type(); | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, weight); | runtime_->SetKernelArg(kernel_, arg_idx++, weight); | ||||
| } | } | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| int H = 0; | int H = 0; | ||||
| int W = 0; | int W = 0; | ||||
| @@ -176,8 +176,8 @@ int ArithmeticSelfOpenCLKernel::Run() { | |||||
| ArithmeticSelfGetWorkGroup(global, &local, max_global[0]); | ArithmeticSelfGetWorkGroup(global, &local, max_global[0]); | ||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); | ||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ||||
| @@ -112,12 +112,12 @@ int BatchNormOpenCLKernel::Run() { | |||||
| std::vector<size_t> global = {OH, OW, OC}; | std::vector<size_t> global = {OH, OW, OC}; | ||||
| BatchNormGetWorkGroup(global, &local, max_global[0]); | BatchNormGetWorkGroup(global, &local, max_global[0]); | ||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); // input tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->MutableData()); // scale | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->MutableData()); // offest | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->MutableData()); // mean | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->MutableData()); // variance | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); // out tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); // scale | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); // offset | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->epsilon_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, param->epsilon_); | ||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ||||
| @@ -47,7 +47,7 @@ void BiasAddOpenCLKernel::InitBuffer() { | |||||
| BiasAdd_ = allocator->Malloc(div_ci * C4NUM * fp_size, img_size); | BiasAdd_ = allocator->Malloc(div_ci * C4NUM * fp_size, img_size); | ||||
| BiasAdd_ = allocator->MapBuffer(BiasAdd_, CL_MAP_WRITE, nullptr, true); | BiasAdd_ = allocator->MapBuffer(BiasAdd_, CL_MAP_WRITE, nullptr, true); | ||||
| memset(BiasAdd_, 0x00, div_ci * C4NUM * fp_size); | memset(BiasAdd_, 0x00, div_ci * C4NUM * fp_size); | ||||
| memcpy(BiasAdd_, in_tensors_[1]->MutableData(), C * fp_size); | |||||
| memcpy(BiasAdd_, in_tensors_[1]->data_c(), C * fp_size); | |||||
| allocator->UnmapBuffer(BiasAdd_); | allocator->UnmapBuffer(BiasAdd_); | ||||
| } | } | ||||
| @@ -93,8 +93,8 @@ int BiasAddOpenCLKernel::Run() { | |||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| std::map<schema::Format, int> data_type{ | std::map<schema::Format, int> data_type{ | ||||
| {schema::Format::Format_NC4, 1}, {schema::Format::Format_NHWC4, 2}, {schema::Format::Format_NC4HW4, 3}}; | {schema::Format::Format_NC4, 1}, {schema::Format::Format_NHWC4, 2}, {schema::Format::Format_NC4HW4, 3}}; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, BiasAdd_); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, BiasAdd_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | ||||
| @@ -55,11 +55,11 @@ int ConcatOpenCLKernel::RunAxis0() { | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| auto allocator_ = ocl_runtime->GetAllocator(); | auto allocator_ = ocl_runtime->GetAllocator(); | ||||
| std::vector<size_t> img_size; | std::vector<size_t> img_size; | ||||
| auto dst_data = out_tensors_[0]->MutableData(); | |||||
| auto dst_data = out_tensors_[0]->data_c(); | |||||
| auto dst_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; | auto dst_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; | ||||
| cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data)); | cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data)); | ||||
| for (int i = 0; i < in_tensors_.size(); i++) { | for (int i = 0; i < in_tensors_.size(); i++) { | ||||
| auto src_data = in_tensors_[i]->MutableData(); | |||||
| auto src_data = in_tensors_[i]->data_c(); | |||||
| allocator_->GetImageSize(src_data, &img_size); | allocator_->GetImageSize(src_data, &img_size); | ||||
| auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; | auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0}; | ||||
| auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1}; | auto region = cl::array<cl::size_type, 3U>{img_size[0], img_size[1], 1}; | ||||
| @@ -176,9 +176,9 @@ int ConcatOpenCLKernel::Run() { | |||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| if (in_tensors_.size() == 2) { | if (in_tensors_.size() == 2) { | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, output_shape_); | ||||
| @@ -187,10 +187,10 @@ int ConcatOpenCLKernel::Run() { | |||||
| auto input3_shape = in_tensors_[2]->shape(); | auto input3_shape = in_tensors_[2]->shape(); | ||||
| cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; | cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); | ||||
| @@ -202,11 +202,11 @@ int ConcatOpenCLKernel::Run() { | |||||
| cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; | cl_int4 input_shape3_ = {input3_shape[0], input3_shape[1], input3_shape[2], UP_DIV(input3_shape[3], C4NUM)}; | ||||
| cl_int4 input_shape4_ = {input4_shape[0], input4_shape[1], input4_shape[2], UP_DIV(input4_shape[3], C4NUM)}; | cl_int4 input_shape4_ = {input4_shape[0], input4_shape[1], input4_shape[2], UP_DIV(input4_shape[3], C4NUM)}; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[1]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[2]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape1_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape2_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape3_); | ||||
| @@ -79,7 +79,7 @@ void Conv2dTransposeOpenCLKernel::PadWeight() { | |||||
| padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); | padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); | ||||
| padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); | padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); | ||||
| memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); | memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); | ||||
| auto origin_weight = in_tensors_.at(kWeightIndex)->MutableData(); | |||||
| auto origin_weight = in_tensors_.at(kWeightIndex)->data_c(); | |||||
| auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type(); | auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type(); | ||||
| int index = 0; | int index = 0; | ||||
| for (int co_i = 0; co_i < div_co; co_i++) { | for (int co_i = 0; co_i < div_co; co_i++) { | ||||
| @@ -136,14 +136,14 @@ void Conv2dTransposeOpenCLKernel::PadWeight() { | |||||
| auto bias_dtype = in_tensors_[2]->data_type(); | auto bias_dtype = in_tensors_[2]->data_type(); | ||||
| if (bias_dtype == kNumberTypeFloat32 && enable_fp16_) { | if (bias_dtype == kNumberTypeFloat32 && enable_fp16_) { | ||||
| for (int i = 0; i < co; i++) { | for (int i = 0; i < co; i++) { | ||||
| reinterpret_cast<float16_t *>(bias_)[i] = reinterpret_cast<float *>(in_tensors_[2]->MutableData())[i]; | |||||
| reinterpret_cast<float16_t *>(bias_)[i] = reinterpret_cast<float *>(in_tensors_[2]->data_c())[i]; | |||||
| } | } | ||||
| } else if (bias_dtype == kNumberTypeFloat16 && !enable_fp16_) { | } else if (bias_dtype == kNumberTypeFloat16 && !enable_fp16_) { | ||||
| for (int i = 0; i < co; i++) { | for (int i = 0; i < co; i++) { | ||||
| reinterpret_cast<float *>(bias_)[i] = reinterpret_cast<float16_t *>(in_tensors_[2]->MutableData())[i]; | |||||
| reinterpret_cast<float *>(bias_)[i] = reinterpret_cast<float16_t *>(in_tensors_[2]->data_c())[i]; | |||||
| } | } | ||||
| } else { | } else { | ||||
| memcpy(bias_, in_tensors_[2]->MutableData(), co * data_size); | |||||
| memcpy(bias_, in_tensors_[2]->data_c(), co * data_size); | |||||
| } | } | ||||
| } | } | ||||
| allocator->UnmapBuffer(bias_); | allocator->UnmapBuffer(bias_); | ||||
| @@ -200,10 +200,10 @@ int Conv2dTransposeOpenCLKernel::Run() { | |||||
| cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), 1}; | cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), 1}; | ||||
| cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), 1}; | cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), 1}; | ||||
| int arg_cnt = 0; | int arg_cnt = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); | ||||
| @@ -89,7 +89,7 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { | |||||
| bool is_fp16 = ocl_runtime->GetFp16Enable(); | bool is_fp16 = ocl_runtime->GetFp16Enable(); | ||||
| // weight: o, h, w, i; o == group, i == 1 | // weight: o, h, w, i; o == group, i == 1 | ||||
| void *origin_weight = in_tensors_.at(kWeightIndex)->MutableData(); | |||||
| void *origin_weight = in_tensors_.at(kWeightIndex)->data_c(); | |||||
| int CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | int CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); | ||||
| int pack_weight_size = C4NUM * CO4 * parameter->kernel_h_ * parameter->kernel_w_; | int pack_weight_size = C4NUM * CO4 * parameter->kernel_h_ * parameter->kernel_w_; | ||||
| @@ -133,7 +133,7 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { | |||||
| bias_data_ = allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true); | bias_data_ = allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true); | ||||
| size_t up_co_size = C4NUM * CO4 * dtype_size; | size_t up_co_size = C4NUM * CO4 * dtype_size; | ||||
| memset(bias_data_, 0, up_co_size); | memset(bias_data_, 0, up_co_size); | ||||
| auto ori_bias = in_tensors_.at(kBiasIndex)->MutableData(); | |||||
| auto ori_bias = in_tensors_.at(kBiasIndex)->data_c(); | |||||
| if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) { | if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) { | ||||
| float16_t *bias_ptr = static_cast<float16_t*>(bias_data_); | float16_t *bias_ptr = static_cast<float16_t*>(bias_data_); | ||||
| for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) { | for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) { | ||||
| @@ -207,10 +207,10 @@ int DepthwiseConv2dOpenCLKernel::Run() { | |||||
| (cl_int)out_tensors_[0]->Batch()}; | (cl_int)out_tensors_[0]->Batch()}; | ||||
| int arg_cnt = 0; | int arg_cnt = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, packed_weight_, lite::opencl::MemType::BUF); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, kernel_size); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, stride); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); | ocl_runtime->SetKernelArg(kernel_, arg_cnt++, padding); | ||||
| @@ -93,8 +93,8 @@ void MatMulOpenCLKernel::PadWeight() { | |||||
| auto padWeightFp32 = reinterpret_cast<float *>(padWeight_); | auto padWeightFp32 = reinterpret_cast<float *>(padWeight_); | ||||
| auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_); | auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_); | ||||
| memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size); | memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size); | ||||
| auto originWeightFp32 = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->MutableData()); | |||||
| auto originWeightFp16 = reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->MutableData()); | |||||
| auto originWeightFp32 = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c()); | |||||
| auto originWeightFp16 = reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->data_c()); | |||||
| bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16; | bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16; | ||||
| // pad weight | // pad weight | ||||
| @@ -153,14 +153,14 @@ void MatMulOpenCLKernel::PadWeight() { | |||||
| if (in_tensors_.size() >= 3) { | if (in_tensors_.size() >= 3) { | ||||
| if (in_tensors_[2]->data_type() == kNumberTypeFloat32 && enable_fp16_) { | if (in_tensors_[2]->data_type() == kNumberTypeFloat32 && enable_fp16_) { | ||||
| for (int i = 0; i < co; i++) { | for (int i = 0; i < co; i++) { | ||||
| reinterpret_cast<float16_t *>(bias_)[i] = reinterpret_cast<float *>(in_tensors_[2]->MutableData())[i]; | |||||
| reinterpret_cast<float16_t *>(bias_)[i] = reinterpret_cast<float *>(in_tensors_[2]->data_c())[i]; | |||||
| } | } | ||||
| } else if (in_tensors_[2]->data_type() == kNumberTypeFloat16 && !enable_fp16_) { | } else if (in_tensors_[2]->data_type() == kNumberTypeFloat16 && !enable_fp16_) { | ||||
| for (int i = 0; i < co; i++) { | for (int i = 0; i < co; i++) { | ||||
| reinterpret_cast<float *>(bias_)[i] = reinterpret_cast<float16_t *>(in_tensors_[2]->MutableData())[i]; | |||||
| reinterpret_cast<float *>(bias_)[i] = reinterpret_cast<float16_t *>(in_tensors_[2]->data_c())[i]; | |||||
| } | } | ||||
| } else { | } else { | ||||
| memcpy(bias_, in_tensors_[2]->MutableData(), co * dtype_size); | |||||
| memcpy(bias_, in_tensors_[2]->data_c(), co * dtype_size); | |||||
| } | } | ||||
| } | } | ||||
| allocator->UnmapBuffer(bias_); | allocator->UnmapBuffer(bias_); | ||||
| @@ -210,10 +210,10 @@ int MatMulOpenCLKernel::Run() { | |||||
| int arg_count = 0; | int arg_count = 0; | ||||
| cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; | cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; | ||||
| cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; | cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); | ocl_runtime->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_); | ocl_runtime->SetKernelArg(kernel_, arg_count++, bias_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, in_shape); | ocl_runtime->SetKernelArg(kernel_, arg_count++, in_shape); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, out_shape); | ocl_runtime->SetKernelArg(kernel_, arg_count++, out_shape); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); | ocl_runtime->SetKernelArg(kernel_, arg_count++, hasBias_ ? 1 : 0); | ||||
| @@ -135,8 +135,8 @@ int PoolingOpenCLKernel::Run() { | |||||
| cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; | cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, output_shape); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, stride); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, stride); | ||||
| @@ -50,22 +50,22 @@ void PReluOpenCLKernel::InitBuffer() { | |||||
| if (enable_fp16_) { | if (enable_fp16_) { | ||||
| if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { | if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { | ||||
| auto PReluWeight_fp16 = reinterpret_cast<uint16_t *>(PReluWeight_); | auto PReluWeight_fp16 = reinterpret_cast<uint16_t *>(PReluWeight_); | ||||
| auto in_tensor_data_fp32 = reinterpret_cast<float *>(in_tensors_[1]->MutableData()); | |||||
| auto in_tensor_data_fp32 = reinterpret_cast<float *>(in_tensors_[1]->data_c()); | |||||
| for (int i = 0; i < elem_num; i++) { | for (int i = 0; i < elem_num; i++) { | ||||
| PReluWeight_fp16[i] = static_cast<float16_t>(in_tensor_data_fp32[i]); | PReluWeight_fp16[i] = static_cast<float16_t>(in_tensor_data_fp32[i]); | ||||
| } | } | ||||
| } else { | } else { | ||||
| memcpy(PReluWeight_, in_tensors_[1]->MutableData(), elem_num * fp_size); | |||||
| memcpy(PReluWeight_, in_tensors_[1]->data_c(), elem_num * fp_size); | |||||
| } | } | ||||
| } else { | } else { | ||||
| if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { | if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { | ||||
| auto PReluWeight_fp32 = reinterpret_cast<float *>(PReluWeight_); | auto PReluWeight_fp32 = reinterpret_cast<float *>(PReluWeight_); | ||||
| auto in_tensor_data_fp16 = reinterpret_cast<float16_t *>(in_tensors_[1]->MutableData()); | |||||
| auto in_tensor_data_fp16 = reinterpret_cast<float16_t *>(in_tensors_[1]->data_c()); | |||||
| for (int i = 0; i < elem_num; i++) { | for (int i = 0; i < elem_num; i++) { | ||||
| PReluWeight_fp32[i] = static_cast<float>(in_tensor_data_fp16[i]); | PReluWeight_fp32[i] = static_cast<float>(in_tensor_data_fp16[i]); | ||||
| } | } | ||||
| } else { | } else { | ||||
| memcpy(PReluWeight_, in_tensors_[1]->MutableData(), elem_num * fp_size); | |||||
| memcpy(PReluWeight_, in_tensors_[1]->data_c(), elem_num * fp_size); | |||||
| } | } | ||||
| } | } | ||||
| allocator->UnmapBuffer(PReluWeight_); | allocator->UnmapBuffer(PReluWeight_); | ||||
| @@ -110,8 +110,8 @@ int PReluOpenCLKernel::Run() { | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| std::map<schema::Format, int> data_type{{schema::Format::Format_NHWC4, 1}, {schema::Format::Format_NC4HW4, 2}}; | std::map<schema::Format, int> data_type{{schema::Format::Format_NHWC4, 1}, {schema::Format::Format_NC4HW4, 2}}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, input_shape_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, PReluWeight_); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, PReluWeight_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, data_type[op_format_]); | ||||
| @@ -135,8 +135,8 @@ int ReduceOpenCLKernel::Run() { | |||||
| std::vector<size_t> global = {static_cast<size_t>(c4)}; | std::vector<size_t> global = {static_cast<size_t>(c4)}; | ||||
| cl_int4 size = {h, w, c4, 1}; | cl_int4 size = {h, w, c4, 1}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); | ||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -36,7 +36,10 @@ int ReshapeOpenCLKernel::Init() { | |||||
| kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | kernel_name += "_" + std::string(EnumNameFormat(op_format_)); | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| enable_fp16_ = ocl_runtime->GetFp16Enable(); | enable_fp16_ = ocl_runtime->GetFp16Enable(); | ||||
| if (out_tensors_[0]->shape().size() != 2 && out_tensors_[0]->shape().size() != 4) { | |||||
| MS_LOG(ERROR) << "Reshape output size should in 2,4"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) { | if (in_tensors_[0]->shape().back() != out_tensors_[0]->shape().back()) { | ||||
| MS_LOG(ERROR) << "Reshape input channel " << in_tensors_[0]->shape().back() << " should equal output channel" | MS_LOG(ERROR) << "Reshape input channel " << in_tensors_[0]->shape().back() << " should equal output channel" | ||||
| << out_tensors_[0]->shape().back(); | << out_tensors_[0]->shape().back(); | ||||
| @@ -115,8 +118,8 @@ int ReshapeOpenCLKernel::Run() { | |||||
| cl_int4 size = {h, w, c4, 1}; | cl_int4 size = {h, w, c4, 1}; | ||||
| cl_int4 size_out = {oh, ow, c4, 1}; | cl_int4 size_out = {oh, ow, c4, 1}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, size); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, size_out); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, size_out); | ||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ||||
| @@ -110,7 +110,7 @@ int ScaleOpenCLKernel::InitBuffer() { | |||||
| if (!element_flag_) { | if (!element_flag_) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->MutableData() != nullptr) { | |||||
| if (in_tensors_[1]->category() == lite::Tensor::Category::CONST && in_tensors_[1]->data_c() != nullptr) { | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | auto allocator = ocl_runtime_->GetAllocator(); | ||||
| std::vector<size_t> img_size; | std::vector<size_t> img_size; | ||||
| GetImageSize(0, &img_size); | GetImageSize(0, &img_size); | ||||
| @@ -118,9 +118,9 @@ int ScaleOpenCLKernel::InitBuffer() { | |||||
| img_size[0] = 1; | img_size[0] = 1; | ||||
| img_size[1] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM); | img_size[1] = UP_DIV(in_tensors_[1]->shape()[0], C4NUM); | ||||
| scale_ptr_ = | scale_ptr_ = | ||||
| allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size); | |||||
| allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size); | |||||
| offset_ptr_ = | offset_ptr_ = | ||||
| allocator->CreateImageFromHost(in_tensors_[2]->MutableData(), in_tensors_[2]->ElementsNum(), img_size); | |||||
| allocator->CreateImageFromHost(in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum(), img_size); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int pack_weight_size = in_tensors_[1]->ElementsC4Num(); | int pack_weight_size = in_tensors_[1]->ElementsC4Num(); | ||||
| @@ -130,9 +130,9 @@ int ScaleOpenCLKernel::InitBuffer() { | |||||
| if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) { | if (in_tensors_[0]->GetFormat() == in_tensors_[1]->GetFormat()) { | ||||
| if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) { | if (in_tensors_[0]->data_type() == in_tensors_[1]->data_type()) { | ||||
| scale_ptr_ = | scale_ptr_ = | ||||
| allocator->CreateImageFromHost(in_tensors_[1]->MutableData(), in_tensors_[1]->ElementsNum(), img_size); | |||||
| allocator->CreateImageFromHost(in_tensors_[1]->data_c(), in_tensors_[1]->ElementsNum(), img_size); | |||||
| offset_ptr_ = | offset_ptr_ = | ||||
| allocator->CreateImageFromHost(in_tensors_[2]->MutableData(), in_tensors_[2]->ElementsNum(), img_size); | |||||
| allocator->CreateImageFromHost(in_tensors_[2]->data_c(), in_tensors_[2]->ElementsNum(), img_size); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " | MS_LOG(ERROR) << "Unsupport data type transpose from " << in_tensors_[1]->data_type() << "to " | ||||
| << in_tensors_[0]->data_type(); | << in_tensors_[0]->data_type(); | ||||
| @@ -153,8 +153,8 @@ int ScaleOpenCLKernel::InitBuffer() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | ||||
| PackNHWCToNC4HW4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNC4HW4<float, float>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNC4HW4<float, float>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNC4HW4<float, float>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); | |||||
| scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | ||||
| offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | ||||
| delete[] scale; | delete[] scale; | ||||
| @@ -172,8 +172,8 @@ int ScaleOpenCLKernel::InitBuffer() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | ||||
| PackNHWCToNC4HW4<float, float16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNC4HW4<float, float16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNC4HW4<float, float16_t>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNC4HW4<float, float16_t>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); | |||||
| scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | ||||
| offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | ||||
| delete[] scale; | delete[] scale; | ||||
| @@ -203,8 +203,8 @@ int ScaleOpenCLKernel::InitBuffer() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | std::function<float(float)> to_dtype = [](float x) -> float { return x; }; | ||||
| PackNHWCToNHWC4<float, float>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNHWC4<float, float>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNHWC4<float, float>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNHWC4<float, float>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); | |||||
| scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | ||||
| offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | ||||
| delete[] scale; | delete[] scale; | ||||
| @@ -222,8 +222,8 @@ int ScaleOpenCLKernel::InitBuffer() { | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | std::function<float16_t(float)> to_dtype = [](float x) -> float16_t { return static_cast<float16_t>(x); }; | ||||
| PackNHWCToNHWC4<float, float16_t>(in_tensors_[1]->MutableData(), scale, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNHWC4<float, float16_t>(in_tensors_[2]->MutableData(), offset, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNHWC4<float, float16_t>(in_tensors_[1]->data_c(), scale, batch, plane, channel, to_dtype); | |||||
| PackNHWCToNHWC4<float, float16_t>(in_tensors_[2]->data_c(), offset, batch, plane, channel, to_dtype); | |||||
| scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | scale_ptr_ = allocator->CreateImageFromHost(scale, in_tensors_[1]->ElementsNum(), img_size); | ||||
| offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | offset_ptr_ = allocator->CreateImageFromHost(offset, in_tensors_[2]->ElementsNum(), img_size); | ||||
| delete[] scale; | delete[] scale; | ||||
| @@ -309,27 +309,27 @@ int ScaleOpenCLKernel::Run() { | |||||
| MS_LOG(DEBUG) << this->name() << " Running!"; | MS_LOG(DEBUG) << this->name() << " Running!"; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (element_flag_) { | if (element_flag_) { | ||||
| void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->MutableData() : scale_ptr_; | |||||
| void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->MutableData() : offset_ptr_; | |||||
| void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_; | |||||
| void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->data_c() : offset_ptr_; | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); | ||||
| } else { | } else { | ||||
| if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { | if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { | ||||
| float scale = static_cast<float *>(in_tensors_[1]->MutableData())[0]; | |||||
| float offset = static_cast<float *>(in_tensors_[2]->MutableData())[0]; | |||||
| float scale = static_cast<float *>(in_tensors_[1]->data_c())[0]; | |||||
| float offset = static_cast<float *>(in_tensors_[2]->data_c())[0]; | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); | ||||
| } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { | ||||
| if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { | if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { | ||||
| float scale = static_cast<float *>(in_tensors_[1]->MutableData())[0]; | |||||
| float offset = static_cast<float *>(in_tensors_[2]->MutableData())[0]; | |||||
| float scale = static_cast<float *>(in_tensors_[1]->data_c())[0]; | |||||
| float offset = static_cast<float *>(in_tensors_[2]->data_c())[0]; | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); | ||||
| } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { | } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { | ||||
| float16_t scale = static_cast<float16_t *>(in_tensors_[1]->MutableData())[0]; | |||||
| float16_t offset = static_cast<float16_t *>(in_tensors_[2]->MutableData())[0]; | |||||
| float16_t scale = static_cast<float16_t *>(in_tensors_[1]->data_c())[0]; | |||||
| float16_t offset = static_cast<float16_t *>(in_tensors_[2]->data_c())[0]; | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); | ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); | ||||
| } else { | } else { | ||||
| @@ -338,7 +338,7 @@ int ScaleOpenCLKernel::Run() { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| int H = 0; | int H = 0; | ||||
| int W = 0; | int W = 0; | ||||
| if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { | if (out_tensors_[0]->GetFormat() == schema::Format_NC4HW4) { | ||||
| @@ -110,8 +110,8 @@ int SliceOpenCLKernel::Run() { | |||||
| std::vector<size_t> global = {1, OH, OW}; | std::vector<size_t> global = {1, OH, OW}; | ||||
| SlcieGetWorkGroup(global, &local, max_global[0]); | SlcieGetWorkGroup(global, &local, max_global[0]); | ||||
| int arg_cn = 0; | int arg_cn = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->MutableData()); // input tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->MutableData()); // out tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, input_shape_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, size_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, size_); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_cn++, begin_); | ocl_runtime->SetKernelArg(kernel_, arg_cn++, begin_); | ||||
| @@ -158,11 +158,11 @@ int SoftmaxOpenCLKernel::Run() { | |||||
| auto mask_ = GetMaskForLastChannel(channel_size); | auto mask_ = GetMaskForLastChannel(channel_size); | ||||
| cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; | cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (is_image_out_) { | if (is_image_out_) { | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| } else { | } else { | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData(), lite::opencl::MemType::BUF); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| } | } | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, mask); | runtime_->SetKernelArg(kernel_, arg_idx++, mask); | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, slices); | runtime_->SetKernelArg(kernel_, arg_idx++, slices); | ||||
| @@ -172,11 +172,11 @@ int SoftmaxOpenCLKernel::Run() { | |||||
| int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); | int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); | ||||
| cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; | cl_int4 input_shape = {in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], in_tensors_[0]->shape()[3], slices}; | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (is_image_out_) { | if (is_image_out_) { | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| } else { | } else { | ||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData(), lite::opencl::MemType::BUF); | |||||
| runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| } | } | ||||
| runtime_->SetKernelArg(kernel_, arg_idx, input_shape); | runtime_->SetKernelArg(kernel_, arg_idx, input_shape); | ||||
| SetWorkGroupSize(); | SetWorkGroupSize(); | ||||
| @@ -76,6 +76,14 @@ int ToFormatOpenCLKernel::InitNHWCShape() { | |||||
| nhwc_shape_ = {n, h, w, c}; | nhwc_shape_ = {n, h, w, c}; | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| if (shapex.size() == 3) { | |||||
| n = 1; | |||||
| h = 1; | |||||
| w = 1; | |||||
| c = 1; | |||||
| nhwc_shape_ = {n, h, w, c}; | |||||
| return RET_OK; | |||||
| } | |||||
| if (out_tensors_[0]->GetFormat() == schema::Format::Format_NC4HW4 || | if (out_tensors_[0]->GetFormat() == schema::Format::Format_NC4HW4 || | ||||
| out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4 || | out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC4 || | ||||
| out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC) { | out_tensors_[0]->GetFormat() == schema::Format::Format_NHWC) { | ||||
| @@ -159,8 +167,8 @@ int ToFormatOpenCLKernel::Run() { | |||||
| cl_int4 gsize{(cl_int)global[0], (cl_int)global[1], (cl_int)global[2], 1}; | cl_int4 gsize{(cl_int)global[0], (cl_int)global[1], (cl_int)global[2], 1}; | ||||
| auto src_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; | auto src_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; | ||||
| auto dst_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::IMG : lite::opencl::MemType::BUF; | auto dst_mem_type = (out_mem_type_ == OpenCLMemType::IMG) ? lite::opencl::MemType::IMG : lite::opencl::MemType::BUF; | ||||
| ocl_runtime->SetKernelArg(kernel_, 0, in_tensors_[0]->MutableData(), src_mem_type); | |||||
| ocl_runtime->SetKernelArg(kernel_, 1, out_tensors_[0]->MutableData(), dst_mem_type); | |||||
| ocl_runtime->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type); | |||||
| ocl_runtime->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type); | |||||
| ocl_runtime->SetKernelArg(kernel_, 2, gsize); | ocl_runtime->SetKernelArg(kernel_, 2, gsize); | ||||
| ocl_runtime->SetKernelArg(kernel_, 3, shape); | ocl_runtime->SetKernelArg(kernel_, 3, shape); | ||||
| ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ocl_runtime->RunKernel(kernel_, global, local, nullptr); | ||||
| @@ -121,11 +121,11 @@ int TransposeOpenCLKernel::Run() { | |||||
| cl_int2 HW = {h * w, hw4}; | cl_int2 HW = {h * w, hw4}; | ||||
| cl_int2 C = {c, c4}; | cl_int2 C = {c, c4}; | ||||
| int arg_idx = 0; | int arg_idx = 0; | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); | |||||
| if (out_mem_type_ == OpenCLMemType::BUF) { | if (out_mem_type_ == OpenCLMemType::BUF) { | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData(), lite::opencl::MemType::BUF); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); | |||||
| } else { | } else { | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->MutableData()); | |||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); | |||||
| } | } | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, HW); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, HW); | ||||
| ocl_runtime->SetKernelArg(kernel_, arg_idx++, C); | ocl_runtime->SetKernelArg(kernel_, arg_idx++, C); | ||||
| @@ -312,7 +312,7 @@ int SubGraphOpenCLKernel::ReSize() { return RET_OK; } | |||||
| int SubGraphOpenCLKernel::Run() { | int SubGraphOpenCLKernel::Run() { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | ||||
| for (auto &tensor : in_tensors_) { | for (auto &tensor : in_tensors_) { | ||||
| allocator_->UnmapBuffer(tensor->MutableData()); | |||||
| allocator_->UnmapBuffer(tensor->data_c()); | |||||
| } | } | ||||
| lite::opencl::OpenCLExecutor executor; | lite::opencl::OpenCLExecutor executor; | ||||
| @@ -127,6 +127,7 @@ class OpenCLRuntime { | |||||
| int UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; | int UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; | ||||
| int UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; | int UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue = nullptr) const; | ||||
| bool SyncCommandQueue(cl::CommandQueue *command_queue = nullptr); | bool SyncCommandQueue(cl::CommandQueue *command_queue = nullptr); | ||||
| /* Returns the current value of the init_done_ member unchanged. NOTE(review): presumably init_done_ is true only after OpenCL runtime initialization succeeded -- confirm where it is set. */ bool IsInitOK() {return init_done_;} | |||||
| /** | /** | ||||
| * Get kernel max worker group size. | * Get kernel max worker group size. | ||||
| @@ -24,6 +24,7 @@ | |||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #if SUPPORT_GPU | #if SUPPORT_GPU | ||||
| #include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | #include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #endif | #endif | ||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| @@ -242,7 +243,8 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens | |||||
| MS_ASSERT(primitive != nullptr); | MS_ASSERT(primitive != nullptr); | ||||
| TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors); | TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors); | ||||
| kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())}; | kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())}; | ||||
| if (context_->device_type_ == DT_GPU) { | |||||
| #if SUPPORT_GPU | |||||
| if (context_->device_type_ == DT_GPU && lite::opencl::OpenCLRuntime::GetInstance()->IsInitOK()) { | |||||
| desc.arch = kernel::KERNEL_ARCH::kGPU; | desc.arch = kernel::KERNEL_ARCH::kGPU; | ||||
| auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc); | auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc); | ||||
| if (kernel != nullptr) { | if (kernel != nullptr) { | ||||
| @@ -254,7 +256,7 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens | |||||
| << node->name_; | << node->name_; | ||||
| } | } | ||||
| } | } | ||||
| #endif | |||||
| desc.arch = kernel::KERNEL_ARCH::kCPU; | desc.arch = kernel::KERNEL_ARCH::kCPU; | ||||
| kernel::LiteKernel *kernel = nullptr; | kernel::LiteKernel *kernel = nullptr; | ||||
| if ((context_->float16_priority && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16) { | if ((context_->float16_priority && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16) { | ||||
| @@ -50,7 +50,7 @@ void LoadActivationData(void *dst, size_t dst_size, const std::string &file_path | |||||
| template <typename T> | template <typename T> | ||||
| void CompareRes(lite::Tensor *output_tensor, const std::string &standard_answer_file) { | void CompareRes(lite::Tensor *output_tensor, const std::string &standard_answer_file) { | ||||
| auto *output_data = reinterpret_cast<T *>(output_tensor->MutableData()); | |||||
| auto *output_data = reinterpret_cast<T *>(output_tensor->data_c()); | |||||
| size_t output_size = output_tensor->Size(); | size_t output_size = output_tensor->Size(); | ||||
| auto expect_data = reinterpret_cast<T *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); | auto expect_data = reinterpret_cast<T *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); | ||||
| constexpr float atol = 0.001; | constexpr float atol = 0.001; | ||||
| @@ -70,7 +70,7 @@ void CompareRes(lite::Tensor *output_tensor, const std::string &standard_answer_ | |||||
| template <typename T> | template <typename T> | ||||
| void printf_tensor(const std::string &str, mindspore::lite::Tensor *in_data) { | void printf_tensor(const std::string &str, mindspore::lite::Tensor *in_data) { | ||||
| MS_LOG(INFO) << str; | MS_LOG(INFO) << str; | ||||
| auto input_data = reinterpret_cast<T *>(in_data->MutableData()); | |||||
| auto input_data = reinterpret_cast<T *>(in_data->data_c()); | |||||
| for (int i = 0; i < in_data->ElementsNum(); ++i) { | for (int i = 0; i < in_data->ElementsNum(); ++i) { | ||||
| printf("%f ", input_data[i]); | printf("%f ", input_data[i]); | ||||
| } | } | ||||
| @@ -107,7 +107,7 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { | |||||
| std::vector<lite::Tensor *> inputs{input_tensor}; | std::vector<lite::Tensor *> inputs{input_tensor}; | ||||
| std::vector<lite::Tensor *> outputs{output_tensor}; | std::vector<lite::Tensor *> outputs{output_tensor}; | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); | |||||
| LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); | |||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| printf_tensor<float16_t>("ReluFp16:--input data---", inputs[0]); | printf_tensor<float16_t>("ReluFp16:--input data---", inputs[0]); | ||||
| } else { | } else { | ||||
| @@ -221,7 +221,7 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { | |||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| MS_LOG(INFO) << "Initialize input data"; | MS_LOG(INFO) << "Initialize input data"; | ||||
| LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); | |||||
| LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); | |||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| printf_tensor<float16_t>("Relu6:FP16--input data--", inputs[0]); | printf_tensor<float16_t>("Relu6:FP16--input data--", inputs[0]); | ||||
| } else { | } else { | ||||
| @@ -336,7 +336,7 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { | |||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| MS_LOG(INFO) << "Initialize input data"; | MS_LOG(INFO) << "Initialize input data"; | ||||
| LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); | |||||
| LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); | |||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| printf_tensor<float16_t>("Sigmoid:FP16--input data--", inputs[0]); | printf_tensor<float16_t>("Sigmoid:FP16--input data--", inputs[0]); | ||||
| } else { | } else { | ||||
| @@ -451,7 +451,7 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | |||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| MS_LOG(INFO) << "Initialize input data"; | MS_LOG(INFO) << "Initialize input data"; | ||||
| LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); | |||||
| LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); | |||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| printf_tensor<float16_t>("Leaky Relu:FP16--input data--", inputs[0]); | printf_tensor<float16_t>("Leaky Relu:FP16--input data--", inputs[0]); | ||||
| } else { | } else { | ||||
| @@ -566,7 +566,7 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | |||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| MS_LOG(INFO) << "Initialize input data"; | MS_LOG(INFO) << "Initialize input data"; | ||||
| LoadActivationData(inputs[0]->MutableData(), inputs[0]->Size(), in_file); | |||||
| LoadActivationData(inputs[0]->data_c(), inputs[0]->Size(), in_file); | |||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| printf_tensor<float16_t>("Tanh:FP16--input data--", inputs[0]); | printf_tensor<float16_t>("Tanh:FP16--input data--", inputs[0]); | ||||
| } else { | } else { | ||||
| @@ -114,11 +114,11 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { | |||||
| } | } | ||||
| sub_graph->Init(); | sub_graph->Init(); | ||||
| MS_LOG(INFO) << " initialize input data "; | MS_LOG(INFO) << " initialize input data "; | ||||
| memcpy(inputs[0]->MutableData(), input_data1, input1_size); | |||||
| memcpy(inputs[0]->data_c(), input_data1, input1_size); | |||||
| std::cout << "==================output data================" << std::endl; | std::cout << "==================output data================" << std::endl; | ||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->MutableData()); | |||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | |||||
| CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -122,7 +122,7 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh | |||||
| inputs.push_back(tensor_b); | inputs.push_back(tensor_b); | ||||
| } else { | } else { | ||||
| tensor_b->MallocData(); | tensor_b->MallocData(); | ||||
| memcpy(tensor_b->MutableData(), data_b, sizeof(T)); | |||||
| memcpy(tensor_b->data_c(), data_b, sizeof(T)); | |||||
| } | } | ||||
| std::vector<lite::Tensor *> outputs = {tensor_c}; | std::vector<lite::Tensor *> outputs = {tensor_c}; | ||||
| @@ -178,19 +178,19 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh | |||||
| } | } | ||||
| kernel->Init(); | kernel->Init(); | ||||
| memcpy(inputs[0]->MutableData(), data_a, sizeof(T) * element_num); | |||||
| memcpy(inputs[0]->data_c(), data_a, sizeof(T) * element_num); | |||||
| if (!is_bias_add) { | if (!is_bias_add) { | ||||
| memcpy(inputs[1]->MutableData(), data_b, sizeof(T) * element_num_b); | |||||
| memcpy(inputs[1]->data_c(), data_b, sizeof(T) * element_num_b); | |||||
| } | } | ||||
| kernel->Run(); | kernel->Run(); | ||||
| memcpy(data_c_ocl, outputs[0]->MutableData(), sizeof(T) * element_num); | |||||
| memcpy(data_c_ocl, outputs[0]->data_c(), sizeof(T) * element_num); | |||||
| LogData<T>(data_a, 10, "Data A : "); | LogData<T>(data_a, 10, "Data A : "); | ||||
| LogData<T>(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : "); | LogData<T>(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : "); | ||||
| LogData<T>(data_c_cpu, 10, "Expect compute : "); | LogData<T>(data_c_cpu, 10, "Expect compute : "); | ||||
| LogData<T>(outputs[0]->MutableData(), 10, "OpenCL compute : "); | |||||
| LogData<T>(outputs[0]->data_c(), 10, "OpenCL compute : "); | |||||
| bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num); | bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num); | ||||
| MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); | MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); | ||||
| EXPECT_EQ(true, cmp); | EXPECT_EQ(true, cmp); | ||||
| @@ -111,14 +111,14 @@ void RunTestCaseAvgPooling(const std::vector<int> &shape, void *input_data, void | |||||
| return; | return; | ||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| 2e-2); | 2e-2); | ||||
| } else { | } else { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| } | } | ||||
| inputs[0]->SetData(nullptr); | inputs[0]->SetData(nullptr); | ||||
| outputs[0]->SetData(nullptr); | outputs[0]->SetData(nullptr); | ||||
| @@ -130,15 +130,15 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { | |||||
| } | } | ||||
| sub_graph->Init(); | sub_graph->Init(); | ||||
| MS_LOG(INFO) << " init tensors "; | MS_LOG(INFO) << " init tensors "; | ||||
| memcpy(inputs[0]->MutableData(), input_data, input_size); | |||||
| memcpy(inputs[1]->MutableData(), scale_data, scale_size); | |||||
| memcpy(inputs[2]->MutableData(), offset_data, offset_size); | |||||
| memcpy(inputs[3]->MutableData(), mean_data, mean_size); | |||||
| memcpy(inputs[4]->MutableData(), var_data, var_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, input_size); | |||||
| memcpy(inputs[1]->data_c(), scale_data, scale_size); | |||||
| memcpy(inputs[2]->data_c(), offset_data, offset_size); | |||||
| memcpy(inputs[3]->data_c(), mean_data, mean_size); | |||||
| memcpy(inputs[4]->data_c(), var_data, var_size); | |||||
| std::cout << "==================output data================" << std::endl; | std::cout << "==================output data================" << std::endl; | ||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->MutableData()); | |||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | |||||
| CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.01); | CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.01); | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -247,15 +247,15 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { | |||||
| } | } | ||||
| sub_graph->Init(); | sub_graph->Init(); | ||||
| MS_LOG(INFO) << " init tensors "; | MS_LOG(INFO) << " init tensors "; | ||||
| memcpy(inputs[0]->MutableData(), input_data, input_size); | |||||
| memcpy(inputs[1]->MutableData(), scale_data, scale_size); | |||||
| memcpy(inputs[2]->MutableData(), offset_data, offset_size); | |||||
| memcpy(inputs[3]->MutableData(), mean_data, mean_size); | |||||
| memcpy(inputs[4]->MutableData(), var_data, var_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, input_size); | |||||
| memcpy(inputs[1]->data_c(), scale_data, scale_size); | |||||
| memcpy(inputs[2]->data_c(), offset_data, offset_size); | |||||
| memcpy(inputs[3]->data_c(), mean_data, mean_size); | |||||
| memcpy(inputs[4]->data_c(), var_data, var_size); | |||||
| std::cout << "==================output data================" << std::endl; | std::cout << "==================output data================" << std::endl; | ||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->MutableData()); | |||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | |||||
| CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -43,7 +43,7 @@ void LoadDataBiasAdd(void *dst, size_t dst_size, const std::string &file_path) { | |||||
| template <typename T> | template <typename T> | ||||
| void CompareOutBiasAdd(lite::Tensor *output_tensor, const std::string &standard_answer_file) { | void CompareOutBiasAdd(lite::Tensor *output_tensor, const std::string &standard_answer_file) { | ||||
| size_t output_size = output_tensor->ElementsNum(); | size_t output_size = output_tensor->ElementsNum(); | ||||
| auto output_data = reinterpret_cast<T *>(output_tensor->MutableData()); | |||||
| auto output_data = reinterpret_cast<T *>(output_tensor->data_c()); | |||||
| auto expect_data = reinterpret_cast<T *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); | auto expect_data = reinterpret_cast<T *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); | ||||
| constexpr float atol = 0.0002; | constexpr float atol = 0.0002; | ||||
| for (int i = 0; i < output_tensor->ElementsNum(); ++i) { | for (int i = 0; i < output_tensor->ElementsNum(); ++i) { | ||||
| @@ -62,7 +62,7 @@ void CompareOutBiasAdd(lite::Tensor *output_tensor, const std::string &standard_ | |||||
| template <typename T> | template <typename T> | ||||
| void printf_tensor_BiasAdd(const std::string log, mindspore::lite::Tensor *in_data, int size) { | void printf_tensor_BiasAdd(const std::string log, mindspore::lite::Tensor *in_data, int size) { | ||||
| MS_LOG(INFO) << log; | MS_LOG(INFO) << log; | ||||
| auto input_data = reinterpret_cast<T *>(in_data->MutableData()); | |||||
| auto input_data = reinterpret_cast<T *>(in_data->data_c()); | |||||
| for (int i = 0; i < size; ++i) { | for (int i = 0; i < size; ++i) { | ||||
| printf("%f ", input_data[i]); | printf("%f ", input_data[i]); | ||||
| } | } | ||||
| @@ -114,8 +114,8 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { | |||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| inputs[1]->MallocData(allocator); | inputs[1]->MallocData(allocator); | ||||
| LoadDataBiasAdd(input_tensor->MutableData(), input_tensor->Size(), in_file); | |||||
| LoadDataBiasAdd(weight_tensor->MutableData(), weight_tensor->Size(), weight_file); | |||||
| LoadDataBiasAdd(input_tensor->data_c(), input_tensor->Size(), in_file); | |||||
| LoadDataBiasAdd(weight_tensor->data_c(), weight_tensor->Size(), weight_file); | |||||
| if (ocl_runtime->GetFp16Enable()) { | if (ocl_runtime->GetFp16Enable()) { | ||||
| printf_tensor_BiasAdd<float16_t>("BiasAdd:FP16--input data", inputs[0], input_tensor->ElementsNum()); | printf_tensor_BiasAdd<float16_t>("BiasAdd:FP16--input data", inputs[0], input_tensor->ElementsNum()); | ||||
| printf_tensor_BiasAdd<float16_t>("BiasAdd:FP16--weight data", inputs[1], weight_tensor->ElementsNum()); | printf_tensor_BiasAdd<float16_t>("BiasAdd:FP16--weight data", inputs[1], weight_tensor->ElementsNum()); | ||||
| @@ -138,24 +138,24 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis3) { | |||||
| sub_graph->Init(); | sub_graph->Init(); | ||||
| MS_LOG(INFO) << " initialize input data "; | MS_LOG(INFO) << " initialize input data "; | ||||
| if (inputs.size() == 2) { | if (inputs.size() == 2) { | ||||
| memcpy(inputs[0]->MutableData(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->MutableData(), input_data2, input2_size); | |||||
| memcpy(inputs[0]->data_c(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->data_c(), input_data2, input2_size); | |||||
| } else if (inputs.size() == 3) { | } else if (inputs.size() == 3) { | ||||
| memcpy(inputs[0]->MutableData(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->MutableData(), input_data2, input2_size); | |||||
| memcpy(inputs[2]->MutableData(), input_data3, input3_size); | |||||
| memcpy(inputs[0]->data_c(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->data_c(), input_data2, input2_size); | |||||
| memcpy(inputs[2]->data_c(), input_data3, input3_size); | |||||
| } else if (inputs.size() == 4) { | } else if (inputs.size() == 4) { | ||||
| memcpy(inputs[0]->MutableData(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->MutableData(), input_data2, input2_size); | |||||
| memcpy(inputs[2]->MutableData(), input_data3, input3_size); | |||||
| memcpy(inputs[3]->MutableData(), input_data4, input4_size); | |||||
| memcpy(inputs[0]->data_c(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->data_c(), input_data2, input2_size); | |||||
| memcpy(inputs[2]->data_c(), input_data3, input3_size); | |||||
| memcpy(inputs[3]->data_c(), input_data4, input4_size); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << " input size must be 2 or 3 or 4"; | MS_LOG(ERROR) << " input size must be 2 or 3 or 4"; | ||||
| } | } | ||||
| std::cout << "==================output data================" << std::endl; | std::cout << "==================output data================" << std::endl; | ||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->MutableData()); | |||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | |||||
| CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -263,19 +263,19 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { | |||||
| sub_graph->Init(); | sub_graph->Init(); | ||||
| MS_LOG(INFO) << " initialize input data "; | MS_LOG(INFO) << " initialize input data "; | ||||
| if (inputs.size() == 2) { | if (inputs.size() == 2) { | ||||
| memcpy(inputs[0]->MutableData(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->MutableData(), input_data2, input2_size); | |||||
| memcpy(inputs[0]->data_c(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->data_c(), input_data2, input2_size); | |||||
| } else if (inputs.size() == 3) { | } else if (inputs.size() == 3) { | ||||
| memcpy(inputs[0]->MutableData(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->MutableData(), input_data2, input2_size); | |||||
| memcpy(inputs[2]->MutableData(), input_data3, input3_size); | |||||
| memcpy(inputs[0]->data_c(), input_data1, input1_size); | |||||
| memcpy(inputs[1]->data_c(), input_data2, input2_size); | |||||
| memcpy(inputs[2]->data_c(), input_data3, input3_size); | |||||
| } else { | } else { | ||||
| MS_LOG(ERROR) << " input size must be 2 or 3 "; | MS_LOG(ERROR) << " input size must be 2 or 3 "; | ||||
| } | } | ||||
| std::cout << "==================output data================" << std::endl; | std::cout << "==================output data================" << std::endl; | ||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->MutableData()); | |||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | |||||
| CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001); | CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001); | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -124,12 +124,12 @@ void RunTestCaseConv2dTranspose(const std::vector<int> &shape, void *input_data, | |||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, n * h * w * ci * dtype_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, n * h * w * ci * dtype_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, n * oh * ow * co, static_cast<float16_t>(1e-3), 2e-2); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, n * oh * ow * co, static_cast<float16_t>(1e-3), 2e-2); | |||||
| } else { | } else { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, n * oh * ow * co, static_cast<float>(1e-5)); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, n * oh * ow * co, static_cast<float>(1e-5)); | |||||
| } | } | ||||
| inputs[0]->SetData(nullptr); | inputs[0]->SetData(nullptr); | ||||
| @@ -112,11 +112,11 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat | |||||
| // freamework to do!!! | // freamework to do!!! | ||||
| inputs[0]->MallocData(allocator); | inputs[0]->MallocData(allocator); | ||||
| memcpy(inputs[0]->MutableData(), packed_input, sizeof(T2) * pack_input_size); | |||||
| memcpy(inputs[0]->data_c(), packed_input, sizeof(T2) * pack_input_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (is_compare) { | if (is_compare) { | ||||
| T2 *packed_output = reinterpret_cast<T2 *>(outputs[0]->MutableData()); | |||||
| T2 *packed_output = reinterpret_cast<T2 *>(outputs[0]->data_c()); | |||||
| auto packed_correct_data = std::make_unique<T2>(packed_output_size); | auto packed_correct_data = std::make_unique<T2>(packed_output_size); | ||||
| if (packed_correct_data.get() == nullptr) { | if (packed_correct_data.get() == nullptr) { | ||||
| delete[] packed_input; | delete[] packed_input; | ||||
| @@ -71,7 +71,7 @@ void test_main_gather(void *input_data, void *correct_data, const std::vector<in | |||||
| sub_graph->Init(); | sub_graph->Init(); | ||||
| MS_LOG(INFO) << " init tensors "; | MS_LOG(INFO) << " init tensors "; | ||||
| memcpy(inputs[0]->MutableData(), input_data, input_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, input_size); | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| @@ -111,13 +111,13 @@ void RunTestCaseMatMul(const std::vector<int> &shape, void *input_data, void *we | |||||
| return; | return; | ||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, tensor_x->ElementsNum() * dtype_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, tensor_x->ElementsNum() * dtype_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| CompareOutput(outputs[0]->data_c(), output_data, tensor_out->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| 2e-2); | 2e-2); | ||||
| } else { | } else { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast<float>(1e-5)); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, tensor_out->ElementsNum(), static_cast<float>(1e-5)); | |||||
| } | } | ||||
| tensor_x->SetData(nullptr); | tensor_x->SetData(nullptr); | ||||
| @@ -111,14 +111,14 @@ void RunTestCaseMaxPooling(const std::vector<int> &shape, void *input_data, void | |||||
| return; | return; | ||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| 2e-2); | 2e-2); | ||||
| } else { | } else { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| } | } | ||||
| inputs[0]->SetData(nullptr); | inputs[0]->SetData(nullptr); | ||||
| outputs[0]->SetData(nullptr); | outputs[0]->SetData(nullptr); | ||||
| @@ -44,7 +44,7 @@ void LoadDataPRelu(void *dst, size_t dst_size, const std::string &file_path) { | |||||
| template <typename T> | template <typename T> | ||||
| void CompareOutPRelu(lite::Tensor *output_tensor, const std::string &standard_answer_file) { | void CompareOutPRelu(lite::Tensor *output_tensor, const std::string &standard_answer_file) { | ||||
| auto *output_data = reinterpret_cast<T *>(output_tensor->MutableData()); | |||||
| auto *output_data = reinterpret_cast<T *>(output_tensor->data_c()); | |||||
| size_t output_size = output_tensor->Size(); | size_t output_size = output_tensor->Size(); | ||||
| auto expect_data = reinterpret_cast<T *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); | auto expect_data = reinterpret_cast<T *>(mindspore::lite::ReadFile(standard_answer_file.c_str(), &output_size)); | ||||
| constexpr float atol = 0.0002; | constexpr float atol = 0.0002; | ||||
| @@ -64,7 +64,7 @@ void CompareOutPRelu(lite::Tensor *output_tensor, const std::string &standard_an | |||||
| template <typename T> | template <typename T> | ||||
| void printf_tensor_Prelu(const std::string &log, mindspore::lite::Tensor *in_data, int size) { | void printf_tensor_Prelu(const std::string &log, mindspore::lite::Tensor *in_data, int size) { | ||||
| MS_LOG(INFO) << log; | MS_LOG(INFO) << log; | ||||
| auto input_data = reinterpret_cast<T *>(in_data->MutableData()); | |||||
| auto input_data = reinterpret_cast<T *>(in_data->data_c()); | |||||
| for (int i = 0; i < size; ++i) { | for (int i = 0; i < size; ++i) { | ||||
| printf("%f ", input_data[i]); | printf("%f ", input_data[i]); | ||||
| } | } | ||||
| @@ -113,8 +113,8 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { | |||||
| inputs[1]->MallocData(allocator); | inputs[1]->MallocData(allocator); | ||||
| MS_LOG(INFO) << "initialize input data"; | MS_LOG(INFO) << "initialize input data"; | ||||
| LoadDataPRelu(input_tensor->MutableData(), input_tensor->Size(), in_file); | |||||
| LoadDataPRelu(weight_tensor->MutableData(), weight_tensor->Size(), weight_file); | |||||
| LoadDataPRelu(input_tensor->data_c(), input_tensor->Size(), in_file); | |||||
| LoadDataPRelu(weight_tensor->data_c(), weight_tensor->Size(), weight_file); | |||||
| if (ocl_runtime->GetFp16Enable()) { | if (ocl_runtime->GetFp16Enable()) { | ||||
| printf_tensor_Prelu<float16_t>("PRELU:FP16--input data", input_tensor, inputs[0]->ElementsNum()); | printf_tensor_Prelu<float16_t>("PRELU:FP16--input data", input_tensor, inputs[0]->ElementsNum()); | ||||
| printf_tensor_Prelu<float16_t>("PRELU:FP16--weight data", weight_tensor, weight_tensor->ElementsNum()); | printf_tensor_Prelu<float16_t>("PRELU:FP16--weight data", weight_tensor, weight_tensor->ElementsNum()); | ||||
| @@ -90,14 +90,14 @@ void RunTestCaseReduce(const std::vector<int> &shape, void *input_data, void *ou | |||||
| return; | return; | ||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| 2e-2); | 2e-2); | ||||
| } else { | } else { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| } | } | ||||
| inputs[0]->SetData(nullptr); | inputs[0]->SetData(nullptr); | ||||
| outputs[0]->SetData(nullptr); | outputs[0]->SetData(nullptr); | ||||
| @@ -85,14 +85,14 @@ void RunTestCaseReshape(const std::vector<int> &shape, void *input_data, void *o | |||||
| return; | return; | ||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| 2e-2); | 2e-2); | ||||
| } else { | } else { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| } | } | ||||
| inputs[0]->SetData(nullptr); | inputs[0]->SetData(nullptr); | ||||
| outputs[0]->SetData(nullptr); | outputs[0]->SetData(nullptr); | ||||
| @@ -132,8 +132,8 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh | |||||
| } else { | } else { | ||||
| tensor_scale->MallocData(); | tensor_scale->MallocData(); | ||||
| tensor_offset->MallocData(); | tensor_offset->MallocData(); | ||||
| memcpy(tensor_scale->MutableData(), data_scale, sizeof(T)); | |||||
| memcpy(tensor_offset->MutableData(), data_offset, sizeof(T)); | |||||
| memcpy(tensor_scale->data_c(), data_scale, sizeof(T)); | |||||
| memcpy(tensor_offset->data_c(), data_offset, sizeof(T)); | |||||
| } | } | ||||
| std::vector<lite::Tensor *> outputs = {tensor_out}; | std::vector<lite::Tensor *> outputs = {tensor_out}; | ||||
| @@ -195,21 +195,21 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh | |||||
| } | } | ||||
| kernel->Init(); | kernel->Init(); | ||||
| memcpy(inputs[0]->MutableData(), data_in, sizeof(T) * element_num); | |||||
| memcpy(inputs[0]->data_c(), data_in, sizeof(T) * element_num); | |||||
| if (!is_broadcast) { | if (!is_broadcast) { | ||||
| memcpy(inputs[1]->MutableData(), data_scale, sizeof(T) * element_num_b); | |||||
| memcpy(inputs[2]->MutableData(), data_offset, sizeof(T) * element_num_b); | |||||
| memcpy(inputs[1]->data_c(), data_scale, sizeof(T) * element_num_b); | |||||
| memcpy(inputs[2]->data_c(), data_offset, sizeof(T) * element_num_b); | |||||
| } | } | ||||
| kernel->Run(); | kernel->Run(); | ||||
| memcpy(data_out_ocl, outputs[0]->MutableData(), sizeof(T) * element_num); | |||||
| memcpy(data_out_ocl, outputs[0]->data_c(), sizeof(T) * element_num); | |||||
| LogData<T>(data_in, 10, "Data input : "); | LogData<T>(data_in, 10, "Data input : "); | ||||
| LogData<T>(data_scale, tensor_scale->shape().empty() ? 1 : 10, "Data scale : "); | LogData<T>(data_scale, tensor_scale->shape().empty() ? 1 : 10, "Data scale : "); | ||||
| LogData<T>(data_offset, tensor_offset->shape().empty() ? 1 : 10, "Data offset : "); | LogData<T>(data_offset, tensor_offset->shape().empty() ? 1 : 10, "Data offset : "); | ||||
| LogData<T>(data_out_cpu, 10, "Expect compute : "); | LogData<T>(data_out_cpu, 10, "Expect compute : "); | ||||
| LogData<T>(outputs[0]->MutableData(), 10, "OpenCL compute : "); | |||||
| LogData<T>(outputs[0]->data_c(), 10, "OpenCL compute : "); | |||||
| bool cmp = DataCompare(data_out_cpu, data_out_ocl, element_num); | bool cmp = DataCompare(data_out_cpu, data_out_ocl, element_num); | ||||
| MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); | MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); | ||||
| EXPECT_EQ(true, cmp); | EXPECT_EQ(true, cmp); | ||||
| @@ -130,12 +130,12 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { | |||||
| sub_graph->Init(); | sub_graph->Init(); | ||||
| MS_LOG(INFO) << " init tensors "; | MS_LOG(INFO) << " init tensors "; | ||||
| memcpy(inputs[0]->MutableData(), input_data, input_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, input_size); | |||||
| std::cout << "==================output data================" << std::endl; | std::cout << "==================output data================" << std::endl; | ||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->MutableData()); | |||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | |||||
| CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -238,12 +238,12 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { | |||||
| sub_graph->Init(); | sub_graph->Init(); | ||||
| MS_LOG(INFO) << " init tensors "; | MS_LOG(INFO) << " init tensors "; | ||||
| memcpy(inputs[0]->MutableData(), input_data, input_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, input_size); | |||||
| std::cout << "==================output data================" << std::endl; | std::cout << "==================output data================" << std::endl; | ||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->MutableData()); | |||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | |||||
| CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | ||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| delete tensor; | delete tensor; | ||||
| @@ -89,14 +89,14 @@ void RunTestCaseSoftmax(const std::vector<int> &shape, void *input_data, void *o | |||||
| return; | return; | ||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, inputs[0]->ElementsNum() * dtype_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float16_t>(1e-3), | |||||
| 2e-2); | 2e-2); | ||||
| } else { | } else { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, outputs[0]->ElementsNum(), static_cast<float>(1e-5)); | |||||
| } | } | ||||
| inputs[0]->SetData(nullptr); | inputs[0]->SetData(nullptr); | ||||
| outputs[0]->SetData(nullptr); | outputs[0]->SetData(nullptr); | ||||
| @@ -76,7 +76,7 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { | |||||
| return; | return; | ||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, input_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, input_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| size_t output_size; | size_t output_size; | ||||
| @@ -87,7 +87,7 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { | |||||
| return; | return; | ||||
| } | } | ||||
| printf("==================output data=================\n"); | printf("==================output data=================\n"); | ||||
| float *output_data = reinterpret_cast<float *>(tensor_out->MutableData()); | |||||
| float *output_data = reinterpret_cast<float *>(tensor_out->data_c()); | |||||
| std::cout << std::endl; | std::cout << std::endl; | ||||
| int size_n = h * w * c; | int size_n = h * w * c; | ||||
| size_n = size_n > 100 ? 100 : size_n; | size_n = size_n > 100 ? 100 : size_n; | ||||
| @@ -89,13 +89,13 @@ void RunTestTranspose(const std::vector<int> &shape, void *input_data, void *out | |||||
| return; | return; | ||||
| } | } | ||||
| pGraph->Init(); | pGraph->Init(); | ||||
| memcpy(inputs[0]->MutableData(), input_data, h * w * c * dtype_size); | |||||
| memcpy(inputs[0]->data_c(), input_data, h * w * c * dtype_size); | |||||
| pGraph->Run(); | pGraph->Run(); | ||||
| if (enable_fp16) { | if (enable_fp16) { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, h * w * c, static_cast<float16_t>(1e-3), 2e-2); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, h * w * c, static_cast<float16_t>(1e-3), 2e-2); | |||||
| } else { | } else { | ||||
| CompareOutput(outputs[0]->MutableData(), output_data, h * w * c, static_cast<float>(1e-5)); | |||||
| CompareOutput(outputs[0]->data_c(), output_data, h * w * c, static_cast<float>(1e-5)); | |||||
| } | } | ||||
| inputs[0]->SetData(nullptr); | inputs[0]->SetData(nullptr); | ||||
| @@ -57,7 +57,7 @@ template <typename T> | |||||
| void CompareOutput(lite::Tensor *output_tensor, const std::string &file_path, T atol, float rtol = 1e-5) { | void CompareOutput(lite::Tensor *output_tensor, const std::string &file_path, T atol, float rtol = 1e-5) { | ||||
| size_t output_size; | size_t output_size; | ||||
| auto expect_data = mindspore::lite::ReadFile(file_path.c_str(), &output_size); | auto expect_data = mindspore::lite::ReadFile(file_path.c_str(), &output_size); | ||||
| CompareOutput(output_tensor->MutableData(), expect_data, output_tensor->ElementsNum(), atol, rtol); | |||||
| CompareOutput(output_tensor->data_c(), expect_data, output_tensor->ElementsNum(), atol, rtol); | |||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||