| @@ -26,9 +26,6 @@ | |||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #include "src/common/graph_util.h" | #include "src/common/graph_util.h" | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #if SUPPORT_GPU | |||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #endif | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| @@ -343,7 +340,7 @@ int LiteSession::Init(Context *context) { | |||||
| } | } | ||||
| #if SUPPORT_GPU | #if SUPPORT_GPU | ||||
| if (context_->device_type_ == DT_GPU) { | if (context_->device_type_ == DT_GPU) { | ||||
| auto opencl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto opencl_runtime = ocl_runtime_wrap_.GetInstance(); | |||||
| opencl_runtime->SetFp16Enable(context_->float16_priority); | opencl_runtime->SetFp16Enable(context_->float16_priority); | ||||
| if (opencl_runtime->Init() != RET_OK) { | if (opencl_runtime->Init() != RET_OK) { | ||||
| context_->device_type_ = DT_CPU; | context_->device_type_ = DT_CPU; | ||||
| @@ -394,11 +391,6 @@ LiteSession::~LiteSession() { | |||||
| for (auto *kernel : kernels_) { | for (auto *kernel : kernels_) { | ||||
| delete kernel; | delete kernel; | ||||
| } | } | ||||
| #if SUPPORT_GPU | |||||
| if (context_->device_type_ == DT_GPU) { | |||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | |||||
| #endif | |||||
| delete this->context_; | delete this->context_; | ||||
| delete this->executor; | delete this->executor; | ||||
| this->executor = nullptr; | this->executor = nullptr; | ||||
| @@ -30,6 +30,9 @@ | |||||
| #include "schema/model_generated.h" | #include "schema/model_generated.h" | ||||
| #include "src/executor.h" | #include "src/executor.h" | ||||
| #include "src/tensor.h" | #include "src/tensor.h" | ||||
| #if SUPPORT_GPU | |||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #endif | |||||
| namespace mindspore { | namespace mindspore { | ||||
| namespace lite { | namespace lite { | ||||
| @@ -108,6 +111,9 @@ class LiteSession : public session::LiteSession { | |||||
| std::unordered_map<std::string, mindspore::tensor::MSTensor *> output_tensor_map_; | std::unordered_map<std::string, mindspore::tensor::MSTensor *> output_tensor_map_; | ||||
| Executor *executor = nullptr; | Executor *executor = nullptr; | ||||
| std::atomic<bool> is_running_ = false; | std::atomic<bool> is_running_ = false; | ||||
| #if SUPPORT_GPU | |||||
| opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_; | |||||
| #endif | |||||
| }; | }; | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -54,7 +54,7 @@ int GatherOpenCLKernel::Init() { | |||||
| auto indices_tensor = in_tensors_.at(1); | auto indices_tensor = in_tensors_.at(1); | ||||
| int indices_num = indices_tensor->ElementsNum(); | int indices_num = indices_tensor->ElementsNum(); | ||||
| bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32; | bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32; | ||||
| auto allocator = lite::opencl::OpenCLRuntime::GetInstance()->GetAllocator(); | |||||
| auto allocator = ocl_runtime_->GetAllocator(); | |||||
| if (!isIndicesInt32) { | if (!isIndicesInt32) { | ||||
| indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num)); | indices_data_ = reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num)); | ||||
| if (indices_data_ == nullptr) { | if (indices_data_ == nullptr) { | ||||
| @@ -38,15 +38,10 @@ class OpenCLKernel : public LiteKernel { | |||||
| explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | explicit OpenCLKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, | ||||
| const std::vector<lite::Tensor *> &outputs) | const std::vector<lite::Tensor *> &outputs) | ||||
| : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) { | : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) { | ||||
| ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); | |||||
| } | } | ||||
| ~OpenCLKernel() { | |||||
| if (ocl_runtime_ != nullptr) { | |||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| ocl_runtime_ = nullptr; | |||||
| } | |||||
| } | |||||
| ~OpenCLKernel() {} | |||||
| virtual int Init() { return RET_ERROR; } | virtual int Init() { return RET_ERROR; } | ||||
| virtual int Prepare() { return RET_ERROR; } | virtual int Prepare() { return RET_ERROR; } | ||||
| @@ -69,7 +64,8 @@ class OpenCLKernel : public LiteKernel { | |||||
| schema::Format in_ori_format_{schema::Format::Format_NHWC}; | schema::Format in_ori_format_{schema::Format::Format_NHWC}; | ||||
| schema::Format out_ori_format_{schema::Format::Format_NHWC4}; | schema::Format out_ori_format_{schema::Format::Format_NHWC4}; | ||||
| schema::Format op_format_{schema::Format::Format_NHWC4}; | schema::Format op_format_{schema::Format::Format_NHWC4}; | ||||
| lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr}; | |||||
| lite::opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_; | |||||
| lite::opencl::OpenCLRuntime *ocl_runtime_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -17,7 +17,6 @@ | |||||
| #include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | #include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" | ||||
| #include <set> | #include <set> | ||||
| #include "src/runtime/opencl/opencl_executor.h" | #include "src/runtime/opencl/opencl_executor.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| @@ -161,7 +160,6 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te | |||||
| } | } | ||||
| int SubGraphOpenCLKernel::Init() { | int SubGraphOpenCLKernel::Init() { | ||||
| ocl_runtime_ = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| allocator_ = ocl_runtime_->GetAllocator(); | allocator_ = ocl_runtime_->GetAllocator(); | ||||
| MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size(); | MS_LOG(DEBUG) << "input num=" << in_tensors_.size() << ", output num=" << out_tensors_.size(); | ||||
| for (const auto tensor : in_tensors_) { | for (const auto tensor : in_tensors_) { | ||||
| @@ -308,10 +306,6 @@ int SubGraphOpenCLKernel::UnInit() { | |||||
| nodes_.clear(); | nodes_.clear(); | ||||
| in_convert_ops_.clear(); | in_convert_ops_.clear(); | ||||
| out_convert_ops_.clear(); | out_convert_ops_.clear(); | ||||
| if (ocl_runtime_ != nullptr) { | |||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| ocl_runtime_ = nullptr; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -36,7 +36,9 @@ class SubGraphOpenCLKernel : public SubGraphKernel { | |||||
| const std::vector<kernel::LiteKernel *> outKernels, | const std::vector<kernel::LiteKernel *> outKernels, | ||||
| const std::vector<kernel::LiteKernel *> nodes, const lite::InnerContext *ctx = nullptr, | const std::vector<kernel::LiteKernel *> nodes, const lite::InnerContext *ctx = nullptr, | ||||
| const mindspore::lite::PrimitiveC *primitive = nullptr) | const mindspore::lite::PrimitiveC *primitive = nullptr) | ||||
| : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx, primitive) {} | |||||
| : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx, primitive) { | |||||
| ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); | |||||
| } | |||||
| ~SubGraphOpenCLKernel() override; | ~SubGraphOpenCLKernel() override; | ||||
| int Init() override; | int Init() override; | ||||
| @@ -64,6 +66,7 @@ class SubGraphOpenCLKernel : public SubGraphKernel { | |||||
| std::vector<OpenCLToFormatParameter *> out_parameters_; | std::vector<OpenCLToFormatParameter *> out_parameters_; | ||||
| std::vector<LiteKernel *> in_convert_ops_; | std::vector<LiteKernel *> in_convert_ops_; | ||||
| std::vector<LiteKernel *> out_convert_ops_; | std::vector<LiteKernel *> out_convert_ops_; | ||||
| lite::opencl::OpenCLRuntimeWrapper ocl_runtime_wrap_; | |||||
| lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr}; | lite::opencl::OpenCLRuntime *ocl_runtime_{nullptr}; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -15,9 +15,11 @@ | |||||
| */ | */ | ||||
| #include "src/runtime/kernel/opencl/utils.h" | #include "src/runtime/kernel/opencl/utils.h" | ||||
| #include <fstream> | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include <vector> | #include <vector> | ||||
| #include "src/kernel_registry.h" | #include "src/kernel_registry.h" | ||||
| #include "src/runtime/opencl/opencl_runtime.h" | |||||
| using mindspore::lite::KernelRegistrar; | using mindspore::lite::KernelRegistrar; | ||||
| @@ -221,4 +223,64 @@ std::string CLErrorCode(cl_int error_code) { | |||||
| return "Unknown OpenCL error code"; | return "Unknown OpenCL error code"; | ||||
| } | } | ||||
| } | } | ||||
/// Writes `size` bytes starting at `mem` to `file_name` as raw binary data.
///
/// The file is opened for output in binary mode, so any existing content is
/// replaced. The file is still created (and left empty) when `mem` is null or
/// `size` is not positive, which matches what a zero-length write did before.
///
/// @param mem        start of the buffer to dump; may be null only if nothing is to be written.
/// @param file_name  path of the file to create or overwrite.
/// @param size       number of bytes to write.
void Write2File(void *mem, const std::string &file_name, int size) {
  std::ofstream os(file_name, std::ios::out | std::ios::binary);
  if (!os.is_open()) {
    // Previously the failed open was ignored and the write silently did nothing.
    fprintf(stderr, "Write2File: failed to open %s\n", file_name.c_str());
    return;
  }
  // Never hand a null pointer or a negative length to ostream::write.
  if (mem != nullptr && size > 0) {
    os.write(static_cast<const char *>(mem), size);
  }
  os.close();
  if (os.fail()) {
    fprintf(stderr, "Write2File: failed to write %d bytes to %s\n", size, file_name.c_str());
  }
}
| void PrintTensor(lite::Tensor *tensor, int num, const std::string &out_file) { | |||||
| if (tensor->data_c() == nullptr) { | |||||
| return; | |||||
| } | |||||
| auto runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| runtime->SyncCommandQueue(); | |||||
| auto allocator = runtime->GetAllocator(); | |||||
| auto origin_data = tensor->data_c(); | |||||
| allocator->MapBuffer(origin_data, CL_MAP_READ, nullptr, true); | |||||
| tensor->SetData(origin_data); | |||||
| auto Height = tensor->shape().size() == 4 ? tensor->Height() : 1; | |||||
| auto Width = tensor->shape().size() == 4 ? tensor->Width() : 1; | |||||
| auto SLICES = UP_DIV(tensor->Channel(), C4NUM); | |||||
| auto alignment = runtime->GetImagePitchAlignment(); | |||||
| auto dtype_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half4) : sizeof(cl_float4); | |||||
| auto row_pitch = (Width * SLICES + alignment - 1) / alignment * alignment * dtype_size; | |||||
| auto row_size = Width * SLICES * dtype_size; | |||||
| std::cout << "tensor->GetFormat() =" << tensor->GetFormat() << "\n"; | |||||
| std::cout << "Height =" << Height << "\n"; | |||||
| std::cout << "Width =" << Width << "\n"; | |||||
| std::cout << "SLICES =" << SLICES << "\n"; | |||||
| std::cout << "image_alignment =" << alignment << "\n"; | |||||
| std::cout << "dtype_size =" << dtype_size << "\n"; | |||||
| std::cout << "row_pitch =" << row_pitch << "\n"; | |||||
| std::cout << "row_size =" << row_size << "\n"; | |||||
| std::cout << "tensor->Size() =" << tensor->Size() << "\n"; | |||||
| std::vector<char> data(tensor->Size()); | |||||
| for (int i = 0; i < Height; ++i) { | |||||
| memcpy(static_cast<char *>(data.data()) + i * row_size, static_cast<char *>(origin_data) + i * row_pitch, row_size); | |||||
| } | |||||
| std::cout << "shape=("; | |||||
| for (auto x : tensor->shape()) { | |||||
| printf("%3d,", x); | |||||
| } | |||||
| printf("): "); | |||||
| for (size_t i = 0; i < num && i < tensor->ElementsNum(); ++i) { | |||||
| if (tensor->data_type() == kNumberTypeFloat16) | |||||
| printf("%zu %6.3f | ", i, (reinterpret_cast<float16_t *>(data.data()))[i]); | |||||
| else | |||||
| printf("%zu %6.3f | ", i, (reinterpret_cast<float *>(data.data()))[i]); | |||||
| } | |||||
| printf("\n"); | |||||
| if (!out_file.empty()) { | |||||
| Write2File(data.data(), out_file, tensor->Size()); | |||||
| } | |||||
| allocator->UnmapBuffer(origin_data); | |||||
| } | |||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -44,6 +44,10 @@ std::vector<size_t> GetCommonLocalSize(const std::vector<size_t> &global, int ma | |||||
| std::string CLErrorCode(cl_int error_code); | std::string CLErrorCode(cl_int error_code); | ||||
| void Write2File(void *mem, const std::string &file_name, int size); | |||||
| void PrintTensor(lite::Tensor *tensor, int num = 10, const std::string &out_file = ""); | |||||
| template <class T1, class T2> | template <class T1, class T2> | ||||
| void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane, int channel, const std::function<T2(T1)> &to_dtype) { | void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane, int channel, const std::function<T2(T1)> &to_dtype) { | ||||
| int c4 = UP_DIV(channel, C4NUM); | int c4 = UP_DIV(channel, C4NUM); | ||||
| @@ -27,11 +27,7 @@ | |||||
| namespace mindspore::lite::opencl { | namespace mindspore::lite::opencl { | ||||
| class OpenCLExecutor : Executor { | class OpenCLExecutor : Executor { | ||||
| public: | public: | ||||
| OpenCLExecutor() : Executor() { | |||||
| auto ocl_runtime = OpenCLRuntime::GetInstance(); | |||||
| allocator_ = ocl_runtime->GetAllocator(); | |||||
| OpenCLRuntime::DeleteInstance(); | |||||
| } | |||||
| OpenCLExecutor() : Executor() { allocator_ = ocl_runtime.GetInstance()->GetAllocator(); } | |||||
| int Prepare(const std::vector<kernel::LiteKernel *> &kernels); | int Prepare(const std::vector<kernel::LiteKernel *> &kernels); | ||||
| @@ -42,6 +38,7 @@ class OpenCLExecutor : Executor { | |||||
| protected: | protected: | ||||
| InnerContext *context = nullptr; | InnerContext *context = nullptr; | ||||
| OpenCLAllocator *allocator_; | OpenCLAllocator *allocator_; | ||||
| OpenCLRuntimeWrapper ocl_runtime; | |||||
| }; | }; | ||||
| } // namespace mindspore::lite::opencl | } // namespace mindspore::lite::opencl | ||||
| #endif | #endif | ||||
| @@ -393,11 +393,16 @@ int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> | |||||
| cl::Event event; | cl::Event event; | ||||
| cl_int ret = CL_SUCCESS; | cl_int ret = CL_SUCCESS; | ||||
| ret = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event); | ret = command_queue->enqueueNDRangeKernel(kernel, cl::NullRange, global_range, local_range, nullptr, &event); | ||||
| if (ret != CL_SUCCESS) { | if (ret != CL_SUCCESS) { | ||||
| MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(ret); | MS_LOG(ERROR) << "Kernel execute failed:" << CLErrorCode(ret); | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| static int cnt = 0; | |||||
| const int flush_period = 10; | |||||
| if (cnt % flush_period == 0) { | |||||
| command_queue->flush(); | |||||
| } | |||||
| cnt++; | |||||
| MS_LOG(DEBUG) << "RunKernel success!"; | MS_LOG(DEBUG) << "RunKernel success!"; | ||||
| #if MS_OPENCL_PROFILE | #if MS_OPENCL_PROFILE | ||||
| event.wait(); | event.wait(); | ||||
| @@ -37,11 +37,10 @@ struct GpuInfo { | |||||
| int model_num = 0; | int model_num = 0; | ||||
| float opencl_version = 0; | float opencl_version = 0; | ||||
| }; | }; | ||||
| class OpenCLRuntimeWrapper; | |||||
| class OpenCLRuntime { | class OpenCLRuntime { | ||||
| public: | public: | ||||
| static OpenCLRuntime *GetInstance(); | |||||
| static void DeleteInstance(); | |||||
| friend OpenCLRuntimeWrapper; | |||||
| ~OpenCLRuntime(); | ~OpenCLRuntime(); | ||||
| OpenCLRuntime(const OpenCLRuntime &) = delete; | OpenCLRuntime(const OpenCLRuntime &) = delete; | ||||
| @@ -138,6 +137,8 @@ class OpenCLRuntime { | |||||
| int GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id); | int GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id); | ||||
| private: | private: | ||||
| static OpenCLRuntime *GetInstance(); | |||||
| static void DeleteInstance(); | |||||
| OpenCLRuntime(); | OpenCLRuntime(); | ||||
| GpuInfo ParseGpuInfo(std::string device_name, std::string device_version); | GpuInfo ParseGpuInfo(std::string device_name, std::string device_version); | ||||
| @@ -169,5 +170,16 @@ class OpenCLRuntime { | |||||
| void *handle_{nullptr}; | void *handle_{nullptr}; | ||||
| }; | }; | ||||
| class OpenCLRuntimeWrapper { | |||||
| public: | |||||
| OpenCLRuntimeWrapper() { ocl_runtime_ = OpenCLRuntime::GetInstance(); } | |||||
| ~OpenCLRuntimeWrapper() { OpenCLRuntime::DeleteInstance(); } | |||||
| explicit OpenCLRuntimeWrapper(const OpenCLRuntime &) = delete; | |||||
| OpenCLRuntimeWrapper &operator=(const OpenCLRuntime &) = delete; | |||||
| OpenCLRuntime *GetInstance() { return ocl_runtime_; } | |||||
| private: | |||||
| OpenCLRuntime *ocl_runtime_{nullptr}; | |||||
| }; | |||||
| } // namespace mindspore::lite::opencl | } // namespace mindspore::lite::opencl | ||||
| #endif // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_ | #endif // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_ | ||||
| @@ -82,7 +82,7 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { | |||||
| std::string in_file = "/data/local/tmp/in_data.bin"; | std::string in_file = "/data/local/tmp/in_data.bin"; | ||||
| std::string out_file = "/data/local/tmp/relu.bin"; | std::string out_file = "/data/local/tmp/relu.bin"; | ||||
| MS_LOG(INFO) << "Relu Begin test!"; | MS_LOG(INFO) << "Relu Begin test!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| auto data_type = kNumberTypeFloat16; | auto data_type = kNumberTypeFloat16; | ||||
| @@ -184,14 +184,13 @@ TEST_F(TestActivationOpenCL, ReluFp_dim4) { | |||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| delete sub_graph; | delete sub_graph; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { | TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { | ||||
| std::string in_file = "/data/local/tmp/in_data.bin"; | std::string in_file = "/data/local/tmp/in_data.bin"; | ||||
| std::string out_file = "/data/local/tmp/relu6.bin"; | std::string out_file = "/data/local/tmp/relu6.bin"; | ||||
| MS_LOG(INFO) << "Relu6 Begin test!"; | MS_LOG(INFO) << "Relu6 Begin test!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| auto data_type = kNumberTypeFloat16; | auto data_type = kNumberTypeFloat16; | ||||
| ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ||||
| bool enable_fp16 = ocl_runtime->GetFp16Enable(); | bool enable_fp16 = ocl_runtime->GetFp16Enable(); | ||||
| @@ -296,14 +295,13 @@ TEST_F(TestActivationOpenCL, Relu6Fp_dim4) { | |||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| delete sub_graph; | delete sub_graph; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { | TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { | ||||
| std::string in_file = "/data/local/tmp/in_data.bin"; | std::string in_file = "/data/local/tmp/in_data.bin"; | ||||
| std::string out_file = "/data/local/tmp/sigmoid.bin"; | std::string out_file = "/data/local/tmp/sigmoid.bin"; | ||||
| MS_LOG(INFO) << "Sigmoid Begin test!"; | MS_LOG(INFO) << "Sigmoid Begin test!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto data_type = kNumberTypeFloat32; | auto data_type = kNumberTypeFloat32; | ||||
| ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ||||
| @@ -408,14 +406,13 @@ TEST_F(TestActivationOpenCL, SigmoidFp_dim4) { | |||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| delete sub_graph; | delete sub_graph; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | ||||
| std::string in_file = "/data/local/tmp/in_data.bin"; | std::string in_file = "/data/local/tmp/in_data.bin"; | ||||
| std::string out_file = "/data/local/tmp/leaky_relu.bin"; | std::string out_file = "/data/local/tmp/leaky_relu.bin"; | ||||
| MS_LOG(INFO) << "Leaky relu Begin test!"; | MS_LOG(INFO) << "Leaky relu Begin test!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto data_type = kNumberTypeFloat16; // need modify | auto data_type = kNumberTypeFloat16; // need modify | ||||
| ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ||||
| @@ -519,14 +516,13 @@ TEST_F(TestActivationOpenCL, LeakyReluFp_dim4) { | |||||
| delete param; | delete param; | ||||
| delete input_tensor; | delete input_tensor; | ||||
| delete output_tensor; | delete output_tensor; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | ||||
| std::string in_file = "/data/local/tmp/test_data/in_tanhfp16.bin"; | std::string in_file = "/data/local/tmp/test_data/in_tanhfp16.bin"; | ||||
| std::string out_file = "/data/local/tmp/test_data/out_tanhfp16.bin"; | std::string out_file = "/data/local/tmp/test_data/out_tanhfp16.bin"; | ||||
| MS_LOG(INFO) << "Tanh Begin test!"; | MS_LOG(INFO) << "Tanh Begin test!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto data_type = kNumberTypeFloat16; | auto data_type = kNumberTypeFloat16; | ||||
| ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ||||
| @@ -627,7 +623,6 @@ TEST_F(TestActivationOpenCLTanh, TanhFp_dim4) { | |||||
| printf_tensor<float>("Tanh:FP32--output data---", outputs[0]); | printf_tensor<float>("Tanh:FP32--output data---", outputs[0]); | ||||
| CompareRes<float>(output_tensor, out_file); | CompareRes<float>(output_tensor, out_file); | ||||
| } | } | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| input_tensor->SetData(nullptr); | input_tensor->SetData(nullptr); | ||||
| delete input_tensor; | delete input_tensor; | ||||
| output_tensor->SetData(nullptr); | output_tensor->SetData(nullptr); | ||||
| @@ -43,7 +43,7 @@ void CompareOutputData1(T *input_data1, T *output_data, T *correct_data, int siz | |||||
| TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { | TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->SetFp16Enable(true); | ocl_runtime->SetFp16Enable(true); | ||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -125,7 +125,6 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -139,7 +138,7 @@ TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { | |||||
| TEST_F(TestArithmeticSelfOpenCLCI, ArithmeticSelfRound) { | TEST_F(TestArithmeticSelfOpenCLCI, ArithmeticSelfRound) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| float input_data1[] = {0.75f, 0.06f, 0.74f, 0.30f, 0.9f, 0.59f, 0.03f, 0.37f, | float input_data1[] = {0.75f, 0.06f, 0.74f, 0.30f, 0.9f, 0.59f, 0.03f, 0.37f, | ||||
| @@ -216,7 +215,6 @@ TEST_F(TestArithmeticSelfOpenCLCI, ArithmeticSelfRound) { | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -68,7 +68,7 @@ static void LogData(void *data, const int size, const std::string prefix) { | |||||
| template <class T> | template <class T> | ||||
| static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) { | static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) { | ||||
| bool is_log_data = false; | bool is_log_data = false; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| bool is_bias_add = shape_b.empty(); | bool is_bias_add = shape_b.empty(); | ||||
| @@ -212,7 +212,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh | |||||
| for (auto tensor : outputs) { | for (auto tensor : outputs) { | ||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| class TestArithmeticOpenCL : public mindspore::CommonTest { | class TestArithmeticOpenCL : public mindspore::CommonTest { | ||||
| @@ -53,7 +53,7 @@ void InitAvgPoolingParam(PoolingParameter *param) { | |||||
| } | } | ||||
| void RunTestCaseAvgPooling(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16) { | void RunTestCaseAvgPooling(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | ||||
| ocl_runtime->SetFp16Enable(enable_fp16); | ocl_runtime->SetFp16Enable(enable_fp16); | ||||
| @@ -125,7 +125,6 @@ void RunTestCaseAvgPooling(const std::vector<int> &shape, void *input_data, void | |||||
| } | } | ||||
| MS_LOG(INFO) << "Test AvgPool2d passed"; | MS_LOG(INFO) << "Test AvgPool2d passed"; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestAvgPoolingOpenCL, AvgPoolingFp32) { | TEST_F(TestAvgPoolingOpenCL, AvgPoolingFp32) { | ||||
| @@ -38,7 +38,7 @@ class TestBatchnormOpenCLCI : public mindspore::CommonTest { | |||||
| TEST_F(TestBatchnormOpenCLCI, Batchnormfp32CI) { | TEST_F(TestBatchnormOpenCLCI, Batchnormfp32CI) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -142,7 +142,6 @@ TEST_F(TestBatchnormOpenCLCI, Batchnormfp32CI) { | |||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -156,7 +155,7 @@ TEST_F(TestBatchnormOpenCLCI, Batchnormfp32CI) { | |||||
| TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { | TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { | ||||
| MS_LOG(INFO) << "begin test"; | MS_LOG(INFO) << "begin test"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->SetFp16Enable(true); | ocl_runtime->SetFp16Enable(true); | ||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -262,7 +261,6 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { | |||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.01); | CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.01); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -276,7 +274,7 @@ TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { | |||||
| TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { | TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -381,7 +379,6 @@ TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { | |||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -75,7 +75,7 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { | |||||
| std::string weight_file = "/data/local/tmp/weight_data.bin"; | std::string weight_file = "/data/local/tmp/weight_data.bin"; | ||||
| std::string standard_answer_file = "/data/local/tmp/biasadd.bin"; | std::string standard_answer_file = "/data/local/tmp/biasadd.bin"; | ||||
| MS_LOG(INFO) << "BiasAdd Begin test:"; | MS_LOG(INFO) << "BiasAdd Begin test:"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto data_type = kNumberTypeFloat16; // need modify | auto data_type = kNumberTypeFloat16; // need modify | ||||
| ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ||||
| @@ -200,6 +200,5 @@ TEST_F(TestBiasAddOpenCL, BiasAddFp32_dim4) { | |||||
| delete output_tensor; | delete output_tensor; | ||||
| delete sub_graph; | delete sub_graph; | ||||
| delete param; | delete param; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -38,7 +38,7 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou | |||||
| TEST_F(TestCastSelfOpenCL, Castfp32tofp16) { | TEST_F(TestCastSelfOpenCL, Castfp32tofp16) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -113,7 +113,6 @@ TEST_F(TestCastSelfOpenCL, Castfp32tofp16) { | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -127,7 +126,7 @@ TEST_F(TestCastSelfOpenCL, Castfp32tofp16) { | |||||
| TEST_F(TestCastSelfOpenCL, Castfp16tofp32) { | TEST_F(TestCastSelfOpenCL, Castfp16tofp32) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -201,7 +200,6 @@ TEST_F(TestCastSelfOpenCL, Castfp16tofp32) { | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -47,7 +47,7 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou | |||||
| TEST_F(TestConcatOpenCLCI, ConcatFp32_2inputforCI) { | TEST_F(TestConcatOpenCLCI, ConcatFp32_2inputforCI) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -134,7 +134,6 @@ TEST_F(TestConcatOpenCLCI, ConcatFp32_2inputforCI) { | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001); | CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -148,7 +147,7 @@ TEST_F(TestConcatOpenCLCI, ConcatFp32_2inputforCI) { | |||||
| TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis1) { | TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis1) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->SetFp16Enable(true); | ocl_runtime->SetFp16Enable(true); | ||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -264,7 +263,6 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis1) { | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -278,7 +276,7 @@ TEST_F(TestConcatOpenCLfp16, ConcatFp16_2input_dim4_axis1) { | |||||
| TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { | TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -385,7 +383,6 @@ TEST_F(TestConcatOpenCLfp32, ConcatFp32_2input_dim4_axis3) { | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001); | CompareOutputData1(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -32,7 +32,7 @@ class TestConv2dTransposeOpenCL : public mindspore::CommonTest { | |||||
| void RunTestCaseConv2dTranspose(const std::vector<int> &shape, void *input_data, void *weight_data, void *bias_data, | void RunTestCaseConv2dTranspose(const std::vector<int> &shape, void *input_data, void *weight_data, void *bias_data, | ||||
| void *output_data, bool enable_fp16) { | void *output_data, bool enable_fp16) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | ||||
| ocl_runtime->SetFp16Enable(enable_fp16); | ocl_runtime->SetFp16Enable(enable_fp16); | ||||
| @@ -134,7 +134,6 @@ void RunTestCaseConv2dTranspose(const std::vector<int> &shape, void *input_data, | |||||
| for (auto t : outputs) { | for (auto t : outputs) { | ||||
| t->SetData(nullptr); | t->SetData(nullptr); | ||||
| } | } | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) { | TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) { | ||||
| @@ -157,7 +157,7 @@ void TEST_MAIN(const std::string &attr, Format input_format, Format output_forma | |||||
| &param->dilation_h_, &param->dilation_w_); | &param->dilation_h_, &param->dilation_w_); | ||||
| MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator"; | MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -201,7 +201,6 @@ void TEST_MAIN(const std::string &attr, Format input_format, Format output_forma | |||||
| input.SetData(nullptr); | input.SetData(nullptr); | ||||
| output.SetData(nullptr); | output.SetData(nullptr); | ||||
| delete sub_graph; | delete sub_graph; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| void TEST_MAIN(const std::string &attr, Format input_format, Format output_format, const TypeId data_type, | void TEST_MAIN(const std::string &attr, Format input_format, Format output_format, const TypeId data_type, | ||||
| @@ -33,7 +33,7 @@ class TestConvolutionDwOpenCL : public mindspore::CommonTest { | |||||
| template <class T1, class T2> | template <class T1, class T2> | ||||
| void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_data, T2 *gnd_data, schema::Format format, | void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_data, T2 *gnd_data, schema::Format format, | ||||
| TypeId dtype = kNumberTypeFloat32, bool is_compare = true, T2 err_max = 1e-5) { | TypeId dtype = kNumberTypeFloat32, bool is_compare = true, T2 err_max = 1e-5) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| if (dtype == kNumberTypeFloat16) { | if (dtype == kNumberTypeFloat16) { | ||||
| @@ -167,7 +167,6 @@ void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_dat | |||||
| inputs[1]->SetData(nullptr); | inputs[1]->SetData(nullptr); | ||||
| inputs[2]->SetData(nullptr); | inputs[2]->SetData(nullptr); | ||||
| delete[] packed_input; | delete[] packed_input; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| inputs[0]->SetData(nullptr); | inputs[0]->SetData(nullptr); | ||||
| outputs[0]->SetData(nullptr); | outputs[0]->SetData(nullptr); | ||||
| return; | return; | ||||
| @@ -32,7 +32,7 @@ void test_main_gather(void *input_data, void *correct_data, const std::vector<in | |||||
| const std::vector<int> &indices, GatherParameter *param, TypeId data_type, | const std::vector<int> &indices, GatherParameter *param, TypeId data_type, | ||||
| schema::Format format) { | schema::Format format) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -31,7 +31,7 @@ class TestMatMulOpenCL : public mindspore::CommonTest { | |||||
| void RunTestCaseMatMul(const std::vector<int> &shape, void *input_data, void *weight_data, void *output_data, | void RunTestCaseMatMul(const std::vector<int> &shape, void *input_data, void *weight_data, void *output_data, | ||||
| bool enable_fp16, int dims) { | bool enable_fp16, int dims) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | ||||
| ocl_runtime->SetFp16Enable(enable_fp16); | ocl_runtime->SetFp16Enable(enable_fp16); | ||||
| @@ -123,7 +123,6 @@ void RunTestCaseMatMul(const std::vector<int> &shape, void *input_data, void *we | |||||
| t->SetData(nullptr); | t->SetData(nullptr); | ||||
| } | } | ||||
| MS_LOG(INFO) << "TestMatMul passed"; | MS_LOG(INFO) << "TestMatMul passed"; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestMatMulOpenCL, MatMul2DFp32) { | TEST_F(TestMatMulOpenCL, MatMul2DFp32) { | ||||
| @@ -53,7 +53,7 @@ void InitMaxPoolingParam(PoolingParameter *param) { | |||||
| } | } | ||||
| void RunTestCaseMaxPooling(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16) { | void RunTestCaseMaxPooling(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | ||||
| ocl_runtime->SetFp16Enable(enable_fp16); | ocl_runtime->SetFp16Enable(enable_fp16); | ||||
| @@ -124,7 +124,6 @@ void RunTestCaseMaxPooling(const std::vector<int> &shape, void *input_data, void | |||||
| } | } | ||||
| MS_LOG(INFO) << "Test MaxPool2d passed"; | MS_LOG(INFO) << "Test MaxPool2d passed"; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestMaxPoolingOpenCL, MaxPoolingFp32) { | TEST_F(TestMaxPoolingOpenCL, MaxPoolingFp32) { | ||||
| @@ -77,7 +77,7 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { | |||||
| std::string weight_file = "/data/local/tmp/weight_data.bin"; | std::string weight_file = "/data/local/tmp/weight_data.bin"; | ||||
| std::string standard_answer_file = "/data/local/tmp/caffe_prelu.bin"; | std::string standard_answer_file = "/data/local/tmp/caffe_prelu.bin"; | ||||
| MS_LOG(INFO) << "-------------------->> Begin test PRelu!"; | MS_LOG(INFO) << "-------------------->> Begin test PRelu!"; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -194,6 +194,5 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { | |||||
| delete weight_tensor; | delete weight_tensor; | ||||
| delete param; | delete param; | ||||
| delete sub_graph; | delete sub_graph; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -31,7 +31,7 @@ class TestReduceOpenCL : public mindspore::CommonTest { | |||||
| void RunTestCaseReduce(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16, | void RunTestCaseReduce(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16, | ||||
| int reduce_mode) { | int reduce_mode) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | ||||
| ocl_runtime->SetFp16Enable(enable_fp16); | ocl_runtime->SetFp16Enable(enable_fp16); | ||||
| @@ -103,7 +103,6 @@ void RunTestCaseReduce(const std::vector<int> &shape, void *input_data, void *ou | |||||
| } | } | ||||
| MS_LOG(INFO) << "Test Reduce passed"; | MS_LOG(INFO) << "Test Reduce passed"; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestReduceOpenCL, ReduceMeanFp32) { | TEST_F(TestReduceOpenCL, ReduceMeanFp32) { | ||||
| @@ -31,7 +31,7 @@ class TestReshapeOpenCL : public mindspore::CommonTest { | |||||
| void RunTestCaseReshape(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16, | void RunTestCaseReshape(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16, | ||||
| bool is_output_2d) { | bool is_output_2d) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | ||||
| ocl_runtime->SetFp16Enable(enable_fp16); | ocl_runtime->SetFp16Enable(enable_fp16); | ||||
| @@ -99,7 +99,6 @@ void RunTestCaseReshape(const std::vector<int> &shape, void *input_data, void *o | |||||
| } | } | ||||
| MS_LOG(INFO) << "Test Reshape passed"; | MS_LOG(INFO) << "Test Reshape passed"; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestReshapeOpenCL, ReshapeFp32) { | TEST_F(TestReshapeOpenCL, ReshapeFp32) { | ||||
| @@ -68,7 +68,7 @@ static void LogData(void *data, const int size, const std::string prefix) { | |||||
| template <class T> | template <class T> | ||||
| static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) { | static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) { | ||||
| bool is_log_data = false; | bool is_log_data = false; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| bool is_broadcast = shape_b.empty(); | bool is_broadcast = shape_b.empty(); | ||||
| @@ -232,7 +232,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh | |||||
| for (auto tensor : outputs) { | for (auto tensor : outputs) { | ||||
| delete tensor; | delete tensor; | ||||
| } | } | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| class TestScaleOpenCL : public mindspore::CommonTest { | class TestScaleOpenCL : public mindspore::CommonTest { | ||||
| @@ -42,7 +42,7 @@ void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bou | |||||
| TEST_F(TestSliceOpenCLfp32, Slicefp32CI) { | TEST_F(TestSliceOpenCLfp32, Slicefp32CI) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -139,7 +139,6 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32CI) { | |||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -153,7 +152,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32CI) { | |||||
| TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { | TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -248,7 +247,6 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { | |||||
| auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float *>(output_tensor->data_c()); | ||||
| CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -262,7 +260,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { | |||||
| TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { | TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { | ||||
| MS_LOG(INFO) << " begin test "; | MS_LOG(INFO) << " begin test "; | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->SetFp16Enable(true); | ocl_runtime->SetFp16Enable(true); | ||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| @@ -358,7 +356,6 @@ TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { | |||||
| sub_graph->Run(); | sub_graph->Run(); | ||||
| auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | auto *output_data_gpu = reinterpret_cast<float16_t *>(output_tensor->data_c()); | ||||
| CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| for (auto tensor : inputs) { | for (auto tensor : inputs) { | ||||
| tensor->SetData(nullptr); | tensor->SetData(nullptr); | ||||
| delete tensor; | delete tensor; | ||||
| @@ -30,7 +30,7 @@ class TestSoftmaxOpenCL : public mindspore::CommonTest { | |||||
| }; | }; | ||||
| void RunTestCaseSoftmax(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16) { | void RunTestCaseSoftmax(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | ||||
| ocl_runtime->SetFp16Enable(enable_fp16); | ocl_runtime->SetFp16Enable(enable_fp16); | ||||
| @@ -103,7 +103,6 @@ void RunTestCaseSoftmax(const std::vector<int> &shape, void *input_data, void *o | |||||
| } | } | ||||
| MS_LOG(INFO) << "Test Softmax passed"; | MS_LOG(INFO) << "Test Softmax passed"; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestSoftmaxOpenCL, Softmax2DFp32) { | TEST_F(TestSoftmaxOpenCL, Softmax2DFp32) { | ||||
| @@ -29,7 +29,7 @@ class TestToFormatOpenCL : public mindspore::CommonTest { | |||||
| }; | }; | ||||
| TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { | TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| auto allocator = ocl_runtime->GetAllocator(); | auto allocator = ocl_runtime->GetAllocator(); | ||||
| int h = 64; | int h = 64; | ||||
| @@ -102,6 +102,5 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { | |||||
| // compare | // compare | ||||
| CompareOutputData(output_data, correct_data, h * w * c, 0.00001); | CompareOutputData(output_data, correct_data, h * w * c, 0.00001); | ||||
| MS_LOG(INFO) << "Test TransposeFp32 passed"; | MS_LOG(INFO) << "Test TransposeFp32 passed"; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -30,7 +30,7 @@ class TestTransposeOpenCL : public mindspore::CommonTest { | |||||
| }; | }; | ||||
| void RunTestTranspose(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16) { | void RunTestTranspose(const std::vector<int> &shape, void *input_data, void *output_data, bool enable_fp16) { | ||||
| auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); | |||||
| auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); | |||||
| ocl_runtime->Init(); | ocl_runtime->Init(); | ||||
| size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); | ||||
| ocl_runtime->SetFp16Enable(enable_fp16); | ocl_runtime->SetFp16Enable(enable_fp16); | ||||
| @@ -103,7 +103,6 @@ void RunTestTranspose(const std::vector<int> &shape, void *input_data, void *out | |||||
| } | } | ||||
| MS_LOG(INFO) << "Test TransposeFp32 passed"; | MS_LOG(INFO) << "Test TransposeFp32 passed"; | ||||
| lite::opencl::OpenCLRuntime::DeleteInstance(); | |||||
| } | } | ||||
| TEST_F(TestTransposeOpenCL, TransposeNHWC2NCHWFp32) { | TEST_F(TestTransposeOpenCL, TransposeNHWC2NCHWFp32) { | ||||