@@ -35,39 +35,17 @@ using mindspore::kernel::CLErrorCode;
 namespace mindspore::lite::opencl {
-std::map<std::string, std::string> g_opencl_program_map;
+static std::map<std::string, std::string> g_opencl_program_map;
 static std::mutex g_mtx;
 static std::mutex g_init_mtx;
-// magic number
-static std::map<int, int> AdrenoSubGroup{
-  {640, 128}, {630, 128}, {616, 128}, {612, 64}, {610, 64}, {540, 32}, {530, 32},
-  {512, 32}, {510, 32}, {509, 32}, {506, 32}, {505, 32}, {405, 32}, {330, 16},
-};
-#ifdef USE_OPENCL_WRAPPER
-std::shared_ptr<OpenCLWrapper> OpenCLWrapper::opencl_wrapper_singleton_ = nullptr;
-#endif
-std::shared_ptr<OpenCLRuntime> OpenCLRuntime::opencl_runtime_singleton_ = nullptr;
 bool OpenCLRuntime::init_done_ = false;
 OpenCLRuntime *OpenCLRuntime::GetInstance() {
   std::unique_lock<std::mutex> lck(g_mtx);
-  if (opencl_runtime_singleton_.get() == nullptr) {
-    opencl_runtime_singleton_.reset(new OpenCLRuntime());
-    opencl_runtime_singleton_->Init();
-  }
-  return opencl_runtime_singleton_.get();
-}
-void OpenCLRuntime::DeleteInstance() {
-  std::unique_lock<std::mutex> lck(g_mtx);
-  init_done_ = false;
-  if (opencl_runtime_singleton_ != nullptr) {
-    opencl_runtime_singleton_.reset();
-    opencl_runtime_singleton_ = nullptr;
-  }
+  static OpenCLRuntime ocl_runtime;
+  ocl_runtime.Init();
+  return &ocl_runtime;
 }
 OpenCLRuntime::OpenCLRuntime() { default_build_opts_ = " -cl-mad-enable -cl-fast-relaxed-math -Werror"; }
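Reviewer note: the shared_ptr singleton plus `DeleteInstance()` is replaced above by a function-local static, whose initialization is thread-safe in C++11 and which is destroyed automatically at process exit. A minimal, self-contained sketch of that pattern follows; the class name and the `Init()` body are illustrative, not the MindSpore API.

```cpp
class Runtime {
 public:
  // Function-local static ("Meyers" singleton): constructed on first use,
  // destroyed at program exit, so no explicit DeleteInstance() is required.
  static Runtime *GetInstance() {
    static Runtime instance;  // initialization is thread-safe since C++11
    instance.Init();          // assumed idempotent, like OpenCLRuntime::Init() guarded by init_done_
    return &instance;
  }
  void Init() {}  // placeholder for one-time setup

 private:
  Runtime() = default;
  ~Runtime() = default;  // release resources here; runs when the process exits
};

int main() {
  Runtime *a = Runtime::GetInstance();
  Runtime *b = Runtime::GetInstance();
  return a == b ? 0 : 1;  // always the same object
}
```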
@@ -88,7 +66,7 @@ int OpenCLRuntime::Init() {
   MS_LOG(INFO) << "CL_HPP_MINIMUM_OPENCL_VERSION " << CL_HPP_MINIMUM_OPENCL_VERSION;
 #ifdef USE_OPENCL_WRAPPER
-  if (false == OpenCLWrapper::GetInstance()->LoadOpenCLLibrary()) {
+  if (OpenCLWrapper::GetInstance()->LoadOpenCLLibrary() == false) {
     MS_LOG(ERROR) << "Load OpenCL symbols failed!";
     return RET_ERROR;
   }
@@ -123,7 +101,11 @@ int OpenCLRuntime::Init() {
     return RET_ERROR;
   }
-  device_ = std::make_shared<cl::Device>();
+  device_ = new (std::nothrow) cl::Device();
+  if (device_ == nullptr) {
+    MS_LOG(ERROR) << "Create OpenCL device failed!";
+    return RET_ERROR;
+  }
   *device_ = devices[0];
   max_work_item_sizes_ = device_->getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>();
   const std::string device_name = device_->getInfo<CL_DEVICE_NAME>();
@@ -144,20 +126,21 @@ int OpenCLRuntime::Init() {
   MS_LOG(INFO) << "Create special opencl context to share with OpenGL";
   cl_context_properties context_prop[] = {CL_GL_CONTEXT_KHR, (cl_context_properties)eglGetCurrentContext(),
                                           CL_EGL_DISPLAY_KHR, (cl_context_properties)eglGetCurrentDisplay(), 0};
-  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
+  context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &ret);
-  if (ret != CL_SUCCESS) {
-    MS_LOG(ERROR) << "Create special OpenCL context falied, Create common OpenCL context then.";
-    context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
+  if (ret != CL_SUCCESS || context_ == nullptr) {
+    MS_LOG(ERROR) << "Create special OpenCL context failed, Create common OpenCL context then.";
+    context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
+    if (context_ == nullptr) {
+      MS_LOG(ERROR) << "Create OpenCL context failed!";
+      return RET_ERROR;
+    }
   }
 #else
   MS_LOG(INFO) << "Create common opencl context";
-  // cl_context_properties context_prop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](),
-  // CL_PRINTF_CALLBACK_ARM, (cl_context_properties)printf_callback, 0};
-  // context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, context_prop, nullptr, nullptr, &err);
-  context_ = std::make_shared<cl::Context>(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
+  context_ = new (std::nothrow) cl::Context(std::vector<cl::Device>{*device_}, nullptr, nullptr, nullptr, &ret);
 #endif
-  if (ret != CL_SUCCESS) {
+  if (ret != CL_SUCCESS || context_ == nullptr) {
     MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
     return RET_ERROR;
   }
@@ -203,13 +186,17 @@ int OpenCLRuntime::Init() {
   properties |= CL_QUEUE_PROFILING_ENABLE;
 #endif
-  default_command_queue_ = std::make_shared<cl::CommandQueue>(*context_, *device_, properties, &ret);
-  if (ret != CL_SUCCESS) {
+  default_command_queue_ = new (std::nothrow) cl::CommandQueue(*context_, *device_, properties, &ret);
+  if (ret != CL_SUCCESS || default_command_queue_ == nullptr) {
     MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
     return RET_ERROR;
   }
-  allocator_ = std::make_shared<OpenCLAllocator>();
+  allocator_ = new (std::nothrow) OpenCLAllocator();
+  if (allocator_ == nullptr) {
+    MS_LOG(ERROR) << "Create OpenCL allocator failed!";
+    return RET_ERROR;
+  }
 #ifdef PROGRAM_WITH_IL
   std::string flag = "";
   binary_program_ = CreateProgramFromIL(g_program_binary, flag);
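Reviewer note: each `std::make_shared` in `Init()` becomes `new (std::nothrow)` followed by an explicit nullptr check; with `std::nothrow`, a failed allocation returns nullptr instead of throwing `std::bad_alloc`, which matches the `MS_LOG(ERROR)` plus `return RET_ERROR` style above. A small sketch of that pattern, with an illustrative resource type and return codes (not MindSpore names):

```cpp
#include <iostream>
#include <new>  // std::nothrow

struct Resource {  // stand-in for cl::Device / cl::Context / OpenCLAllocator
  int value = 0;
};

constexpr int kRetOk = 0;
constexpr int kRetError = -1;

int CreateResource(Resource **out) {
  Resource *res = new (std::nothrow) Resource();
  if (res == nullptr) {  // nothrow new reports allocation failure via nullptr
    std::cerr << "Create resource failed!" << std::endl;
    return kRetError;
  }
  *out = res;
  return kRetOk;
}

int main() {
  Resource *res = nullptr;
  if (CreateResource(&res) != kRetOk) {
    return 1;
  }
  delete res;  // raw-pointer ownership: the owner must delete, as ~OpenCLRuntime() now does
  return 0;
}
```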
@@ -221,17 +208,18 @@ int OpenCLRuntime::Init() {
 }
 OpenCLRuntime::~OpenCLRuntime() {
+  init_done_ = false;
   program_map_.clear();
-  // allocator_->Clear();
-  allocator_.reset();
-  default_command_queue_.reset();
-  context_.reset();
-  device_.reset();
+  delete allocator_;
+  delete default_command_queue_;
+  delete context_;
+  delete device_;
+  OpenCLWrapper::GetInstance()->UnLoadOpenCLLibrary();
 }
-cl::Context *OpenCLRuntime::Context() { return context_.get(); }
+cl::Context *OpenCLRuntime::Context() { return context_; }
-cl::Device *OpenCLRuntime::Device() { return device_.get(); }
+cl::Device *OpenCLRuntime::Device() { return device_; }
 uint64_t OpenCLRuntime::DeviceGlobalMemoryCacheSize() const { return global_memery_cachesize_; }
@@ -262,9 +250,7 @@ uint32_t OpenCLRuntime::GetSubGroupSize(const cl::Kernel &kernel, const cl::NDRa
     sub_group_size = 0;
   }
 #else
-  if (AdrenoSubGroup.find(gpu_info_.model_num) != AdrenoSubGroup.end()) {
-    sub_group_size = AdrenoSubGroup[gpu_info_.model_num];
-  }
+  sub_group_size = 0;
 #endif
 }
@@ -337,7 +323,7 @@ int OpenCLRuntime::BuildKernel(cl::Kernel &kernel, const std::string &program_na
 int OpenCLRuntime::RunKernel(const cl::Kernel &kernel, const std::vector<size_t> &global,
                              const std::vector<size_t> &local, cl::CommandQueue *command_queue) {
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   MS_ASSERT(local.size() == 0 || local.size() == global.size());
   std::vector<size_t> internal_global_ws = global;
@@ -462,7 +448,7 @@ bool OpenCLRuntime::BuildProgram(const std::string &build_options, const cl::Pro
 bool OpenCLRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue,
                                         bool sync) const {
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   cl_int cl_ret = CL_SUCCESS;
   const cl::Buffer *buffer = static_cast<const cl::Buffer *>(src);
@@ -475,7 +461,7 @@ bool OpenCLRuntime::CopyDeviceMemToHost(void *dst, const void *src, size_t size,
 bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue,
                                         bool sync) const {
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   cl_int cl_ret = CL_SUCCESS;
   const cl::Buffer *buffer = static_cast<const cl::Buffer *>(dst);
@@ -488,7 +474,7 @@ bool OpenCLRuntime::CopyHostMemToDevice(const void *dst, const void *src, size_t
 void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, cl::CommandQueue *command_queue,
                                bool sync) const {
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   return command_queue->enqueueMapBuffer(buffer, sync, flags, 0, size);
 }
@@ -498,7 +484,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
     return RET_OK;
   }
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   return command_queue->enqueueMapSVM(host_ptr, sync, flags, size);
 }
@@ -506,7 +492,7 @@ int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::Command
 void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
                                cl::CommandQueue *command_queue) const {
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   cl::size_type row_pitch;
   cl::size_type slice_pitch;
@@ -517,7 +503,7 @@ void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags,
 int OpenCLRuntime::UnmapBuffer(const cl::Memory &buffer, void *host_ptr, cl::CommandQueue *command_queue) const {
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   return command_queue->enqueueUnmapMemObject(buffer, host_ptr);
 }
@@ -527,14 +513,14 @@ int OpenCLRuntime::UnmapBuffer(void *host_ptr, cl::CommandQueue *command_queue)
     return RET_OK;
   }
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   return command_queue->enqueueUnmapSVM(host_ptr);
 }
 bool OpenCLRuntime::SyncCommandQueue(cl::CommandQueue *command_queue) {
   if (command_queue == nullptr) {
-    command_queue = default_command_queue_.get();
+    command_queue = default_command_queue_;
   }
   cl_int ret = command_queue->finish();
   if (ret != CL_SUCCESS) {
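Reviewer note: every `.get()` call site above collapses to the raw `default_command_queue_` member, and the nullptr-fallback behaviour is unchanged: passing no queue still means "use the default one". A stripped-down, self-contained sketch of that convention; the `Queue` type and method names are illustrative, not the cl.hpp API:

```cpp
#include <iostream>

struct Queue {
  void Finish() { std::cout << "queue finished" << std::endl; }
};

class RuntimeSketch {
 public:
  explicit RuntimeSketch(Queue *default_queue) : default_command_queue_(default_queue) {}

  bool Sync(Queue *command_queue = nullptr) {
    if (command_queue == nullptr) {
      command_queue = default_command_queue_;  // raw pointer now, no .get() needed
    }
    command_queue->Finish();
    return true;
  }

 private:
  Queue *default_command_queue_{nullptr};
};

int main() {
  Queue q;
  RuntimeSketch runtime(&q);
  runtime.Sync();    // nullptr argument falls back to the default queue
  runtime.Sync(&q);  // an explicit queue is used as-is
  return 0;
}
```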
@@ -20,7 +20,6 @@ * you may not use this file except in compliance with the License.
 #include <vector>
 #include <map>
 #include <memory>
-#include <mutex>
 #include <set>
 #include <string>
 #include <type_traits>
@@ -38,9 +37,6 @@ struct GpuInfo {
   float opencl_version = 0;
 };
-// Base GPU cache size used for computing local work group size.
-const int32_t g_base_gpu_mem_cachesize = 16384;
 class OpenCLRuntime {
  public:
   static OpenCLRuntime *GetInstance();
@@ -54,8 +50,8 @@ class OpenCLRuntime {
   cl::Context *Context();
   cl::Device *Device();
-  OpenCLAllocator *GetAllocator() { return allocator_.get(); }
-  cl::CommandQueue *GetDefaultCommandQueue() { return default_command_queue_.get(); }
+  OpenCLAllocator *GetAllocator() { return allocator_; }
+  cl::CommandQueue *GetDefaultCommandQueue() { return default_command_queue_; }
   uint64_t DeviceGlobalMemoryCacheSize() const;
   int DeviceMaxWorkGroupSize() const;
   uint32_t DeviceComputeUnits() const;
@@ -146,13 +142,12 @@ class OpenCLRuntime {
   bool BuildProgram(const std::string &build_options, const cl::Program &program);
  private:
-  static std::shared_ptr<OpenCLRuntime> opencl_runtime_singleton_;
   static bool init_done_;
-  std::shared_ptr<cl::CommandQueue> default_command_queue_{nullptr};
-  std::shared_ptr<cl::Context> context_{nullptr};
-  std::shared_ptr<cl::Device> device_{nullptr};
-  std::shared_ptr<OpenCLAllocator> allocator_{nullptr};
-  std::map<std::string, cl::Program> program_map_{};
+  cl::CommandQueue *default_command_queue_{nullptr};
+  cl::Context *context_{nullptr};
+  cl::Device *device_{nullptr};
+  OpenCLAllocator *allocator_{nullptr};
+  std::map<std::string, cl::Program> program_map_;
   cl::Program binary_program_{0};
   uint64_t global_memery_cachesize_{0};
   int max_work_group_size;
@@ -169,5 +164,4 @@
 };
 } // namespace mindspore::lite::opencl
 #endif // MINDSPORE_LITE_SRC_OPENCL_RUNTIME_H_
@@ -66,19 +66,13 @@ static const std::vector<std::string> g_opencl_library_paths = {
 };
 OpenCLWrapper *OpenCLWrapper::GetInstance() {
-  static std::once_flag opencl_wrapper_once;
-  std::call_once(opencl_wrapper_once,
-                 []() { opencl_wrapper_singleton_ = std::shared_ptr<OpenCLWrapper>(new OpenCLWrapper()); });
-  return opencl_wrapper_singleton_.get();
+  static OpenCLWrapper ocl_wrapper;
+  return &ocl_wrapper;
 }
 OpenCLWrapper::OpenCLWrapper() {}
-OpenCLWrapper::~OpenCLWrapper() {
-  if (nullptr == opencl_wrapper_singleton_.get()) return;
-  opencl_wrapper_singleton_->UnLoadOpenCLLibrary();
-}
+OpenCLWrapper::~OpenCLWrapper() {}
 // load default library path
 bool OpenCLWrapper::LoadOpenCLLibrary() {
@@ -230,8 +230,7 @@ class OpenCLWrapper {
   bool LoadLibraryFromPath(const std::string &path);
  private:
-  static std::shared_ptr<OpenCLWrapper> opencl_wrapper_singleton_;
-  void *handle_ = nullptr;
+  void *handle_{nullptr};
 };
 } // namespace mindspore::lite::opencl
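Reviewer note: every test hunk below is the same one-line change: the call to `lite::opencl::OpenCLRuntime::DeleteInstance()` is dropped because that method no longer exists; the function-local static runtime is torn down when the test binary exits. A hypothetical gtest skeleton under that assumption (the suite name is illustrative, and the include for the runtime header is omitted because its path is not shown in this diff):

```cpp
#include <gtest/gtest.h>

// Hypothetical example only; the real fixtures (TestActivationOpenCL, ...) live in the files below.
TEST(OpenCLRuntimeSingletonExample, NoExplicitTeardownNeeded) {
  auto *runtime = mindspore::lite::opencl::OpenCLRuntime::GetInstance();
  ASSERT_NE(runtime, nullptr);
  // ... create tensors, build kernels, run the sub-graph, compare outputs ...
  // No DeleteInstance() at the end: the singleton is released at process exit.
}
```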
@@ -173,7 +173,6 @@ TEST_F(TestActivationOpenCL, ReluFp32_dim4) {
   delete input_tensor;
   delete output_tensor;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
@@ -276,7 +275,6 @@ TEST_F(TestActivationOpenCL, Relu6Fp32_dim4) {
   delete input_tensor;
   delete output_tensor;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
@@ -379,7 +377,6 @@ TEST_F(TestActivationOpenCL, SigmoidFp32_dim4) {
   delete input_tensor;
   delete output_tensor;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
@@ -483,6 +480,5 @@ TEST_F(TestActivationOpenCL, LeakyReluFp32_dim4) {
   delete input_tensor;
   delete output_tensor;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -202,7 +202,6 @@ void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b)
   for (auto tensor : outputs) {
     delete tensor;
   }
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 class TestArithmeticOpenCL : public mindspore::CommonTest {
@@ -143,7 +143,6 @@ TEST_F(TestAvgPoolingOpenCL, AvgPoolFp32) {
   delete pooling_kernel;
   delete pGraph;
   delete param;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -155,6 +155,5 @@ TEST_F(TestBatchnormOpenCL, Batchnorminput_dim4) {
   delete param;
   delete batchnorm_kernel;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -213,6 +213,5 @@ TEST_F(TestConcatOpenCL, ConcatFp32_2input_dim4_axis3) {
   delete param;
   delete concat_kernel;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -175,7 +175,6 @@ void RunTestCase(const std::vector<int> shape, const std::vector<std::string> fi
   inputs[0]->SetData(nullptr);
   outputs[0]->SetData(nullptr);
   MS_LOG(INFO) << "Test Conv2dTransposeFp32 passed";
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) {
   int pad = 0;
@@ -136,7 +136,6 @@ void TEST_MAIN(schema::Format input_format, schema::Format output_format, const
   bias_tensor.SetData(nullptr);
   delete param;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConvolutionOpenCL, in1x1x64x512_out1x1x64x7358_k11_s11_p0000) {
@@ -195,7 +195,6 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNC4HW4Fp32) {
     2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};
   DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NC4HW4);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
@@ -268,7 +267,6 @@ TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) {
     1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203};
   DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NC4HW4);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
@@ -314,7 +312,6 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) {
     2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988};
   DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NHWC4);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
@@ -387,7 +384,6 @@ TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) {
     1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203};
   DepthWiseTestMain(conv_param.get(), input_data, weight_data, gnd_data, schema::Format_NHWC4);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
@@ -512,7 +508,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwNoPadFp32) {
   inputs[1]->SetData(nullptr);
   inputs[2]->SetData(nullptr);
   MS_LOG(INFO) << "TestConvolutionDwNoPadFp32 passed";
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
@@ -673,7 +668,6 @@ TEST_F(TestConvolutionDwOpenCL, ConvDwPadFp32) {
   inputs[1]->SetData(nullptr);
   inputs[2]->SetData(nullptr);
   MS_LOG(INFO) << "TestConvolutionDwPadFp32 passed";
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
@@ -739,7 +733,6 @@ TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2) {
       DepthWiseTestMain(conv_param.get(), input_data.get(), weight_data.get(), nullptr, schema::Format_NHWC4, false);
     }
   }
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
@@ -788,6 +781,5 @@ TEST_F(TestConvolutionDwOpenCL, Buffer2Image) {
   }
   // DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4, true);
   DepthWiseTestMain(conv_param.get(), input_data.get(), weight_data.get(), gnd_data.get(), schema::Format_NHWC4, true);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -115,7 +115,6 @@ TEST_F(TestMatMulOpenCL, MatMulFp32) {
   CompareOutputData(output_data, correct_data, co, 0.0001);
   tensor_x->SetData(nullptr);
   tensor_out->SetData(nullptr);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
   MS_LOG(INFO) << "TestMatMulFp32 passed";
 }
 } // namespace mindspore
@@ -118,7 +118,6 @@ TEST_F(TestMaxPoolingOpenCL, MaxPool_1_32_512_96) {
   }
   delete pooling_kernel;
   delete pGraph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -183,6 +183,5 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) {
   delete param;
   delete prelu_kernel;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -104,7 +104,6 @@ TEST_F(TestReshapeOpenCL, ReshapeFp32) {
   inputs[0]->SetData(nullptr);
   outputs[0]->SetData(nullptr);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
   MS_LOG(INFO) << "Test ReshapeFp32 passed";
 }
@@ -144,6 +144,5 @@ TEST_F(TestSliceOpenCL, Sliceinput_dim4) {
   }
   delete slice_kernel;
   delete sub_graph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -92,7 +92,6 @@ void RunTestCase(std::vector<int> input_shape, std::vector<int> output_shape, st
   }
   delete kernel;
   delete pGraph;
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 TEST_F(TestSoftmaxOpenCL, Softmax_1) {
@@ -103,6 +103,5 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) {
   // compare
   CompareOutputData(output_data, correct_data, h * w * c, 0.00001);
   MS_LOG(INFO) << "Test TransposeFp32 passed";
-  lite::opencl::OpenCLRuntime::DeleteInstance();
 }
 } // namespace mindspore
@@ -106,7 +106,6 @@ TEST_F(TestTransposeOpenCL, TransposeFp32) {
   inputs[0]->SetData(nullptr);
   outputs[0]->SetData(nullptr);
-  lite::opencl::OpenCLRuntime::DeleteInstance();
   MS_LOG(INFO) << "Test TransposeFp32 passed";
 }
 } // namespace mindspore