diff --git a/mindspore/lite/src/common/utils.cc b/mindspore/lite/src/common/utils.cc index b7c89aaaf5..02f1c1c9e9 100644 --- a/mindspore/lite/src/common/utils.cc +++ b/mindspore/lite/src/common/utils.cc @@ -153,26 +153,5 @@ bool IsSupportSDot() { return status; } -bool IsSupportFloat16() { - bool status = false; -#ifdef ENABLE_ARM32 - status = true; -#endif - -#if defined(ENABLE_ARM64) -#if defined(__ANDROID__) - int hwcap_type = 16; - uint32_t hwcap = getHwCap(hwcap_type); - if (hwcap & HWCAP_FPHP) { - MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap; - status = true; - } else { - MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap; - status = false; - } -#endif -#endif - return status; -} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/common/utils.h b/mindspore/lite/src/common/utils.h index 0b4fb9874c..2881ed2ab7 100644 --- a/mindspore/lite/src/common/utils.h +++ b/mindspore/lite/src/common/utils.h @@ -43,8 +43,7 @@ uint64_t GetTimeUs(); bool IsSupportSDot(); -bool IsSupportFloat16(); -#if defined(__arm__) +#ifdef __ANDROID__ uint32_t getHwCap(int hwcap_type); #endif diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc index 7b62d4c26a..a3178bc8cc 100644 --- a/mindspore/lite/src/inner_context.cc +++ b/mindspore/lite/src/inner_context.cc @@ -14,6 +14,10 @@ * limitations under the License. 
*/ +#ifdef __ANDROID__ +#include <sys/auxv.h> +#include <asm/hwcap.h> +#endif #include "src/inner_context.h" #include "include/errorcode.h" #include "src/common/log_adapter.h" @@ -21,6 +25,9 @@ #ifdef SUPPORT_NPU #include "src/runtime/agent/npu/npu_manager.h" #endif +#ifdef SUPPORT_GPU +#include "src/runtime/gpu/opencl/opencl_runtime.h" +#endif namespace mindspore::lite { InnerContext::InnerContext(const Context *context) { @@ -127,13 +134,18 @@ bool InnerContext::IsCpuFloat16Enabled() const { } bool InnerContext::IsGpuFloat16Enabled() const { +#ifdef SUPPORT_GPU if (!IsGpuEnabled()) { return false; } - if (!IsSupportFloat16()) { + opencl::OpenCLRuntimeWrapper wrapper; + if (!wrapper.GetInstance()->GetFp16Enable()) { return false; } return GetGpuInfo().enable_float16_; +#else + return false; +#endif } bool InnerContext::IsCpuEnabled() const { return IsUserSetCpu(); } @@ -203,4 +215,24 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const { } } +// Support CPU backend to judge whether it supports Float16. +bool InnerContext::IsSupportFloat16() const { + bool status = false; + +#if defined(ENABLE_ARM64) +#if defined(__ANDROID__) + int hwcap_type = 16; + uint32_t hwcap = getHwCap(hwcap_type); + if (hwcap & HWCAP_FPHP) { + MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap; + status = true; + } else { + MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap; + status = false; + } +#endif +#endif + return status; +} + } // namespace mindspore::lite diff --git a/mindspore/lite/src/inner_context.h b/mindspore/lite/src/inner_context.h index 41e4cad9d9..8a84ae6fcc 100644 --- a/mindspore/lite/src/inner_context.h +++ b/mindspore/lite/src/inner_context.h @@ -65,6 +65,8 @@ struct InnerContext : public Context { bool IsUserSetNpu() const; + bool IsSupportFloat16() const; + #if SUPPORT_NPU private: diff --git a/mindspore/lite/src/kernel_registry.cc b/mindspore/lite/src/kernel_registry.cc index 25e5479915..25c4cad323 100644 --- a/mindspore/lite/src/kernel_registry.cc +++ 
b/mindspore/lite/src/kernel_registry.cc @@ -96,23 +96,7 @@ int KernelRegistry::RegKernel(const std::string &arch, const std::string &vendor return RET_OK; } -int KernelRegistry::Init() { -#ifdef ENABLE_ARM64 - if (mindspore::lite::IsSupportSDot()) { - MS_LOG(INFO) << "The current device supports Sdot."; - } else { - MS_LOG(INFO) << "The current device NOT supports Sdot."; - } -#endif -#ifdef ENABLE_FP16 - if (mindspore::lite::IsSupportFloat16()) { - MS_LOG(INFO) << "The current device supports float16."; - } else { - MS_LOG(INFO) << "The current device NOT supports float16."; - } -#endif - return RET_OK; -} +int KernelRegistry::Init() { return RET_OK; } kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) { if (desc.vendor == kBuiltin) { diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc index 90f8068fdd..71bcf22f43 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc @@ -368,6 +368,8 @@ bool OpenCLRuntime::SetFp16Enable(bool enable) { return fp16_enable_ == enable; } +bool OpenCLRuntime::IsSupportFloat16() { return support_fp16_; } + int OpenCLRuntime::BuildKernel(const cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, const std::vector<std::string> &build_options_ext) { std::string build_option = default_build_option_; diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h index d67378c025..c791fda7ab 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h @@ -70,6 +70,7 @@ class OpenCLRuntime { GpuInfo GetGpuInfo(); bool GetFp16Enable() const; bool SetFp16Enable(bool enable); + bool IsSupportFloat16(); bool GetSVMEnable() const { return svm_enable_; } void SetSVMEnable(bool enable) { svm_enable_ = enable; } const std::vector<size_t> 
&GetWorkItemSize() const { return max_work_item_sizes_; } diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc index d2dae3a8dc..855f5f4fea 100644 --- a/mindspore/lite/src/sub_graph_kernel.cc +++ b/mindspore/lite/src/sub_graph_kernel.cc @@ -286,10 +286,6 @@ int CpuFp16SubGraph::Float16TensorToFloat32Tensor(lite::Tensor *tensor) { int CpuFp16SubGraph::PreProcess() { #ifdef ENABLE_FP16 - if (!mindspore::lite::IsSupportFloat16()) { - MS_LOG(ERROR) << "Unsupported fp16 in this devices"; - return RET_ERROR; - } int ret; for (auto tensor : this->in_tensors_) { MS_ASSERT(tensor != nullptr); @@ -350,10 +346,6 @@ int CpuFp16SubGraph::PreProcess() { int CpuFp16SubGraph::PostProcess() { #ifdef ENABLE_FP16 - if (!mindspore::lite::IsSupportFloat16()) { - MS_LOG(ERROR) << "Unsupported fp16 in this devices"; - return RET_ERROR; - } int ret; for (auto tensor : this->out_tensors_) { MS_ASSERT(tensor != nullptr);