From: @yeyunpeng2020 Reviewed-by: @zhanghaibo5,@hangangqiang Signed-off-by: @hangangqiang pull/15804/MERGE
| @@ -153,26 +153,5 @@ bool IsSupportSDot() { | |||||
| return status; | return status; | ||||
| } | } | ||||
| bool IsSupportFloat16() { | |||||
| bool status = false; | |||||
| #ifdef ENABLE_ARM32 | |||||
| status = true; | |||||
| #endif | |||||
| #if defined(ENABLE_ARM64) | |||||
| #if defined(__ANDROID__) | |||||
| int hwcap_type = 16; | |||||
| uint32_t hwcap = getHwCap(hwcap_type); | |||||
| if (hwcap & HWCAP_FPHP) { | |||||
| MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap; | |||||
| status = true; | |||||
| } else { | |||||
| MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap; | |||||
| status = false; | |||||
| } | |||||
| #endif | |||||
| #endif | |||||
| return status; | |||||
| } | |||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| @@ -43,8 +43,7 @@ uint64_t GetTimeUs(); | |||||
| bool IsSupportSDot(); | bool IsSupportSDot(); | ||||
| bool IsSupportFloat16(); | |||||
| #if defined(__arm__) | |||||
| #ifdef __ANDROID__ | |||||
| uint32_t getHwCap(int hwcap_type); | uint32_t getHwCap(int hwcap_type); | ||||
| #endif | #endif | ||||
| @@ -14,6 +14,10 @@ | |||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifdef __ANDROID__ | |||||
| #include <sys/auxv.h> | |||||
| #include <asm/hwcap.h> | |||||
| #endif | |||||
| #include "src/inner_context.h" | #include "src/inner_context.h" | ||||
| #include "include/errorcode.h" | #include "include/errorcode.h" | ||||
| #include "src/common/log_adapter.h" | #include "src/common/log_adapter.h" | ||||
| @@ -21,6 +25,9 @@ | |||||
| #ifdef SUPPORT_NPU | #ifdef SUPPORT_NPU | ||||
| #include "src/runtime/agent/npu/npu_manager.h" | #include "src/runtime/agent/npu/npu_manager.h" | ||||
| #endif | #endif | ||||
| #ifdef SUPPORT_GPU | |||||
| #include "src/runtime/gpu/opencl/opencl_runtime.h" | |||||
| #endif | |||||
| namespace mindspore::lite { | namespace mindspore::lite { | ||||
| InnerContext::InnerContext(const Context *context) { | InnerContext::InnerContext(const Context *context) { | ||||
| @@ -127,13 +134,18 @@ bool InnerContext::IsCpuFloat16Enabled() const { | |||||
| } | } | ||||
| bool InnerContext::IsGpuFloat16Enabled() const { | bool InnerContext::IsGpuFloat16Enabled() const { | ||||
| #ifdef SUPPORT_GPU | |||||
| if (!IsGpuEnabled()) { | if (!IsGpuEnabled()) { | ||||
| return false; | return false; | ||||
| } | } | ||||
| if (!IsSupportFloat16()) { | |||||
| opencl::OpenCLRuntimeWrapper wrapper; | |||||
| if (!wrapper.GetInstance()->GetFp16Enable()) { | |||||
| return false; | return false; | ||||
| } | } | ||||
| return GetGpuInfo().enable_float16_; | return GetGpuInfo().enable_float16_; | ||||
| #else | |||||
| return false; | |||||
| #endif | |||||
| } | } | ||||
| bool InnerContext::IsCpuEnabled() const { return IsUserSetCpu(); } | bool InnerContext::IsCpuEnabled() const { return IsUserSetCpu(); } | ||||
| @@ -203,4 +215,24 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const { | |||||
| } | } | ||||
| } | } | ||||
| // Support CPU backend to judge whether it supports Float16. | |||||
| bool InnerContext::IsSupportFloat16() const { | |||||
| bool status = false; | |||||
| #if defined(ENABLE_ARM64) | |||||
| #if defined(__ANDROID__) | |||||
| int hwcap_type = 16; | |||||
| uint32_t hwcap = getHwCap(hwcap_type); | |||||
| if (hwcap & HWCAP_FPHP) { | |||||
| MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap; | |||||
| status = true; | |||||
| } else { | |||||
| MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap; | |||||
| status = false; | |||||
| } | |||||
| #endif | |||||
| #endif | |||||
| return status; | |||||
| } | |||||
| } // namespace mindspore::lite | } // namespace mindspore::lite | ||||
| @@ -65,6 +65,8 @@ struct InnerContext : public Context { | |||||
| bool IsUserSetNpu() const; | bool IsUserSetNpu() const; | ||||
| bool IsSupportFloat16() const; | |||||
| #if SUPPORT_NPU | #if SUPPORT_NPU | ||||
| private: | private: | ||||
| @@ -96,23 +96,7 @@ int KernelRegistry::RegKernel(const std::string &arch, const std::string &vendor | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int KernelRegistry::Init() { | |||||
| #ifdef ENABLE_ARM64 | |||||
| if (mindspore::lite::IsSupportSDot()) { | |||||
| MS_LOG(INFO) << "The current device supports Sdot."; | |||||
| } else { | |||||
| MS_LOG(INFO) << "The current device NOT supports Sdot."; | |||||
| } | |||||
| #endif | |||||
| #ifdef ENABLE_FP16 | |||||
| if (mindspore::lite::IsSupportFloat16()) { | |||||
| MS_LOG(INFO) << "The current device supports float16."; | |||||
| } else { | |||||
| MS_LOG(INFO) << "The current device NOT supports float16."; | |||||
| } | |||||
| #endif | |||||
| return RET_OK; | |||||
| } | |||||
| int KernelRegistry::Init() { return RET_OK; } | |||||
| kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) { | kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) { | ||||
| if (desc.vendor == kBuiltin) { | if (desc.vendor == kBuiltin) { | ||||
| @@ -368,6 +368,8 @@ bool OpenCLRuntime::SetFp16Enable(bool enable) { | |||||
| return fp16_enable_ == enable; | return fp16_enable_ == enable; | ||||
| } | } | ||||
| bool OpenCLRuntime::IsSupportFloat16() { return support_fp16_; } | |||||
| int OpenCLRuntime::BuildKernel(const cl::Kernel &kernel, const std::string &program_name, | int OpenCLRuntime::BuildKernel(const cl::Kernel &kernel, const std::string &program_name, | ||||
| const std::string &kernel_name, const std::vector<std::string> &build_options_ext) { | const std::string &kernel_name, const std::vector<std::string> &build_options_ext) { | ||||
| std::string build_option = default_build_option_; | std::string build_option = default_build_option_; | ||||
| @@ -70,6 +70,7 @@ class OpenCLRuntime { | |||||
| GpuInfo GetGpuInfo(); | GpuInfo GetGpuInfo(); | ||||
| bool GetFp16Enable() const; | bool GetFp16Enable() const; | ||||
| bool SetFp16Enable(bool enable); | bool SetFp16Enable(bool enable); | ||||
| bool IsSupportFloat16(); | |||||
| bool GetSVMEnable() const { return svm_enable_; } | bool GetSVMEnable() const { return svm_enable_; } | ||||
| void SetSVMEnable(bool enable) { svm_enable_ = enable; } | void SetSVMEnable(bool enable) { svm_enable_ = enable; } | ||||
| const std::vector<size_t> &GetWorkItemSize() const { return max_work_item_sizes_; } | const std::vector<size_t> &GetWorkItemSize() const { return max_work_item_sizes_; } | ||||
| @@ -286,10 +286,6 @@ int CpuFp16SubGraph::Float16TensorToFloat32Tensor(lite::Tensor *tensor) { | |||||
| int CpuFp16SubGraph::PreProcess() { | int CpuFp16SubGraph::PreProcess() { | ||||
| #ifdef ENABLE_FP16 | #ifdef ENABLE_FP16 | ||||
| if (!mindspore::lite::IsSupportFloat16()) { | |||||
| MS_LOG(ERROR) << "Unsupported fp16 in this devices"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| int ret; | int ret; | ||||
| for (auto tensor : this->in_tensors_) { | for (auto tensor : this->in_tensors_) { | ||||
| MS_ASSERT(tensor != nullptr); | MS_ASSERT(tensor != nullptr); | ||||
| @@ -350,10 +346,6 @@ int CpuFp16SubGraph::PreProcess() { | |||||
| int CpuFp16SubGraph::PostProcess() { | int CpuFp16SubGraph::PostProcess() { | ||||
| #ifdef ENABLE_FP16 | #ifdef ENABLE_FP16 | ||||
| if (!mindspore::lite::IsSupportFloat16()) { | |||||
| MS_LOG(ERROR) << "Unsupported fp16 in this devices"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| int ret; | int ret; | ||||
| for (auto tensor : this->out_tensors_) { | for (auto tensor : this->out_tensors_) { | ||||
| MS_ASSERT(tensor != nullptr); | MS_ASSERT(tensor != nullptr); | ||||