Browse Source

!15804 fix gpu fp16 bug

From: @yeyunpeng2020
Reviewed-by: @zhanghaibo5,@hangangqiang
Signed-off-by: @hangangqiang
pull/15804/MERGE
mindspore-ci-bot Gitee 4 years ago
parent
commit
796e5e3245
8 changed files with 40 additions and 49 deletions
  1. +0
    -21
      mindspore/lite/src/common/utils.cc
  2. +1
    -2
      mindspore/lite/src/common/utils.h
  3. +33
    -1
      mindspore/lite/src/inner_context.cc
  4. +2
    -0
      mindspore/lite/src/inner_context.h
  5. +1
    -17
      mindspore/lite/src/kernel_registry.cc
  6. +2
    -0
      mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc
  7. +1
    -0
      mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
  8. +0
    -8
      mindspore/lite/src/sub_graph_kernel.cc

+ 0
- 21
mindspore/lite/src/common/utils.cc View File

@@ -153,26 +153,5 @@ bool IsSupportSDot() {
return status;
}

// NOTE(review): in this commit's diff this function is REMOVED from
// src/common/utils.cc (the file's hunk is +0/-21); an equivalent check is
// re-added as InnerContext::IsSupportFloat16() in inner_context.cc.
// Returns true when the host CPU reports FP16 arithmetic support.
bool IsSupportFloat16() {
bool status = false;
// ARM32 builds unconditionally claim FP16 support here.
#ifdef ENABLE_ARM32
status = true;
#endif

// On Android/ARM64, query the kernel's hardware-capability bits.
#if defined(ENABLE_ARM64)
#if defined(__ANDROID__)
int hwcap_type = 16;  // 16 == AT_HWCAP, the getauxval() key for CPU feature bits
uint32_t hwcap = getHwCap(hwcap_type);
if (hwcap & HWCAP_FPHP) {  // FPHP = half-precision floating-point feature flag
MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap;
status = true;
} else {
MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
status = false;
}
#endif
#endif
return status;
}
} // namespace lite
} // namespace mindspore

+ 1
- 2
mindspore/lite/src/common/utils.h View File

@@ -43,8 +43,7 @@ uint64_t GetTimeUs();

bool IsSupportSDot();

bool IsSupportFloat16();
#if defined(__arm__)
#ifdef __ANDROID__
uint32_t getHwCap(int hwcap_type);
#endif



+ 33
- 1
mindspore/lite/src/inner_context.cc View File

@@ -14,6 +14,10 @@
* limitations under the License.
*/

#ifdef __ANDROID__
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif
#include "src/inner_context.h"
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
@@ -21,6 +25,9 @@
#ifdef SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#endif
#ifdef SUPPORT_GPU
#include "src/runtime/gpu/opencl/opencl_runtime.h"
#endif

namespace mindspore::lite {
InnerContext::InnerContext(const Context *context) {
@@ -127,13 +134,18 @@ bool InnerContext::IsCpuFloat16Enabled() const {
}

// NOTE(review): the +/- diff markers were lost when this page was rendered,
// so the text below interleaves the OLD line `if (!IsSupportFloat16()) {`
// (removed by the commit) with the NEW OpenCL-runtime query that replaces it;
// read as plain C++ the braces do not balance. The post-commit logic appears
// to be: GPU FP16 is enabled only when the GPU backend is on, the OpenCL
// runtime reports FP16 enabled, and the user set enable_float16_ in the GPU
// device info — confirm against the repository, not this rendering.
bool InnerContext::IsGpuFloat16Enabled() const {
#ifdef SUPPORT_GPU
if (!IsGpuEnabled()) {
return false;
}
if (!IsSupportFloat16()) {
// Replacement check: ask the OpenCL runtime for the device's FP16 state
// instead of the CPU hwcap probe — presumably the "gpu fp16 bug" fix.
opencl::OpenCLRuntimeWrapper wrapper;
if (!wrapper.GetInstance()->GetFp16Enable()) {
return false;
}
return GetGpuInfo().enable_float16_;
#else
return false;
#endif
}

bool InnerContext::IsCpuEnabled() const { return IsUserSetCpu(); }
@@ -203,4 +215,24 @@ NpuDeviceInfo InnerContext::GetNpuInfo() const {
}
}

// Judges whether the CPU backend supports Float16 (logic moved here from
// src/common/utils.cc by this commit). Only Android/ARM64 builds can report
// true; everywhere else the hwcap probe is compiled out and this returns false.
// NOTE(review): unlike the removed utils.cc version, there is no ENABLE_ARM32
// branch here, so ARM32 now reports false — looks intentional, but confirm.
bool InnerContext::IsSupportFloat16() const {
bool status = false;

#if defined(ENABLE_ARM64)
#if defined(__ANDROID__)
int hwcap_type = 16;  // 16 == AT_HWCAP, the getauxval() key for CPU feature bits
uint32_t hwcap = getHwCap(hwcap_type);
if (hwcap & HWCAP_FPHP) {  // FPHP = half-precision floating-point feature flag
MS_LOG(DEBUG) << "Hw cap support FP16, hwcap: 0x" << hwcap;
status = true;
} else {
MS_LOG(DEBUG) << "Hw cap NOT support FP16, hwcap: 0x" << hwcap;
status = false;
}
#endif
#endif
return status;
}

} // namespace mindspore::lite

+ 2
- 0
mindspore/lite/src/inner_context.h View File

@@ -65,6 +65,8 @@ struct InnerContext : public Context {

bool IsUserSetNpu() const;

bool IsSupportFloat16() const;

#if SUPPORT_NPU

private:


+ 1
- 17
mindspore/lite/src/kernel_registry.cc View File

@@ -96,23 +96,7 @@ int KernelRegistry::RegKernel(const std::string &arch, const std::string &vendor
return RET_OK;
}

// NOTE(review): this is the PRE-commit version of KernelRegistry::Init(),
// shown as removed (-17 lines) in this diff. It performed no registration
// work — it only logged whether the device supports SDot / FP16 — so the
// commit's replacement one-liner preserves behavior (always RET_OK).
int KernelRegistry::Init() {
#ifdef ENABLE_ARM64
if (mindspore::lite::IsSupportSDot()) {
MS_LOG(INFO) << "The current device supports Sdot.";
} else {
MS_LOG(INFO) << "The current device NOT supports Sdot.";
}
#endif
#ifdef ENABLE_FP16
if (mindspore::lite::IsSupportFloat16()) {
MS_LOG(INFO) << "The current device supports float16.";
} else {
MS_LOG(INFO) << "The current device NOT supports float16.";
}
#endif
return RET_OK;
}
// Post-commit Init(): the capability logging was dropped (and the free
// function lite::IsSupportFloat16() no longer exists in common/utils), so
// Init() now performs no work and always reports success.
int KernelRegistry::Init() { return RET_OK; }

kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) {
if (desc.vendor == kBuiltin) {


+ 2
- 0
mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc View File

@@ -368,6 +368,8 @@ bool OpenCLRuntime::SetFp16Enable(bool enable) {
return fp16_enable_ == enable;
}

// Added by this commit: reports whether the OpenCL device itself supports
// FP16 (support_fp16_ — presumably set during device capability probing at
// runtime init; confirm), as opposed to GetFp16Enable(), which reports
// whether FP16 execution is currently requested/enabled.
bool OpenCLRuntime::IsSupportFloat16() { return support_fp16_; }

int OpenCLRuntime::BuildKernel(const cl::Kernel &kernel, const std::string &program_name,
const std::string &kernel_name, const std::vector<std::string> &build_options_ext) {
std::string build_option = default_build_option_;


+ 1
- 0
mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h View File

@@ -70,6 +70,7 @@ class OpenCLRuntime {
GpuInfo GetGpuInfo();
bool GetFp16Enable() const;
bool SetFp16Enable(bool enable);
bool IsSupportFloat16();
bool GetSVMEnable() const { return svm_enable_; }
void SetSVMEnable(bool enable) { svm_enable_ = enable; }
const std::vector<size_t> &GetWorkItemSize() const { return max_work_item_sizes_; }


+ 0
- 8
mindspore/lite/src/sub_graph_kernel.cc View File

@@ -286,10 +286,6 @@ int CpuFp16SubGraph::Float16TensorToFloat32Tensor(lite::Tensor *tensor) {

int CpuFp16SubGraph::PreProcess() {
#ifdef ENABLE_FP16
if (!mindspore::lite::IsSupportFloat16()) {
MS_LOG(ERROR) << "Unsupported fp16 in this devices";
return RET_ERROR;
}
int ret;
for (auto tensor : this->in_tensors_) {
MS_ASSERT(tensor != nullptr);
@@ -350,10 +346,6 @@ int CpuFp16SubGraph::PreProcess() {

int CpuFp16SubGraph::PostProcess() {
#ifdef ENABLE_FP16
if (!mindspore::lite::IsSupportFloat16()) {
MS_LOG(ERROR) << "Unsupported fp16 in this devices";
return RET_ERROR;
}
int ret;
for (auto tensor : this->out_tensors_) {
MS_ASSERT(tensor != nullptr);


Loading…
Cancel
Save