Browse Source

benchmark: add fp16Priority flag

comment out kernel registrations for failed fp16 ops
tags/v0.7.0-beta
zhaozhenlong 5 years ago
parent
commit
0e2dc892f2
5 changed files with 23 additions and 19 deletions
  1. +1
    -1
      mindspore/lite/include/context.h
  2. +17
    -17
      mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
  3. +1
    -1
      mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
  4. +2
    -0
      mindspore/lite/tools/benchmark/benchmark.cc
  5. +2
    -0
      mindspore/lite/tools/benchmark/benchmark.h

+ 1
- 1
mindspore/lite/include/context.h View File

@@ -65,7 +65,7 @@ class MS_API Context {
virtual ~Context();

public:
bool float16_priority = true; /**< allow priority select float16 kernel */
bool float16_priority = false; /**< allow priority select float16 kernel */
DeviceContext device_ctx_{DT_CPU};
int thread_num_ = 2; /**< thread number config for thread pool */
std::shared_ptr<Allocator> allocator = nullptr;


+ 17
- 17
mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc View File

@@ -446,21 +446,21 @@ kernel::LiteKernel *CpuArithmeticFp16KernelCreator(const std::vector<lite::tenso
return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Mul, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Add, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Sub, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Div, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorMod, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorDiv, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalOr, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Maximum, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Minimum, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_NotEqual, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Equal, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Less, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LessEqual, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Greater, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CpuArithmeticFp16KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Eltwise, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Mul, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Add, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Sub, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Div, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorMod, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FloorDiv, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalAnd, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LogicalOr, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Maximum, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Minimum, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_NotEqual, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Equal, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Less, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_LessEqual, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Greater, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_GreaterEqual, CpuArithmeticFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Eltwise, CpuArithmeticFp16KernelCreator)
} // namespace mindspore::kernel

+ 1
- 1
mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc View File

@@ -267,5 +267,5 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
}
return kernel;
}
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Conv2D, CpuConvFp16KernelCreator)
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Conv2D, CpuConvFp16KernelCreator)
} // namespace mindspore::kernel

+ 2
- 0
mindspore/lite/tools/benchmark/benchmark.cc View File

@@ -396,6 +396,7 @@ int Benchmark::RunBenchmark(const std::string &deviceType) {
context->cpu_bind_mode_ = NO_BIND;
}
context->thread_num_ = _flags->numThreads;
context->float16_priority = _flags->fp16Priority;
session = session::LiteSession::CreateSession(context);
delete (context);
if (session == nullptr) {
@@ -503,6 +504,7 @@ int Benchmark::Init() {
MS_LOG(INFO) << "AccuracyThreshold = " << this->_flags->accuracyThreshold;
MS_LOG(INFO) << "WarmUpLoopCount = " << this->_flags->warmUpLoopCount;
MS_LOG(INFO) << "NumThreads = " << this->_flags->numThreads;
MS_LOG(INFO) << "Fp16Priority = " << this->_flags->fp16Priority;
MS_LOG(INFO) << "calibDataPath = " << this->_flags->calibDataPath;

if (this->_flags->loopCount < 1) {


+ 2
- 0
mindspore/lite/tools/benchmark/benchmark.h View File

@@ -63,6 +63,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
// MarkPerformance
AddFlag(&BenchmarkFlags::loopCount, "loopCount", "Run loop count", 10);
AddFlag(&BenchmarkFlags::numThreads, "numThreads", "Run threads number", 2);
AddFlag(&BenchmarkFlags::fp16Priority, "fp16Priority", "Priority float16", false);
AddFlag(&BenchmarkFlags::warmUpLoopCount, "warmUpLoopCount", "Run warm up loop", 3);
// MarkAccuracy
AddFlag(&BenchmarkFlags::calibDataPath, "calibDataPath", "Calibration data file path", "");
@@ -88,6 +89,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
// MarkPerformance
int loopCount;
int numThreads;
bool fp16Priority;
int warmUpLoopCount;
// MarkAccuracy
std::string calibDataPath;


Loading…
Cancel
Save