|
|
|
@@ -23,44 +23,75 @@ using mindspore::lite::KernelRegistrar; |
|
|
|
using mindspore::schema::PrimitiveType_BatchNorm; |
|
|
|
|
|
|
|
namespace mindspore::kernel { |
|
|
|
int BatchnormFp16CPUKernel::DoExecute(int task_id) { |
|
|
|
auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); |
|
|
|
|
|
|
|
if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) { |
|
|
|
auto input = in_tensors_.at(0); |
|
|
|
auto mean = in_tensors_.at(1); |
|
|
|
auto variance = in_tensors_.at(2); |
|
|
|
auto output = out_tensors_.at(0); |
|
|
|
int BatchnormFp16CPUKernel::InitConstTensor() { |
|
|
|
isFloat32Tensor_ = in_tensors_.at(0)->data_type() == kNumberTypeFloat32; |
|
|
|
if (isFloat32Tensor_) { |
|
|
|
auto mean_fp32 = in_tensors_.at(1); |
|
|
|
auto variance_fp32 = in_tensors_.at(2); |
|
|
|
mean_ = malloc(mean_fp32->ElementsNum() * sizeof(float16_t)); |
|
|
|
variance_ = malloc(variance_fp32->ElementsNum() * sizeof(float16_t)); |
|
|
|
if (mean_ == nullptr || variance_ == nullptr) { |
|
|
|
FreeMeanAndVariance(); |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
Float32ToFloat16(reinterpret_cast<float *>(mean_fp32->Data()), |
|
|
|
reinterpret_cast<float16_t *>(mean_), mean_fp32->ElementsNum()); |
|
|
|
Float32ToFloat16(reinterpret_cast<float *>(variance_fp32->Data()), |
|
|
|
reinterpret_cast<float16_t *>(variance_), variance_fp32->ElementsNum()); |
|
|
|
} else { |
|
|
|
BatchnormCPUKernel::InitConstTensor(); |
|
|
|
} |
|
|
|
return RET_OK; |
|
|
|
} |
|
|
|
|
|
|
|
auto input_fp16 = context_->allocator->Malloc(input->ElementsNum() * sizeof(float16_t)); |
|
|
|
auto mean_fp16 = context_->allocator->Malloc(mean->ElementsNum() * sizeof(float16_t)); |
|
|
|
auto variance_fp16 = context_->allocator->Malloc(variance->ElementsNum() * sizeof(float16_t)); |
|
|
|
auto output_fp16 = context_->allocator->Malloc(output->ElementsNum() * sizeof(float16_t)); |
|
|
|
if (input_fp16 == nullptr || mean_fp16 == nullptr || variance_fp16 == nullptr || output_fp16 == nullptr) { |
|
|
|
context_->allocator->Free(input_fp16); |
|
|
|
context_->allocator->Free(mean_fp16); |
|
|
|
context_->allocator->Free(variance_fp16); |
|
|
|
context_->allocator->Free(output_fp16); |
|
|
|
int BatchnormFp16CPUKernel::Run() { |
|
|
|
auto ret = Prepare(); |
|
|
|
if (ret != RET_OK) { |
|
|
|
MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret; |
|
|
|
return ret; |
|
|
|
} |
|
|
|
auto input_fp32 = in_tensors_.at(0); |
|
|
|
auto output_fp32 = out_tensors_.at(0); |
|
|
|
if (isFloat32Tensor_) { |
|
|
|
input_ = context_->allocator->Malloc(input_fp32->ElementsNum() * sizeof(float16_t)); |
|
|
|
output_ = context_->allocator->Malloc(output_fp32->ElementsNum() * sizeof(float16_t)); |
|
|
|
if (input_ == nullptr || output_ == nullptr) { |
|
|
|
FreeInputAndOutput(); |
|
|
|
return RET_ERROR; |
|
|
|
} |
|
|
|
Float32ToFloat16(reinterpret_cast<float *>(input->Data()), |
|
|
|
reinterpret_cast<float16_t *>(input_fp16), input->ElementsNum()); |
|
|
|
Float32ToFloat16(reinterpret_cast<float *>(mean->Data()), |
|
|
|
reinterpret_cast<float16_t *>(mean_fp16), mean->ElementsNum()); |
|
|
|
Float32ToFloat16(reinterpret_cast<float *>(variance->Data()), |
|
|
|
reinterpret_cast<float16_t *>(variance_fp16), variance->ElementsNum()); |
|
|
|
Float32ToFloat16(reinterpret_cast<float *>(input_fp32->Data()), |
|
|
|
reinterpret_cast<float16_t *>(input_), input_fp32->ElementsNum()); |
|
|
|
} else { |
|
|
|
input_ = in_tensors_.at(0)->Data(); |
|
|
|
output_ = out_tensors_.at(0)->Data(); |
|
|
|
} |
|
|
|
ret = LiteBackendParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_); |
|
|
|
if (ret != RET_OK) { |
|
|
|
MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; |
|
|
|
} |
|
|
|
if (isFloat32Tensor_) { |
|
|
|
Float16ToFloat32(reinterpret_cast<float16_t *>(output_), reinterpret_cast<float *>(output_fp32->Data()), |
|
|
|
output_fp32->ElementsNum()); |
|
|
|
FreeInputAndOutput(); |
|
|
|
} |
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
|
BatchNormFp16(input_fp16, mean_fp16, variance_fp16, param, task_id, output_fp16); |
|
|
|
int BatchnormFp16CPUKernel::DoExecute(int task_id) { |
|
|
|
auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); |
|
|
|
BatchNormFp16(input_, mean_, variance_, param, task_id, output_); |
|
|
|
return mindspore::lite::RET_OK; |
|
|
|
} |
|
|
|
|
|
|
|
Float16ToFloat32(reinterpret_cast<float16_t *>(output_fp16), reinterpret_cast<float *>(output), |
|
|
|
output->ElementsNum()); |
|
|
|
context_->allocator->Free(input_fp16); |
|
|
|
context_->allocator->Free(mean_fp16); |
|
|
|
context_->allocator->Free(variance_fp16); |
|
|
|
context_->allocator->Free(output_fp16); |
|
|
|
return mindspore::lite::RET_OK; |
|
|
|
void BatchnormFp16CPUKernel::FreeInputAndOutput() { |
|
|
|
if (input_ != nullptr) { |
|
|
|
context_->allocator->Free(input_); |
|
|
|
input_ = nullptr; |
|
|
|
} |
|
|
|
if (output_ != nullptr) { |
|
|
|
context_->allocator->Free(output_); |
|
|
|
output_ = nullptr; |
|
|
|
} |
|
|
|
BatchNormFp16(in_tensors_.at(0)->Data(), mean_, variance_, param, task_id, out_tensors_.at(0)->Data()); |
|
|
|
return mindspore::lite::RET_OK; |
|
|
|
} |
|
|
|
|
|
|
|
kernel::LiteKernel *CpuBatchnormFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, |
|
|
|
@@ -83,5 +114,5 @@ kernel::LiteKernel *CpuBatchnormFp16KernelCreator(const std::vector<lite::tensor |
|
|
|
return kernel; |
|
|
|
} |
|
|
|
|
|
|
|
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_BatchNorm, CpuBatchnormFp16KernelCreator) |
|
|
|
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_BatchNorm, CpuBatchnormFp16KernelCreator) |
|
|
|
} // namespace mindspore::kernel |