diff --git a/mindspore/lite/nnacl/fp32/instance_norm.c b/mindspore/lite/nnacl/fp32/instance_norm.c
index 4aa15a09a4..e90c49d395 100644
--- a/mindspore/lite/nnacl/fp32/instance_norm.c
+++ b/mindspore/lite/nnacl/fp32/instance_norm.c
@@ -13,30 +13,37 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #include "nnacl/fp32/instance_norm.h"
 #include <math.h>
-#include "nnacl/instance_norm_parameter.h"
+#include "nnacl/errorcode.h"
 #include "nnacl/op_base.h"
 
-void InstanceNormFp32(const void *input, const void *mean, const void *variance, InstanceNormParameter *param,
-                      int task_id, void *output) {
-  int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
-  int completed_units = task_id * units_per_thread;
-  if (completed_units >= param->unit_) {
-    return;
+int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data,
+                 const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id,
+                 const int thread_num) {
+  if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) {
+    return NNACL_NULL_PTR;
   }
-  int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
-  int cur_offset = completed_units * param->channel_;
-  for (int n = 0; n < param->batch_; n++) {
-    for (int hw = 0; hw < cur_unit; hw++) {
-      for (int c = 0; c < param->channel_; c++) {
-        float variance_sqrt = sqrt(((const float *)variance)[n * param->channel_ + c] + param->epsilon_);
-        ((float *)output)[cur_offset + c] =
-          (((const float *)input)[cur_offset + c] - ((const float *)mean)[n * param->channel_ + c]) / variance_sqrt;
-      }
-      cur_offset += param->channel_;
+  int i, j;
+  for (j = task_id; j < outer_size; j += thread_num) {
+    int offset = (j / param->channel_) * inner_size * param->channel_;
+    const float *src = src_data + offset;
+    float *dst = dst_data + offset;
+    float mean = 0.0f;
+    float square_mean = 0.0f;
+    for (i = 0; i < inner_size; i++) {
+      int idx = j % param->channel_ + i * param->channel_;
+      mean += src[idx];
+      square_mean += src[idx] * src[idx];
+    }
+    mean /= (float)inner_size;
+    square_mean /= (float)inner_size;
+    float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
+    for (i = 0; i < inner_size; ++i) {
+      int idx = j % param->channel_ + i * param->channel_;
+      int scale_idx = (j / param->channel_) * param->channel_ + j % param->channel_;
+      dst[idx] = ((src[idx] - mean) * deno) * scale_data[scale_idx] + bias_data[scale_idx];
     }
-    cur_offset += (param->unit_ - cur_unit) * param->channel_;
   }
+  return NNACL_OK;
 }
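Note: a minimal standalone sketch (not part of the patch) of the arithmetic the new InstanceNorm implements. For each (batch, channel) unit it takes the mean and variance over the spatial elements via E[x^2] - E[x]^2 in NHWC layout, then normalizes; names, shape, and values are illustrative, and scale/bias are left at identity for brevity.

/* Per-(batch, channel) instance-norm math, NHWC layout; illustrative only. */
#include <math.h>
#include <stdio.h>

int main(void) {
  enum { N = 1, H = 2, W = 2, C = 3 };
  const int inner_size = H * W;
  const float eps = 0.001f;
  float src[N * H * W * C];
  float dst[N * H * W * C];
  for (int k = 0; k < N * H * W * C; ++k) {
    src[k] = 0.5f * (float)k - 2.0f; /* arbitrary test values */
  }
  for (int j = 0; j < N * C; ++j) { /* one unit per (batch, channel) pair */
    const int n = j / C, c = j % C;
    const float *base = src + n * inner_size * C;
    float mean = 0.0f, square_mean = 0.0f;
    for (int i = 0; i < inner_size; ++i) { /* stride C walks one channel */
      const float v = base[c + i * C];
      mean += v;
      square_mean += v * v;
    }
    mean /= (float)inner_size;
    square_mean /= (float)inner_size;
    const float deno = 1.0f / sqrtf(square_mean - mean * mean + eps);
    for (int i = 0; i < inner_size; ++i) {
      dst[n * inner_size * C + c + i * C] = (base[c + i * C] - mean) * deno; /* scale = 1, bias = 0 */
    }
  }
  printf("dst[0] = %f\n", dst[0]);
  return 0;
}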
diff --git a/mindspore/lite/nnacl/fp32/instance_norm.h b/mindspore/lite/nnacl/fp32/instance_norm.h
index 193525337a..a1aae2463f 100644
--- a/mindspore/lite/nnacl/fp32/instance_norm.h
+++ b/mindspore/lite/nnacl/fp32/instance_norm.h
@@ -13,20 +13,19 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #ifndef MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
 #define MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
 
+#include "nnacl/op_base.h"
 #include "nnacl/instance_norm_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
-void InstanceNormFp32(const void *input, const void *mean, const void *variance, InstanceNormParameter *param,
-                      int task_id, void *output);
-void FusedInstanceNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
-                           const void *variance, InstanceNormParameter *param, int task_id, void *output);
+int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data,
+                 const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id,
+                 const int thread_num);
 #ifdef __cplusplus
 }
 #endif
 
diff --git a/mindspore/lite/nnacl/instance_norm_parameter.h b/mindspore/lite/nnacl/instance_norm_parameter.h
index f49a7f9fc8..8e188eb6d7 100644
--- a/mindspore/lite/nnacl/instance_norm_parameter.h
+++ b/mindspore/lite/nnacl/instance_norm_parameter.h
@@ -23,10 +23,7 @@ typedef struct InstanceNormParameter {
   OpParameter op_parameter_;
   float epsilon_;
   float momentum_;
-  int unit_;
-  int batch_;
   int channel_;
-  bool fused_;
 } InstanceNormParameter;
 
 #endif  // MINDSPORE_LITE_NNACL_INSTANCE_NORM_PARAMETER_H_
diff --git a/mindspore/lite/src/ops/populate/instance_norm_populate.cc b/mindspore/lite/src/ops/populate/instance_norm_populate.cc
index d333d75ee3..13d33fd8f8 100644
--- a/mindspore/lite/src/ops/populate/instance_norm_populate.cc
+++ b/mindspore/lite/src/ops/populate/instance_norm_populate.cc
@@ -33,7 +33,6 @@ OpParameter *PopulateInstanceNormParameter(const mindspore::lite::PrimitiveC *pr
   memset(instance_norm_param, 0, sizeof(InstanceNormParameter));
   instance_norm_param->op_parameter_.type_ = primitive->Type();
   instance_norm_param->epsilon_ = param->GetEpsilon();
-  instance_norm_param->fused_ = false;
   return reinterpret_cast<OpParameter *>(instance_norm_param);
 }
 
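Note: for context on the ReSize() rework in the kernel below, the NHWC input is flattened into outer_size_ = N * C normalization units, each covering inner_size_ = (product of the spatial dims) elements; the batch dimension must stay out of inner_size_, since it is already folded into outer_size_. A minimal sketch of that decomposition, using the INTest2 shape (illustrative):

/* Shape decomposition used by the reworked ReSize(); {2, 2, 2, 3} is NHWC. */
#include <stdio.h>

int main(void) {
  const int shape[] = {2, 2, 2, 3};
  const int n_dim = sizeof(shape) / sizeof(shape[0]);
  const int outer_size = shape[0] * shape[n_dim - 1]; /* N * C units */
  int inner_size = 1;
  for (int i = 1; i < n_dim - 1; ++i) { /* spatial dims only, batch excluded */
    inner_size *= shape[i];
  }
  printf("outer_size=%d inner_size=%d\n", outer_size, inner_size); /* 6 and 4 */
  return 0;
}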
*/ - #include "src/runtime/kernel/arm/fp32/instance_norm.h" -#include "nnacl/fp32/instance_norm.h" +#include +#include "schema/model_generated.h" #include "src/kernel_registry.h" +#include "include/errorcode.h" +using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; @@ -32,47 +34,60 @@ int InstanceNormCPUKernel::Init() { } int InstanceNormCPUKernel::ReSize() { - auto input_shapes = in_tensors_[0]->shape(); + auto input_shapes = in_tensors_.front()->shape(); auto n_dim = input_shapes.size(); - auto param = reinterpret_cast(op_parameter_); - param->batch_ = input_shapes[0]; - param->channel_ = input_shapes[n_dim - 1]; - param->unit_ = 1; - for (size_t i = 1; i < n_dim - 1; i++) { - param->unit_ *= input_shapes[i]; + outer_size_ = input_shapes[0] * input_shapes[n_dim - 1]; + inner_size_ = 1; + for (size_t i = 0; i < n_dim - 1; ++i) { + inner_size_ *= input_shapes[i]; } + param_->channel_ = input_shapes[n_dim - 1]; return RET_OK; } -int InstanceNormCPUKernel::Run() { - auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_); +int InstanceNormCPUKernel::DoInstanceNorm(int task_id) { + int ret = InstanceNorm(outer_size_, inner_size_, src_data_, scale_data_, bias_data_, param_, dst_data_, task_id, + op_parameter_->thread_num_); if (ret != RET_OK) { - MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]"; + MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]"; + return ret; } - return ret; -} - -int InstanceNormCPUKernel::DoExecute(int task_id) { - auto param = reinterpret_cast(op_parameter_); - InstanceNormFp32(in_tensors_.at(0)->MutableData(), in_tensors_.at(1)->MutableData(), in_tensors_.at(2)->MutableData(), - param, task_id, out_tensors_.at(0)->MutableData()); - return mindspore::lite::RET_OK; + return RET_OK; } int InstanceNormRun(void *cdata, int task_id) { - auto kernel = reinterpret_cast(cdata); - auto ret = kernel->DoExecute(task_id); + auto InstanceNormData = reinterpret_cast(cdata); + auto ret = InstanceNormData->DoInstanceNorm(task_id); if (ret != RET_OK) { MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int InstanceNormCPUKernel::Run() { + src_data_ = reinterpret_cast(in_tensors_.at(0)->MutableData()); + scale_data_ = reinterpret_cast(in_tensors_.at(1)->MutableData()); + bias_data_ = reinterpret_cast(in_tensors_.at(2)->MutableData()); + dst_data_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); + auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]"; + return ret; } - return ret; + return RET_OK; } -kernel::LiteKernel *CpuInstanceNormKernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *opParameter, - const lite::InnerContext *ctx, const kernel::KernelKey &desc, - const mindspore::lite::PrimitiveC *primitive) { - MS_ASSERT(opParameter != nullptr); +kernel::LiteKernel *CpuInstanceNormFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::InnerContext *ctx, + const kernel::KernelKey &desc, + const mindspore::lite::PrimitiveC *primitive) { + if (opParameter == nullptr) { + MS_LOG(ERROR) << "Create kernel failed, opParameter is nullptr, type: PrimitiveType_InstanceNorm. 
"; + return nullptr; + } + MS_ASSERT(desc.type == schema::PrimitiveType_InstanceNorm); auto *kernel = new (std::nothrow) InstanceNormCPUKernel(opParameter, inputs, outputs, ctx, primitive); if (kernel == nullptr) { MS_LOG(ERROR) << "new InstanceNormCPUKernel fail!"; @@ -89,5 +104,5 @@ kernel::LiteKernel *CpuInstanceNormKernelCreator(const std::vector #include "src/lite_kernel.h" #include "include/context.h" -#include "nnacl/instance_norm_parameter.h" -#include "src/runtime/runtime_api.h" +#include "nnacl/fp32/instance_norm.h" using mindspore::lite::InnerContext; @@ -29,18 +27,27 @@ namespace mindspore::kernel { class InstanceNormCPUKernel : public LiteKernel { public: InstanceNormCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const InnerContext *ctx, + const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} - ~InstanceNormCPUKernel() override = default; + : LiteKernel(parameter, inputs, outputs, ctx, primitive) { + param_ = reinterpret_cast(parameter); + } + ~InstanceNormCPUKernel() override{}; int Init() override; int ReSize() override; int Run() override; - virtual int DoExecute(int task_id); -}; + int DoInstanceNorm(int thread_id); -int InstanceNormRun(void *cdata, int task_id); + private: + InstanceNormParameter *param_ = nullptr; + int outer_size_; + int inner_size_; + float *src_data_ = nullptr; + float *dst_data_ = nullptr; + float *scale_data_ = nullptr; + float *bias_data_ = nullptr; +}; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm.h index a142012272..e5534c8ede 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm.h @@ -39,7 +39,7 @@ class LayerNormCPUKernel : public LiteKernel { int DoLayerNorm(int thread_id); private: - LayerNormParameter *param_; + LayerNormParameter *param_ = nullptr; int outer_size_; int inner_size_; float *src_data_ = nullptr; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc index 5b4c1a2084..ae6745bed5 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc @@ -45,8 +45,8 @@ TEST_F(TestInstanceNormFp32, INTest1) { std::vector inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor}; std::vector output(12); - std::vector corr_out = {-6.1533737, 7.4904885, -0.8563998, -0.289212, -9.356432, 0.13245535, - -3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324}; + std::vector corr_out = {5.0145645, 9.248516, 15.439679, 33.51017, 0.0012711287, 31.0666883, + 17.70254, -2.5507483, -8.204435, 2.3031063, -3.8630369, 6.4138837}; lite::Tensor output0_tensor(kNumberTypeFloat32, {1, 2, 2, 3}); output0_tensor.set_data(output.data()); @@ -80,8 +80,8 @@ TEST_F(TestInstanceNormFp32, INTest1) { TEST_F(TestInstanceNormFp32, INTest2) { std::vector in_data = {-11.18675, 11.433986, 11.386012, 11.245945, -2.7614849, 14.692399, -1.1983503, -6.6790967, 6.383416, -13.3213005, -8.693595, 9.476344, - -11.18675, 11.433986, 11.386012, 11.245945, -2.7614849, 14.692399, - -1.1983503, -6.6790967, 6.383416, -13.3213005, 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc
index 5b4c1a2084..ae6745bed5 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/instance_norm_fp32_tests.cc
@@ -45,8 +45,8 @@ TEST_F(TestInstanceNormFp32, INTest1) {
   std::vector<lite::Tensor *> inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor};
 
   std::vector<float> output(12);
-  std::vector<float> corr_out = {-6.1533737, 7.4904885,  -0.8563998, -0.289212,  -9.356432,  0.13245535,
-                                 -3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324};
+  std::vector<float> corr_out = {5.0145645, 9.248516,   15.439679, 33.51017,  0.0012711287, 31.0666883,
+                                 17.70254,  -2.5507483, -8.204435, 2.3031063, -3.8630369,   6.4138837};
 
   lite::Tensor output0_tensor(kNumberTypeFloat32, {1, 2, 2, 3});
   output0_tensor.set_data(output.data());
@@ -80,8 +80,8 @@ TEST_F(TestInstanceNormFp32, INTest2) {
 
   std::vector<float> in_data = {-11.18675,  11.433986,  11.386012, 11.245945,   -2.7614849, 14.692399,
                                 -1.1983503, -6.6790967, 6.383416,  -13.3213005, -8.693595,  9.476344,
-                                -11.18675,  11.433986,  11.386012, 11.245945,   -2.7614849, 14.692399,
-                                -1.1983503, -6.6790967, 6.383416,  -13.3213005, -8.693595,  9.476344};
+                                -12.18675,  12.433986,  12.386012, 12.245945,   -3.7614849, 15.692399,
+                                -2.1983503, -7.6790967, 7.383416,  -14.3213005, -9.693595,  10.476344};
   std::vector<float> in_data1 = {12.352293, 5.122387, 14.249514, 12.352293, 5.122387, 14.249514};
   std::vector<float> in_data2 = {14.632595, 0.70900035, 11.179003, 14.632595, 0.70900035, 11.179003};
@@ -90,18 +90,18 @@
   op_param.epsilon_ = 0.001f;
 
   lite::Tensor input0_tensor(kNumberTypeFloat32, {2, 2, 2, 3});
-  lite::Tensor input1_tensor(kNumberTypeFloat32, {6});
-  lite::Tensor input2_tensor(kNumberTypeFloat32, {6});
+  lite::Tensor input1_tensor(kNumberTypeFloat32, {2, 3});
+  lite::Tensor input2_tensor(kNumberTypeFloat32, {2, 3});
   input0_tensor.set_data(in_data.data());
   input1_tensor.set_data(in_data1.data());
   input2_tensor.set_data(in_data2.data());
   std::vector<lite::Tensor *> inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor};
 
   std::vector<float> output(24);
-  std::vector<float> corr_out = {-6.1533737, 7.4904885,  -0.8563998, -0.289212,  -9.356432,  0.13245535,
-                                 -3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324,
-                                 -6.1533737, 7.4904885,  -0.8563998, -0.289212,  -9.356432,  0.13245535,
-                                 -3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324};
+  std::vector<float> corr_out = {5.0145645, 9.248516,   15.439679, 33.51017,   0.0012711287, 31.0666883,
+                                 17.70254,  -2.5507483, -8.204435, 2.3031063,  -3.8630369,   6.4138837,
+                                 5.133601,  9.310399,   15.439679, 33.886883,  -0.22505027,  31.066883,
+                                 16.888313, -2.5316327, -8.204435, 2.6215858,  -3.717714,    6.4138837};
 
   lite::Tensor output0_tensor(kNumberTypeFloat32, {2, 2, 2, 3});
   output0_tensor.set_data(output.data());
diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_conv_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_conv_parser.cc
index 158c774640..746642b194 100644
--- a/mindspore/lite/tools/converter/parser/onnx/onnx_conv_parser.cc
+++ b/mindspore/lite/tools/converter/parser/onnx/onnx_conv_parser.cc
@@ -21,7 +21,7 @@
 
 namespace mindspore {
 namespace lite {
-constexpr int32_t kSingleGrounp = 1;
+constexpr int32_t kSingleGroup = 1;
 bool OnnxConvParser::ParseGroupConvolution(const std::unique_ptr<schema::Conv2DT> &attr, schema::CNodeT *op) {
   MS_LOG(DEBUG) << "onnx DepthwiseConvParser";
   if (attr == nullptr || attr->group != attr->channelIn) {
@@ -172,7 +172,7 @@ STATUS OnnxConvParser::Parse(const onnx::GraphProto &onnx_graph, const onnx::Nod
     attr->activationType = schema::ActivationType_NO_ACTIVATION;
   }
 
-  if (attr->group > kSingleGrounp && attr->group == attr->channelIn) {
+  if (attr->group > kSingleGroup && attr->group == attr->channelIn) {
     if (!ParseGroupConvolution(attr, op)) {
       MS_LOG(ERROR) << "Convert Convolution to Depthwise failed";
       return RET_ERROR;