diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc index 07d51f1450..4687ec549e 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_fusion_pass.cc @@ -226,7 +226,16 @@ int NPUFusionPass::FormatFusion(kernel::LiteKernel *kernel) { } RemoveAndFreeKernel(trans_kernel); } - pre_kernel->set_out_kernels(pre_insert_kernels); + auto pre_out_kernels = pre_kernel->out_kernels(); + size_t index = 0; + for (; index < pre_out_kernels.size(); index++) { + if (pre_out_kernels[index] == kernel) { + pre_out_kernels.erase(pre_out_kernels.begin() + index); + break; + } + } + pre_out_kernels.insert(pre_out_kernels.begin() + index, pre_insert_kernels.begin(), pre_insert_kernels.end()); + pre_kernel->set_out_kernels(pre_out_kernels); RemoveAndFreeKernel(kernel); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc index 997b0213da..4d6f005b60 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc @@ -62,10 +62,10 @@ int InstanceNormRun(void *cdata, int task_id) { } int InstanceNormCPUKernel::Run() { - src_data_ = reinterpret_cast(in_tensors_.at(0)->MutableData()); - gamma_data_ = reinterpret_cast(in_tensors_.at(1)->MutableData()); - beta_data_ = reinterpret_cast(in_tensors_.at(2)->MutableData()); - dst_data_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); + src_data_ = reinterpret_cast(in_tensors_.at(0)->data_c()); + gamma_data_ = reinterpret_cast(in_tensors_.at(1)->data_c()); + beta_data_ = reinterpret_cast(in_tensors_.at(2)->data_c()); + dst_data_ = reinterpret_cast(out_tensors_.at(0)->data_c()); auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_); 
if (ret != RET_OK) { MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc index 45e68cf1cd..03bca32835 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc @@ -83,10 +83,11 @@ int LayerNormRun(void *cdata, int task_id) { } int LayerNormCPUKernel::Run() { - src_data_ = reinterpret_cast(in_tensors_.at(0)->MutableData()); - gamma_data_ = reinterpret_cast(in_tensors_.at(1)->MutableData()); - beta_data_ = reinterpret_cast(in_tensors_.at(2)->MutableData()); - dst_data_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); + src_data_ = reinterpret_cast(in_tensors_.at(0)->data_c()); + gamma_data_ = reinterpret_cast(in_tensors_.at(1)->data_c()); + beta_data_ = reinterpret_cast(in_tensors_.at(2)->data_c()); + dst_data_ = reinterpret_cast(out_tensors_.at(0)->data_c()); + auto ret = ParallelLaunch(this->context_->thread_pool_, LayerNormRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "LayerNormRun error error_code[" << ret << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.cc index d4922bee2b..4b986a0932 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/layer_norm_int8.cc @@ -123,8 +123,8 @@ int LayerNormInt8Run(void *cdata, int task_id) { } int LayerNormInt8CPUKernel::Run() { - src_ptr_ = reinterpret_cast(in_tensors_.at(0)->MutableData()); - dst_ptr_ = reinterpret_cast(out_tensors_.at(0)->MutableData()); + src_ptr_ = reinterpret_cast(in_tensors_.at(0)->data_c()); + dst_ptr_ = reinterpret_cast(out_tensors_.at(0)->data_c()); auto ret = ParallelLaunch(this->context_->thread_pool_, LayerNormInt8Run, this, op_parameter_->thread_num_); if (ret != 
RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc b/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc new file mode 100644 index 0000000000..e3897a05d8 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.cc @@ -0,0 +1,89 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/npu/instance_norm_npu.h" +#include +#include "src/kernel_registry.h" +#include "src/runtime/agent/npu/npu_converter_utils.h" + +using mindspore::kernel::KERNEL_ARCH::kNPU; +using mindspore::lite::KernelRegistrar; +using mindspore::schema::PrimitiveType_InstanceNorm; + +namespace mindspore::kernel { +int LayerNormNPUKernel::IsSupport(const std::vector &inputs, const std::vector &outputs, + OpParameter *opParameter) { + return RET_OK; +} + +int LayerNormNPUKernel::SetNPUInputs(const std::vector &inputs, + const std::vector &outputs, + const std::vector &npu_inputs) { + op_ = new (std::nothrow) hiai::op::InstanceNorm(name_); + if (op_ == nullptr) { + MS_LOG(ERROR) << "New layer norm npu operator for op " << name_ << " failed."; + return RET_ERROR; + } + op_->set_input_x(*npu_inputs[0]); + + auto gamma = new (std::nothrow) hiai::op::Const(name_ + "_gamma"); + if (gamma == nullptr) { + MS_LOG(ERROR) << "New gamma const failed."; + return RET_ERROR; + } + auto gamma_shape = inputs[1]->shape(); + std::shared_ptr gamma_tensor = 
std::shared_ptr(new (std::nothrow) ge::Tensor()); + if (gamma_tensor == nullptr) { + MS_LOG(ERROR) << "new gamma_tensor failed."; + return RET_ERROR; + } + ge::TensorDesc gamma_tensor_desc(lite::ConverterToNPUShape({1, gamma_shape[0], 1, 1}), ge::FORMAT_NCHW, + lite::ConverterToNPUDataType(inputs[1]->data_type())); + gamma_tensor->SetTensorDesc(gamma_tensor_desc); + gamma_tensor->SetData(reinterpret_cast(inputs[1]->data_c()), inputs[1]->Size()); + gamma->set_attr_value(gamma_tensor); + op_->set_input_gamma(*gamma); + + auto beta = new (std::nothrow) hiai::op::Const(name_ + "_beta"); + if (beta == nullptr) { + MS_LOG(ERROR) << "New beta const failed."; + return RET_ERROR; + } + auto beta_shape = inputs[2]->shape(); + std::shared_ptr beta_tensor = std::shared_ptr(new (std::nothrow) ge::Tensor()); + if (beta_tensor == nullptr) { + MS_LOG(ERROR) << "new beta_tensor failed."; + return RET_ERROR; + } + ge::TensorDesc beta_tensor_desc(lite::ConverterToNPUShape({1, beta_shape[0], 1, 1}), ge::FORMAT_NCHW, + lite::ConverterToNPUDataType(inputs[2]->data_type())); + beta_tensor->SetTensorDesc(beta_tensor_desc); + beta_tensor->SetData(reinterpret_cast(inputs[2]->data_c()), inputs[2]->Size()); + beta->set_attr_value(beta_tensor); + op_->set_input_beta(*beta); + op_->set_attr_epsilon(layer_norm_param_->epsilon_); + return RET_OK; +} + +ge::Operator *mindspore::kernel::LayerNormNPUKernel::GetNPUOp() { return this->op_; } + +LayerNormNPUKernel::~LayerNormNPUKernel() { + if (op_ != nullptr) { + delete op_; + op_ = nullptr; + } +} + +REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, NPUKernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h b/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h new file mode 100644 index 0000000000..dde66c21ca --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/npu/instance_norm_npu.h @@ -0,0 +1,45 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_LAYER_NORM_NPU_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_LAYER_NORM_NPU_H_ +#include +#include "nnacl/layer_norm_parameter.h" +#include "src/runtime/kernel/npu/npu_kernel.h" +#include "include/graph/op/all_ops.h" +namespace mindspore::kernel { +class LayerNormNPUKernel : public NPUKernel { + public: + LayerNormNPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : NPUKernel(parameter, inputs, outputs, ctx, primitive) { + layer_norm_param_ = reinterpret_cast(parameter); + } + ~LayerNormNPUKernel() override; + + int IsSupport(const std::vector &inputs, const std::vector &outputs, + OpParameter *opParameter) override; + int SetNPUInputs(const std::vector &inputs, const std::vector &outputs, + const std::vector &npu_inputs) override; + ge::Operator *GetNPUOp() override; + + private: + hiai::op::InstanceNorm *op_ = nullptr; + LayerNormParameter *layer_norm_param_; +}; +} // namespace mindspore::kernel +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_LAYER_NORM_NPU_H_ diff --git a/mindspore/lite/src/runtime/kernel/npu/scale_npu.h b/mindspore/lite/src/runtime/kernel/npu/scale_npu.h index a09e10651c..f230d00e3e 100644 --- a/mindspore/lite/src/runtime/kernel/npu/scale_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/scale_npu.h @@ -42,4 +42,4 @@ class ScaleNPUKernel : public NPUKernel { ScaleParameter *scale_parameter_; }; } // 
namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_Scale_NPU_H_ +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_NPU_SCALE_NPU_H_ diff --git a/mindspore/lite/src/runtime/kernel/npu/squeeze_npu.cc b/mindspore/lite/src/runtime/kernel/npu/squeeze_npu.cc index f12d35c4f9..27fe65c8a4 100644 --- a/mindspore/lite/src/runtime/kernel/npu/squeeze_npu.cc +++ b/mindspore/lite/src/runtime/kernel/npu/squeeze_npu.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/mindspore/lite/src/runtime/kernel/npu/squeeze_npu.h b/mindspore/lite/src/runtime/kernel/npu/squeeze_npu.h index 770dee648e..8b433a045c 100644 --- a/mindspore/lite/src/runtime/kernel/npu/squeeze_npu.h +++ b/mindspore/lite/src/runtime/kernel/npu/squeeze_npu.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.