| @@ -19,11 +19,12 @@ | |||
| #include "nnacl/op_base.h" | |||
| int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data, | |||
| const float *beta_data, bool affine, float epsilon, float *dst_data, size_t task_id, size_t thread_num) { | |||
| const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data, | |||
| size_t task_id, size_t thread_num) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| if (affine && (gamma_data == NULL || beta_data == NULL)) { | |||
| if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -63,7 +64,7 @@ int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const | |||
| #ifdef ENABLE_NEON | |||
| float32x4_t meanv = vdupq_n_f32(mean); | |||
| float32x4_t denov = vdupq_n_f32(deno); | |||
| if (affine) { | |||
| if (elementwise_mode != 0) { | |||
| for (; index < inner_size - C8NUM; index += C8NUM) { | |||
| float32x4_t srcv1 = vld1q_f32(src + index); | |||
| float32x4_t srcv2 = vld1q_f32(src + index + 4); | |||
| @@ -71,14 +72,23 @@ int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const | |||
| float32x4_t outv2 = vsubq_f32(srcv2, meanv); | |||
| outv1 = vmulq_f32(outv1, denov); | |||
| outv2 = vmulq_f32(outv2, denov); | |||
| float32x4_t gammav1 = vld1q_f32(gamma_data + index); | |||
| float32x4_t gammav2 = vld1q_f32(gamma_data + index + 4); | |||
| float32x4_t betav1 = vld1q_f32(beta_data + index); | |||
| float32x4_t betav2 = vld1q_f32(beta_data + index + 4); | |||
| outv1 = vmulq_f32(outv1, gammav1); | |||
| outv2 = vmulq_f32(outv2, gammav2); | |||
| outv1 = vaddq_f32(outv1, betav1); | |||
| outv2 = vaddq_f32(outv2, betav2); | |||
| if (elementwise_mode == 1) { | |||
| float32x4_t gammav1 = vdupq_n_f32(gamma_data[j]); | |||
| float32x4_t betav1 = vdupq_n_f32(beta_data[j]); | |||
| outv1 = vmulq_f32(outv1, gammav1); | |||
| outv2 = vmulq_f32(outv2, gammav1); | |||
| outv1 = vaddq_f32(outv1, betav1); | |||
| outv2 = vaddq_f32(outv2, betav1); | |||
| } else { | |||
| float32x4_t gammav1 = vld1q_f32(gamma_data + index); | |||
| float32x4_t gammav2 = vld1q_f32(gamma_data + index + 4); | |||
| float32x4_t betav1 = vld1q_f32(beta_data + index); | |||
| float32x4_t betav2 = vld1q_f32(beta_data + index + 4); | |||
| outv1 = vmulq_f32(outv1, gammav1); | |||
| outv2 = vmulq_f32(outv2, gammav2); | |||
| outv1 = vaddq_f32(outv1, betav1); | |||
| outv2 = vaddq_f32(outv2, betav2); | |||
| } | |||
| vst1q_f32(dst + index, outv1); | |||
| vst1q_f32(dst + index + 4, outv2); | |||
| } | |||
| @@ -97,7 +107,9 @@ int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const | |||
| #endif | |||
| for (; index < inner_size; index++) { | |||
| dst[index] = (src[index] - mean) * deno; | |||
| if (affine) { | |||
| if (elementwise_mode == 1) { | |||
| dst[index] = dst[index] * gamma_data[j] + beta_data[j]; | |||
| } else if (elementwise_mode == 2) { | |||
| dst[index] = dst[index] * gamma_data[index] + beta_data[index]; | |||
| } | |||
| } | |||
| @@ -24,7 +24,8 @@ extern "C" { | |||
| #endif | |||
| int LayerNorm(size_t outer_size, size_t inner_size, const float *src_data, const float *gamma_data, | |||
| const float *beta_data, bool affine, float epsilon, float *dst_data, size_t task_id, size_t thread_num); | |||
| const float *beta_data, enum ElementwiseMode elementwise_mode, float epsilon, float *dst_data, | |||
| size_t task_id, size_t thread_num); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -22,12 +22,13 @@ | |||
| * | |||
| * */ | |||
| int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data, | |||
| bool affine, int outer_size, int inner_size, LayerNormQuantArg *quant, float epsilon) { | |||
| enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant, | |||
| float epsilon) { | |||
| if (src_data == NULL || dst_data == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| if (affine && (gamma_data == NULL || beta_data == NULL)) { | |||
| if (elementwise_mode != 0 && (gamma_data == NULL || beta_data == NULL)) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| @@ -47,7 +48,9 @@ int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float * | |||
| for (int i = 0; i < inner_size; i++) { | |||
| float fp32_src = (src[i] - quant->in_zp_) * quant->in_scale_; | |||
| float fp32_dst = (fp32_src - mean) * deno; | |||
| if (affine) { | |||
| if (elementwise_mode == 1) { | |||
| fp32_dst = fp32_dst * gamma_data[out_index] + beta_data[out_index]; | |||
| } else if (elementwise_mode == 2) { | |||
| fp32_dst = fp32_dst * gamma_data[i] + beta_data[i]; | |||
| } | |||
| int32_t int32_dst = (int32_t)round(fp32_dst * 1.0 / quant->out_scale_ + quant->out_zp_); | |||
| @@ -25,7 +25,8 @@ extern "C" { | |||
| #endif | |||
| int LayerNormInt8(const int8_t *src_data, const float *gamma_data, const float *beta_data, int8_t *dst_data, | |||
| bool affine, int outer_size, int inner_size, LayerNormQuantArg *quant, float epsilon); | |||
| enum ElementwiseMode elementwise_mode, int outer_size, int inner_size, LayerNormQuantArg *quant, | |||
| float epsilon); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -19,11 +19,12 @@ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/quantization/quantize.h" | |||
| enum ElementwiseMode { ELEMENTWISE_NOT = 0, ELEMENTWISE_PER_CHANNEL = 1, ELEMENTWISE_PER_NUM = 2 }; | |||
| typedef struct LayerNormParameter { | |||
| // Primitive parameter | |||
| OpParameter op_parameter_; | |||
| float epsilon_; | |||
| bool elementwise_affine_; | |||
| enum ElementwiseMode elementwise_mode_; | |||
| // shape correlative | |||
| int *normalized_shape_; | |||
| int normalized_dims_; | |||
| @@ -89,21 +89,32 @@ int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite: | |||
| MS_LOG(INFO) << "input tensor amount error"; | |||
| return RET_INPUT_TENSOR_ERROR; | |||
| } | |||
| if (!infer_flag()) { | |||
| return RET_INFER_INVALID; | |||
| } | |||
| auto input_shape = input->shape(); | |||
| auto normalized_shape = GetNormalizedShape(); | |||
| if (normalized_shape.size() > input_shape.size() || normalized_shape.size() == 0) { | |||
| normlized_shape_ = GetNormalizedShape(); | |||
| elementwise_mode_ = GetElementwiseAffine() ? 2 : 0; | |||
| if (normlized_shape_.size() > input_shape.size()) { | |||
| MS_LOG(INFO) << "normalized_shape attr invalid"; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| size_t first_index = input_shape.size() - normalized_shape.size(); | |||
| for (size_t i = first_index; i < input_shape.size(); ++i) { | |||
| if (input_shape.at(i) != normalized_shape.at(i - first_index)) { | |||
| if (normlized_shape_.empty()) { | |||
| // instance norm -> layernorm | |||
| if (input->format() == schema::Format_NCHW) { | |||
| normlized_shape_.insert(normlized_shape_.begin(), input_shape.begin() + 2, input_shape.end()); | |||
| elementwise_mode_ = 1; | |||
| } else { | |||
| MS_LOG(INFO) << "normalized_shape attr invalid"; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| } | |||
| if (!infer_flag()) { | |||
| return RET_INFER_INVALID; | |||
| size_t first_index = input_shape.size() - normlized_shape_.size(); | |||
| for (size_t i = first_index; i < input_shape.size(); ++i) { | |||
| if (input_shape.at(i) != normlized_shape_.at(i - first_index)) { | |||
| MS_LOG(INFO) << "normalized_shape attr invalid"; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| } | |||
| output->set_shape(input_shape); | |||
| @@ -42,6 +42,12 @@ class LayerNorm : public PrimitiveC { | |||
| std::vector<int> GetNormalizedShape() const; | |||
| float GetEpsilon() const; | |||
| bool GetElementwiseAffine() const; | |||
| std::vector<int> normlized_shape() const { return normlized_shape_; } | |||
| int elementwise_mode() const { return elementwise_mode_; } | |||
| protected: | |||
| std::vector<int> normlized_shape_; | |||
| int elementwise_mode_ = 0; | |||
| }; | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| @@ -14,6 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/ops/populate/layer_norm_populate.h" | |||
| #include "nnacl/layer_norm_parameter.h" | |||
| #include <cstdint> | |||
| #include "src/ops/layer_norm.h" | |||
| @@ -31,7 +32,7 @@ OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primi | |||
| memset(layer_norm_parameter, 0, sizeof(LayerNormParameter)); | |||
| layer_norm_parameter->op_parameter_.type_ = primitive->Type(); | |||
| auto param = reinterpret_cast<mindspore::lite::LayerNorm *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); | |||
| auto normalized_shape = param->GetNormalizedShape(); | |||
| auto normalized_shape = param->normlized_shape(); | |||
| layer_norm_parameter->normalized_dims_ = normalized_shape.size(); | |||
| if (normalized_shape.size() > SIZE_MAX / sizeof(int)) { | |||
| MS_LOG(ERROR) << "normalized_shape size too big"; | |||
| @@ -48,7 +49,7 @@ OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primi | |||
| layer_norm_parameter->normalized_shape_[i] = normalized_shape[i]; | |||
| } | |||
| layer_norm_parameter->epsilon_ = param->GetEpsilon(); | |||
| layer_norm_parameter->elementwise_affine_ = param->GetElementwiseAffine(); | |||
| layer_norm_parameter->elementwise_mode_ = static_cast<ElementwiseMode>(param->elementwise_mode()); | |||
| return reinterpret_cast<OpParameter *>(layer_norm_parameter); | |||
| } | |||
| @@ -0,0 +1,28 @@ | |||
| /** | |||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ | |||
| #include "src/ops/arithmetic.h" | |||
| namespace mindspore { | |||
| namespace lite { | |||
| OpParameter *PopulateLayerNormParameter(const mindspore::lite::PrimitiveC *primitive); | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_SRC_OPS_POPULATE_STRIDED_LAYER_NORM_POPULATE_H_ | |||
| @@ -18,6 +18,7 @@ | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/ops/populate/layer_norm_populate.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| @@ -34,6 +35,13 @@ int LayerNormCPUKernel::Init() { | |||
| } | |||
| int LayerNormCPUKernel::ReSize() { | |||
| if (op_parameter_ != nullptr) { | |||
| free(op_parameter_); | |||
| op_parameter_ = nullptr; | |||
| } | |||
| op_parameter_ = PopulateLayerNormParameter(primitive_); | |||
| op_parameter_->thread_num_ = context_->thread_num_; | |||
| param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_); | |||
| auto shape = in_tensors_.front()->shape(); | |||
| outer_size_ = 1; | |||
| inner_size_ = 1; | |||
| @@ -48,7 +56,7 @@ int LayerNormCPUKernel::ReSize() { | |||
| } | |||
| int LayerNormCPUKernel::DoLayerNorm(int thread_id) { | |||
| int ret = LayerNorm(outer_size_, inner_size_, src_data_, gamma_data_, beta_data_, param_->elementwise_affine_, | |||
| int ret = LayerNorm(outer_size_, inner_size_, src_data_, gamma_data_, beta_data_, param_->elementwise_mode_, | |||
| param_->epsilon_, dst_data_, thread_id, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DoLayerNorm error error_code[" << ret << "]"; | |||
| @@ -69,7 +77,7 @@ int LayerNormRun(void *cdata, int task_id) { | |||
| int LayerNormCPUKernel::Run() { | |||
| src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData()); | |||
| if (param_->elementwise_affine_) { | |||
| if (param_->elementwise_mode_ != 0) { | |||
| gamma_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); | |||
| beta_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData()); | |||
| } | |||
| @@ -15,6 +15,7 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/int8/layer_norm_int8.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "src/ops/populate/layer_norm_populate.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| @@ -23,11 +24,11 @@ using mindspore::schema::PrimitiveType_LayerNorm; | |||
| namespace mindspore::kernel { | |||
| LayerNormInt8CPUKernel::~LayerNormInt8CPUKernel() { | |||
| if (param_->elementwise_affine_ && gamma_ptr_ != nullptr) { | |||
| if (param_->elementwise_mode_ != 0 && gamma_ptr_ != nullptr) { | |||
| free(gamma_ptr_); | |||
| gamma_ptr_ = nullptr; | |||
| } | |||
| if (param_->elementwise_affine_ && beta_ptr_ != nullptr) { | |||
| if (param_->elementwise_mode_ != 0 && beta_ptr_ != nullptr) { | |||
| free(beta_ptr_); | |||
| beta_ptr_ = nullptr; | |||
| } | |||
| @@ -43,7 +44,7 @@ int LayerNormInt8CPUKernel::SetQuantArgs() { | |||
| quant_param_.out_zp_ = output->quant_params().front().zeroPoint; | |||
| quant_param_.out_scale_ = output->quant_params().front().scale; | |||
| if (param_->elementwise_affine_) { | |||
| if (param_->elementwise_mode_ != 0) { | |||
| lite::Tensor *gamma_tensor = in_tensors_.at(1); | |||
| lite::Tensor *beta_tensor = in_tensors_.at(2); | |||
| @@ -84,6 +85,13 @@ int LayerNormInt8CPUKernel::Init() { | |||
| } | |||
| int LayerNormInt8CPUKernel::ReSize() { | |||
| if (op_parameter_ != nullptr) { | |||
| free(op_parameter_); | |||
| op_parameter_ = nullptr; | |||
| } | |||
| op_parameter_ = PopulateLayerNormParameter(primitive_); | |||
| op_parameter_->thread_num_ = context_->thread_num_; | |||
| param_ = reinterpret_cast<LayerNormParameter *>(op_parameter_); | |||
| auto shape = in_tensors_.front()->shape(); | |||
| outer_size_ = 1; | |||
| inner_size_ = 1; | |||
| @@ -116,8 +124,8 @@ int LayerNormInt8CPUKernel::DoExecute(int task_id) { | |||
| const int8_t *thread_src = src_ptr_ + task_id * param_->thread_outsize_ * inner_size_; | |||
| int8_t *thread_dst = dst_ptr_ + task_id * param_->thread_outsize_ * inner_size_; | |||
| LayerNormInt8(thread_src, gamma_ptr_, beta_ptr_, thread_dst, param_->elementwise_affine_, current_out_size, | |||
| inner_size_, &quant_param_, param_->epsilon_); | |||
| LayerNormInt8(thread_src, gamma_ptr_, beta_ptr_, thread_dst, param_->elementwise_mode_, current_out_size, inner_size_, | |||
| &quant_param_, param_->epsilon_); | |||
| return RET_OK; | |||
| } | |||
| @@ -22,7 +22,7 @@ namespace lite { | |||
| lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphProto &onnx_graph, | |||
| const onnx::NodeProto &onnx_node) { | |||
| MS_LOG(DEBUG) << "onnx InstanceNormParser"; | |||
| auto attr = std::make_unique<schema::InstanceNormT>(); | |||
| auto attr = std::make_unique<schema::LayerNormT>(); | |||
| if (attr == nullptr) { | |||
| MS_LOG(ERROR) << "new op failed"; | |||
| return nullptr; | |||
| @@ -39,7 +39,8 @@ lite::PrimitiveC *OnnxInstanceNormParser::ParseLitePrimitive(const onnx::GraphPr | |||
| MS_LOG(ERROR) << "new primitive failed"; | |||
| return nullptr; | |||
| } | |||
| primitive->value.type = schema::PrimitiveType_InstanceNorm; | |||
| attr->elementwiseAffine = true; | |||
| primitive->value.type = schema::PrimitiveType_LayerNorm; | |||
| primitive->value.value = attr.release(); | |||
| return PrimitiveC::Create(primitive.release()); | |||
| } | |||