From 2717d0993b3eeab212bb9e4024145224e749ba85 Mon Sep 17 00:00:00 2001
From: zhengjun10
Date: Tue, 19 Jan 2021 14:52:15 +0800
Subject: [PATCH] support deconv bn activation fusion

---
 mindspore/lite/tools/benchmark/benchmark.cc   | 31 +++++++-
 mindspore/lite/tools/benchmark/benchmark.h    |  6 +-
 .../lite/tools/optimizer/common/gllo_utils.cc |  3 +-
 .../fusion/conv_activation_fusion.cc          |  9 +++
 .../optimizer/fusion/conv_transform_fusion.cc | 73 ++++++++++++++-----
 .../optimizer/fusion/conv_transform_fusion.h  |  3 +-
 6 files changed, 99 insertions(+), 26 deletions(-)

diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc
index 2ab2d50280..c0c445459e 100644
--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@@ -212,7 +212,10 @@ int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string &
   std::string line;
   getline(in_file_stream, line);
   std::stringstream line_stream(line);
-  tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(tensor_name);
+  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
+    return RET_OK;
+  }
+  tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, dims);
   if (tensor == nullptr) {
     MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
     return RET_ERROR;
@@ -248,7 +251,7 @@ int Benchmark::CompareOutput() {
   int total_size = 0;
   for (const auto &calib_tensor : benchmark_data_) {
     std::string node_or_tensor_name = calib_tensor.first;
-    tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(node_or_tensor_name);
+    tensor::MSTensor *tensor = GetTensorByNameOrShape(node_or_tensor_name, calib_tensor.second->shape);
     if (tensor == nullptr) {
       MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name;
       return RET_ERROR;
@@ -284,13 +287,35 @@ int Benchmark::CompareOutput() {
   return RET_OK;
 }
 
-tensor::MSTensor *Benchmark::GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name) {
+tensor::MSTensor *Benchmark::GetTensorByNodeShape(const std::vector<size_t> &node_shape) {
+  std::vector<tensor::MSTensor *> match_tensors;
+  std::vector<int> shape_vector;
+  (void)std::transform(node_shape.begin(), node_shape.end(), std::back_inserter(shape_vector),
+                       [](const size_t &value) { return static_cast<int>(value); });
+  auto tensors = session_->GetOutputs();
+  for (auto &out_tensor_pair : tensors) {
+    if (out_tensor_pair.second->shape() == shape_vector) {
+      match_tensors.emplace_back(out_tensor_pair.second);
+    }
+  }
+  if (match_tensors.size() != 1) {
+    MS_LOG(ERROR) << "get tensor by node shape failed";
+    return nullptr;
+  }
+  return match_tensors.front();
+}
+
+tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_tensor_name,
+                                                    const std::vector<size_t> &dims) {
   tensor::MSTensor *tensor = nullptr;
   auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name);
   if (tensors.empty() || tensors.size() != 1) {
     MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name
                  << " or node has more than one output tensor, switch to GetOutputByTensorName";
     tensor = session_->GetOutputByTensorName(node_or_tensor_name);
+    if (tensor == nullptr) {
+      return GetTensorByNodeShape(dims);
+    }
   } else {
     tensor = tensors.front();
   }
diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h
index 891004a4e9..df298b1e2d 100644
--- a/mindspore/lite/tools/benchmark/benchmark.h
+++ b/mindspore/lite/tools/benchmark/benchmark.h
@@ -75,7 +75,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
     AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
     AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU");
     AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
-            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, defalut value: 1", 1);
+            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
     // MarkPerformance
     AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10);
     AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2);
@@ -153,7 +153,9 @@ class MS_API Benchmark {
 
   int CompareOutput();
 
-  tensor::MSTensor *GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name);
+  tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);
+
+  tensor::MSTensor *GetTensorByNodeShape(const std::vector<size_t> &node_shape);
 
   int CompareStringData(const std::string &name, tensor::MSTensor *tensor);
 
diff --git a/mindspore/lite/tools/optimizer/common/gllo_utils.cc b/mindspore/lite/tools/optimizer/common/gllo_utils.cc
index 09d26c48c2..31221f6068 100644
--- a/mindspore/lite/tools/optimizer/common/gllo_utils.cc
+++ b/mindspore/lite/tools/optimizer/common/gllo_utils.cc
@@ -480,7 +480,8 @@ bool IsParamNode(const BaseRef &n) {
 bool IsConvNode(const BaseRef &n) {
   if (utils::isa<CNodePtr>(n) || utils::isa<ValueNodePtr>(n)) {
     auto type = opt::GetCNodeType(n);
-    return type == schema::PrimitiveType_Conv2D || type == schema::PrimitiveType_DepthwiseConv2D;
+    return type == schema::PrimitiveType_Conv2D || type == schema::PrimitiveType_DepthwiseConv2D ||
+           type == schema::PrimitiveType_DeConv2D;
   }
   return false;
 }
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc
index 47b8d172b0..41d5b41e56 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc
@@ -18,6 +18,7 @@
 #include <memory>
 #include "src/ops/primitive_c.h"
 #include "src/ops/conv2d.h"
+#include "src/ops/deconv2d.h"
 #include "src/ops/depthwise_conv2d.h"
 #include "src/ops/activation.h"
 #include "schema/inner/model_generated.h"
@@ -82,6 +83,14 @@ const AnfNodePtr ConvActivationFusion::Process(const FuncGraphPtr &func_graph, c
       primc->SetActivationType(act_primitivec->GetType());
       return pre_node;
     }
+  } else if (node_type == schema::PrimitiveType_DeConv2D) {
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive_c));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive_c);
+    MS_ASSERT(primc != nullptr);
+    if (primc->GetActivationType() == schema::ActivationType_NO_ACTIVATION) {
+      primc->SetActivationType(act_primitivec->GetType());
+      return pre_node;
+    }
   } else {
-    MS_LOG(ERROR) << "conv activation pass match only conv2d or depthwise_conv2d ";
+    MS_LOG(ERROR) << "conv activation pass matches only conv2d, depthwise_conv2d or deconv2d";
   }
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
index 1935d7fbe2..1b27adca31 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
@@ -18,6 +18,7 @@
 #include <memory>
 #include "src/ops/primitive_c.h"
 #include "src/ops/conv2d.h"
+#include "src/ops/deconv2d.h"
 #include "src/ops/depthwise_conv2d.h"
 #include "src/param_value_lite.h"
 #include "schema/inner/model_generated.h"
@@ -30,8 +31,7 @@ constexpr size_t kConvWeightIndex = 2;
 constexpr size_t kConvBiasIndex = 3;
 constexpr size_t kConvNoBiasLen = 3;
 constexpr size_t kConvWithBiasLen = 4;
-
-int Get_Kenrnel_nums(const CNodePtr &conv_node) {
+int GetOutChannels(const CNodePtr &conv_node) {
   MS_ASSERT(conv_node != nullptr);
   auto value_primitive = conv_node->input(0);
   auto value_node = value_primitive->cast<ValueNodePtr>();
@@ -47,6 +47,11 @@ int Get_Kenrnel_nums(const CNodePtr &conv_node) {
     auto primc = utils::cast<std::shared_ptr<mindspore::lite::Conv2D>>(primitive);
     MS_ASSERT(primc != nullptr);
     return primc->GetChannelOut();
+  } else if (type == schema::PrimitiveType_DeConv2D) {
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive);
+    MS_ASSERT(primc != nullptr);
+    return primc->GetChannelOut();
   } else if (type == schema::PrimitiveType_DepthwiseConv2D) {
     MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DepthwiseConv2D>>(primitive));
     auto primc = utils::cast<std::shared_ptr<mindspore::lite::DepthwiseConv2D>>(primitive);
     MS_ASSERT(primc != nullptr);
@@ -78,7 +83,7 @@ const AnfNodePtr ConvTransformFusion::Process(const FuncGraphPtr &func_graph, co
   }
   auto abstr = transform_node->abstract();
-  int kernel_nums = Get_Kenrnel_nums(conv_node);
+  int kernel_nums = GetOutChannels(conv_node);
   if (kernel_nums <= 0) {
     MS_LOG(INFO) << "Unsupported conv node, " << conv_node->DebugString();
     return node;
@@ -143,26 +148,23 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     return;
   }
   if (!conv_weight_node->isa<Parameter>()) {
-    MS_LOG(ERROR) << "scale weight node not paramter node";
+    MS_LOG(ERROR) << "scale weight node not parameter node";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
   if (conv_bias_node != nullptr && !conv_bias_node->isa<Parameter>()) {
-    MS_LOG(ERROR) << "scale bias node not paramter node";
+    MS_LOG(ERROR) << "scale bias node not parameter node";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-
   auto conv_weight_param = conv_weight_node->cast<ParameterPtr>()->default_param();
   auto weight_tensor = std::dynamic_pointer_cast<ParamValueLite>(conv_weight_param);
-  auto weight_data = reinterpret_cast<float *>(weight_tensor->tensor_addr());
   if (kernel_num <= 0) {
     MS_LOG(ERROR) << "kernel num less than 0";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-  auto kernel_size = weight_tensor->tensor_shape_size() / kernel_num;
-  CalNewWeightTensor(weight_data, kernel_num, kernel_size, trans_scale);
+  CalNewWeightTensor(conv_node, weight_tensor, kernel_num, trans_scale);
   float *bias_data = nullptr;
   // conv has bias,bias_flag true
   bool bias_flag = false;
@@ -185,31 +187,64 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     conv_node->add_input(bias_node);
   }
 }
-void ConvTransformFusion::CalNewWeightTensor(float *weight_data, int kernel_num, int kernel_size,
-                                             const float *trans_scale) const {
-  MS_ASSERT(weight_data != nullptr);
+void ConvTransformFusion::CalNewWeightTensor(const CNodePtr &conv_node, const ParamValueLitePtr &weight_tensor,
+                                             int kernel_num, const float *trans_scale) const {
+  MS_ASSERT(weight_tensor != nullptr);
   MS_ASSERT(trans_scale != nullptr);
-  auto tmp_weight_data = new (std::nothrow) float[kernel_num * kernel_size];
+  auto weight_shape_size = weight_tensor->tensor_shape_size();
+  auto tmp_weight_data = new (std::nothrow) float[weight_shape_size];
   if (tmp_weight_data == nullptr) {
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_MEMORY_FAILED);
     return;
   }
-  MS_ASSERT(new_weight_data != nullptr);
-  auto data_size = kernel_num * kernel_size * sizeof(float);
+  MS_ASSERT(tmp_weight_data != nullptr);
+  auto data_size = weight_shape_size * sizeof(float);
   if (0 != memset_s(tmp_weight_data, data_size, 0, data_size)) {
     MS_LOG(ERROR) << "memset newWeightData failed";
     delete[] tmp_weight_data;
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_MEMORY_FAILED);
     return;
   }
-  if (this->fmk_type_ == lite::converter::FmkType_TF) {
-    for (int i = 0; i < kernel_num * kernel_size; i++) {
-      tmp_weight_data[i] = weight_data[i] * trans_scale[i % kernel_num];
+  auto weight_data = reinterpret_cast<float *>(weight_tensor->tensor_addr());
+  auto conv_type = GetCNodeType(conv_node);
+  if (conv_type == schema::PrimitiveType_DeConv2D) {
+    auto value_node = conv_node->input(0)->cast<ValueNodePtr>();
+    MS_ASSERT(value_node != nullptr);
+    auto value = value_node->value();
+    MS_ASSERT(value != nullptr);
+    auto primitive = value->cast<PrimitiveCPtr>();
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive);
+    MS_ASSERT(primc != nullptr);
+    if (weight_tensor->tensor_shape().size() != 4) {
+      MS_LOG(ERROR) << "deconv2d weight tensor shape error";
+      delete[] tmp_weight_data;
+      return;
+    }
+    auto group = primc->GetGroup();
+    auto cin_group = weight_tensor->tensor_shape()[0] / group;
+    int area_size = weight_tensor->tensor_shape()[2] * weight_tensor->tensor_shape()[3];
+    int cout_size = kernel_num * area_size;
+    for (int k = 0; k < cin_group; ++k) {
+      for (int i = 0; i < kernel_num; ++i) {
+        auto row_addr = weight_data + k * cout_size + i * area_size;
+        auto new_row_addr = tmp_weight_data + k * cout_size + i * area_size;
+        for (int j = 0; j < area_size; j++) {
+          new_row_addr[j] = row_addr[j] * trans_scale[i];
+        }
+      }
     }
   } else {
-    for (int i = 0; i < kernel_num; i++) {
-      for (int j = 0; j < kernel_size; j++) {
-        tmp_weight_data[i * kernel_size + j] = weight_data[i * kernel_size + j] * trans_scale[i];
+    if (this->fmk_type_ == lite::converter::FmkType_TF) {
+      for (int i = 0; i < weight_shape_size; i++) {
+        tmp_weight_data[i] = weight_data[i] * trans_scale[i % kernel_num];
+      }
+    } else {
+      auto kernel_size = weight_shape_size / kernel_num;
+      for (int i = 0; i < kernel_num; i++) {
+        for (int j = 0; j < kernel_size; j++) {
+          tmp_weight_data[i * kernel_size + j] = weight_data[i * kernel_size + j] * trans_scale[i];
+        }
       }
     }
   }
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
index 379edf9315..c518f30d5b 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
+++ b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
@@ -20,6 +20,7 @@
 #include <string>
 #include "backend/optimizer/common/optimizer.h"
 #include "tools/converter/converter_flags.h"
+#include "src/param_value_lite.h"
 
 using mindspore::lite::converter::FmkType;
 namespace mindspore::opt {
@@ -32,7 +33,7 @@ class ConvTransformFusion : public PatternProcessPass {
   void GenTransParam(const CNodePtr &, int, float *, float *) const;
   virtual void InitTransParam(const CNodePtr &, int, float *, float *) const = 0;
   void GenNewConvTensor(const FuncGraphPtr &, const CNodePtr &, int, const float *, const float *) const;
-  void CalNewWeightTensor(float *, int, int, const float *) const;
+  void CalNewWeightTensor(const CNodePtr &, const ParamValueLitePtr &, int, const float *) const;
   void CalNewBiasTensor(float *, int, bool, const float *, const float *) const;
   void SetFmkType(FmkType type) { this->fmk_type_ = type; }
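
The core of this change is CalNewWeightTensor: a following per-channel scale (for example a batch norm folded to y = x * scale + offset) is pushed into the (de)convolution weights, one factor per output channel. For Conv2D the weights of output channel i form one contiguous kernel_size block, so the old per-block loop suffices; a DeConv2D weight is laid out as (cin, cout, h, w), so one output channel owns a strided set of h*w planes, one per (grouped) input channel, which is what the row_addr arithmetic above walks. A minimal standalone sketch of the three scalings in plain C++ (the layouts as just described are assumptions of the sketch, not the MindSpore API):

#include <cstddef>
#include <vector>

// (cout, kh, kw, cin) layout: each output channel is one contiguous block,
// so every kernel_size-sized block gets one factor.
void FoldScaleConv(std::vector<float> *w, int cout, const float *scale) {
  const size_t kernel_size = w->size() / cout;
  for (int i = 0; i < cout; ++i) {
    for (size_t j = 0; j < kernel_size; ++j) {
      (*w)[i * kernel_size + j] *= scale[i];
    }
  }
}

// TF-style (kh, kw, cin, cout) layout: output channels interleave, so the
// factor is picked per element -- the `i % kernel_num` indexing in the patch.
void FoldScaleConvTF(std::vector<float> *w, int cout, const float *scale) {
  for (size_t i = 0; i < w->size(); ++i) {
    (*w)[i] *= scale[i % cout];
  }
}

// (cin, cout, kh, kw) deconv layout: for output channel i, scale one
// (kh * kw) plane inside every input-channel slice.
void FoldScaleDeconv(std::vector<float> *w, int cin, int cout, int area,
                     const float *scale) {
  for (int k = 0; k < cin; ++k) {
    for (int i = 0; i < cout; ++i) {
      float *plane = w->data() + (k * cout + i) * area;
      for (int j = 0; j < area; ++j) {
        plane[j] *= scale[i];
      }
    }
  }
}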
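
The activation part of the fusion is attribute rewriting rather than graph surgery: when the preceding conv-like primitive carries no activation yet, the Activation node's type is written into it and the conv node is returned, which splices the standalone activation out of the graph. A toy sketch of that guard (the enum and struct here are illustrative stand-ins, not MindSpore's types):

enum ActivationType { NO_ACTIVATION, RELU, RELU6 };

struct ConvPrimitive {
  ActivationType act = NO_ACTIVATION;
};

// True when the activation was absorbed, i.e. the standalone activation node
// may be dropped; false leaves the graph untouched (already fused once).
bool TryFuseActivation(ConvPrimitive *conv, ActivationType act) {
  if (conv->act != NO_ACTIVATION) {
    return false;
  }
  conv->act = act;
  return true;
}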
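
On the benchmark side, fusion can rename or merge outputs, so a calibration entry may no longer resolve by node or tensor name; the new GetTensorByNodeShape falls back to matching the expected shape against all session outputs and only accepts a unique hit, since an ambiguous match could compare against the wrong output. A sketch of that matching rule over a simplified output map (FakeTensor and the map are stand-ins for the real session types):

#include <map>
#include <string>
#include <vector>

struct FakeTensor {
  std::vector<int> shape;
};

// Return the unique output whose shape equals `want`; nullptr when the match
// is missing or ambiguous, mirroring the single-match check in the patch.
FakeTensor *FindByShape(const std::map<std::string, FakeTensor *> &outputs,
                        const std::vector<int> &want) {
  FakeTensor *match = nullptr;
  int hits = 0;
  for (const auto &kv : outputs) {
    if (kv.second->shape == want) {
      match = kv.second;
      ++hits;
    }
  }
  return hits == 1 ? match : nullptr;
}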