From 2717d0993b3eeab212bb9e4024145224e749ba85 Mon Sep 17 00:00:00 2001
From: zhengjun10
Date: Tue, 19 Jan 2021 14:52:15 +0800
Subject: [PATCH] support deconv bn activation fusion

---
 mindspore/lite/tools/benchmark/benchmark.cc   | 31 +++++++-
 mindspore/lite/tools/benchmark/benchmark.h    |  6 +-
 .../lite/tools/optimizer/common/gllo_utils.cc |  3 +-
 .../fusion/conv_activation_fusion.cc          |  9 +++
 .../optimizer/fusion/conv_transform_fusion.cc | 73 ++++++++++++++-----
 .../optimizer/fusion/conv_transform_fusion.h  |  3 +-
 6 files changed, 99 insertions(+), 26 deletions(-)

diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc
index 2ab2d50280..c0c445459e 100644
--- a/mindspore/lite/tools/benchmark/benchmark.cc
+++ b/mindspore/lite/tools/benchmark/benchmark.cc
@@ -212,7 +212,10 @@ int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string &
   std::string line;
   getline(in_file_stream, line);
   std::stringstream line_stream(line);
-  tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(tensor_name);
+  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
+    return RET_OK;
+  }
+  tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, dims);
   if (tensor == nullptr) {
     MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
     return RET_ERROR;
@@ -248,7 +251,7 @@ int Benchmark::CompareOutput() {
   int total_size = 0;
   for (const auto &calib_tensor : benchmark_data_) {
     std::string node_or_tensor_name = calib_tensor.first;
-    tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(node_or_tensor_name);
+    tensor::MSTensor *tensor = GetTensorByNameOrShape(node_or_tensor_name, calib_tensor.second->shape);
     if (tensor == nullptr) {
       MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name;
       return RET_ERROR;
@@ -284,13 +287,35 @@ int Benchmark::CompareOutput() {
   return RET_OK;
 }
 
-tensor::MSTensor *Benchmark::GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name) {
+tensor::MSTensor *Benchmark::GetTensorByNodeShape(const std::vector<size_t> &node_shape) {
+  std::vector<tensor::MSTensor *> match_tensors;
+  std::vector<int> shape_vector;
+  (void)std::transform(node_shape.begin(), node_shape.end(), std::back_inserter(shape_vector),
+                       [](const size_t &value) { return static_cast<int>(value); });
+  auto tensors = session_->GetOutputs();
+  for (auto &out_tensor_pair : tensors) {
+    if (out_tensor_pair.second->shape() == shape_vector) {
+      match_tensors.emplace_back(out_tensor_pair.second);
+    }
+  }
+  if (match_tensors.size() != 1) {
+    MS_LOG(ERROR) << "get tensor by node shape failed";
+    return nullptr;
+  }
+  return match_tensors.front();
+}
+
+tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_tensor_name,
+                                                    const std::vector<size_t> &dims) {
   tensor::MSTensor *tensor = nullptr;
   auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name);
   if (tensors.empty() || tensors.size() != 1) {
     MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name
                  << " or node has more than one output tensor, switch to GetOutputByTensorName";
     tensor = session_->GetOutputByTensorName(node_or_tensor_name);
+    if (tensor == nullptr) {
+      return GetTensorByNodeShape(dims);
+    }
   } else {
     tensor = tensors.front();
   }
diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h
index 891004a4e9..df298b1e2d 100644
--- a/mindspore/lite/tools/benchmark/benchmark.h
+++ b/mindspore/lite/tools/benchmark/benchmark.h
@@ -75,7 +75,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
     AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
     AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU");
     AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
-            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, defalut value: 1", 1);
+            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
     // MarkPerformance
     AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10);
     AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2);
@@ -153,7 +153,9 @@ class MS_API Benchmark {
 
   int CompareOutput();
 
-  tensor::MSTensor *GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name);
+  tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);
+
+  tensor::MSTensor *GetTensorByNodeShape(const std::vector<size_t> &node_shape);
 
   int CompareStringData(const std::string &name, tensor::MSTensor *tensor);
 
diff --git a/mindspore/lite/tools/optimizer/common/gllo_utils.cc b/mindspore/lite/tools/optimizer/common/gllo_utils.cc
index 09d26c48c2..31221f6068 100644
--- a/mindspore/lite/tools/optimizer/common/gllo_utils.cc
+++ b/mindspore/lite/tools/optimizer/common/gllo_utils.cc
@@ -480,7 +480,8 @@ bool IsParamNode(const BaseRef &n) {
 bool IsConvNode(const BaseRef &n) {
   if (utils::isa<CNodePtr>(n) || utils::isa<ValueNodePtr>(n)) {
     auto type = opt::GetCNodeType(n);
-    return type == schema::PrimitiveType_Conv2D || type == schema::PrimitiveType_DepthwiseConv2D;
+    return type == schema::PrimitiveType_Conv2D || type == schema::PrimitiveType_DepthwiseConv2D ||
+           type == schema::PrimitiveType_DeConv2D;
   }
   return false;
 }
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc
index 47b8d172b0..41d5b41e56 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc
@@ -18,6 +18,7 @@
 #include <memory>
 #include "src/ops/primitive_c.h"
 #include "src/ops/conv2d.h"
+#include "src/ops/deconv2d.h"
 #include "src/ops/depthwise_conv2d.h"
 #include "src/ops/activation.h"
 #include "schema/inner/model_generated.h"
@@ -82,6 +83,14 @@ const AnfNodePtr ConvActivationFusion::Process(const FuncGraphPtr &func_graph, c
       primc->SetActivationType(act_primitivec->GetType());
       return pre_node;
     }
+  } else if (node_type == schema::PrimitiveType_DeConv2D) {
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive_c));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive_c);
+    MS_ASSERT(primc != nullptr);
+    if (primc->GetActivationType() == schema::ActivationType_NO_ACTIVATION) {
+      primc->SetActivationType(act_primitivec->GetType());
+      return pre_node;
+    }
   } else {
-    MS_LOG(ERROR) << "conv activation pass match only conv2d or depthwise_conv2d ";
+    MS_LOG(ERROR) << "conv activation pass matches only conv2d, depthwise_conv2d or deconv2d";
   }
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
index 1935d7fbe2..1b27adca31 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
@@ -18,6 +18,7 @@
 #include <memory>
 #include "src/ops/primitive_c.h"
 #include "src/ops/conv2d.h"
+#include "src/ops/deconv2d.h"
 #include "src/ops/depthwise_conv2d.h"
 #include "src/param_value_lite.h"
 #include "schema/inner/model_generated.h"
@@ -30,8 +31,7 @@ constexpr size_t kConvWeightIndex = 2;
 constexpr size_t kConvBiasIndex = 3;
 constexpr size_t kConvNoBiasLen = 3;
 constexpr size_t kConvWithBiasLen = 4;
-
-int Get_Kenrnel_nums(const CNodePtr &conv_node) {
+int GetOutChannels(const CNodePtr &conv_node) {
   MS_ASSERT(conv_node != nullptr);
   auto value_primitive = conv_node->input(0);
   auto value_node = value_primitive->cast<ValueNodePtr>();
@@ -47,6 +47,11 @@ int Get_Kenrnel_nums(const CNodePtr &conv_node) {
     auto primc = utils::cast<std::shared_ptr<mindspore::lite::Conv2D>>(primitive);
     MS_ASSERT(primc != nullptr);
     return primc->GetChannelOut();
+  } else if (type == schema::PrimitiveType_DeConv2D) {
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive);
+    MS_ASSERT(primc != nullptr);
+    return primc->GetChannelOut();
   } else if (type == schema::PrimitiveType_DepthwiseConv2D) {
     MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DepthwiseConv2D>>(primitive));
     auto primc = utils::cast<std::shared_ptr<mindspore::lite::DepthwiseConv2D>>(primitive);
     MS_ASSERT(primc != nullptr);
@@ -78,7 +83,7 @@ const AnfNodePtr ConvTransformFusion::Process(const FuncGraphPtr &func_graph, co
   }
   auto abstr = transform_node->abstract();
-  int kernel_nums = Get_Kenrnel_nums(conv_node);
+  int kernel_nums = GetOutChannels(conv_node);
   if (kernel_nums <= 0) {
     MS_LOG(INFO) << "Unsupported conv node, " << conv_node->DebugString();
     return node;
@@ -143,26 +148,23 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     return;
   }
   if (!conv_weight_node->isa<Parameter>()) {
-    MS_LOG(ERROR) << "scale weight node not paramter node";
+    MS_LOG(ERROR) << "scale weight node not parameter node";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
   if (conv_bias_node != nullptr && !conv_bias_node->isa<Parameter>()) {
-    MS_LOG(ERROR) << "scale bias node not paramter node";
+    MS_LOG(ERROR) << "scale bias node not parameter node";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-
   auto conv_weight_param = conv_weight_node->cast<ParameterPtr>()->default_param();
   auto weight_tensor = std::dynamic_pointer_cast<ParamValueLite>(conv_weight_param);
-  auto weight_data = reinterpret_cast<float *>(weight_tensor->tensor_addr());
   if (kernel_num <= 0) {
     MS_LOG(ERROR) << "kernel num less than 0";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-  auto kernel_size = weight_tensor->tensor_shape_size() / kernel_num;
-  CalNewWeightTensor(weight_data, kernel_num, kernel_size, trans_scale);
+  CalNewWeightTensor(conv_node, weight_tensor, kernel_num, trans_scale);
   float *bias_data = nullptr;
   // conv has bias,bias_flag true
   bool bias_flag = false;
@@ -185,31 +187,64 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     conv_node->add_input(bias_node);
   }
 }
-void ConvTransformFusion::CalNewWeightTensor(float *weight_data, int kernel_num, int kernel_size,
-                                             const float *trans_scale) const {
-  MS_ASSERT(weight_data != nullptr);
+void ConvTransformFusion::CalNewWeightTensor(const CNodePtr &conv_node, const ParamValueLitePtr &weight_tensor,
+                                             int kernel_num, const float *trans_scale) const {
+  MS_ASSERT(weight_tensor != nullptr);
   MS_ASSERT(trans_scale != nullptr);
-  auto tmp_weight_data = new (std::nothrow) float[kernel_num * kernel_size];
+  auto weight_shape_size = weight_tensor->tensor_shape_size();
+  auto tmp_weight_data = new (std::nothrow) float[weight_shape_size];
   if (tmp_weight_data == nullptr) {
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_MEMORY_FAILED);
     return;
   }
-  MS_ASSERT(new_weight_data != nullptr);
-  auto data_size = kernel_num * kernel_size * sizeof(float);
+  MS_ASSERT(tmp_weight_data != nullptr);
+  auto data_size = weight_shape_size * sizeof(float);
   if (0 != memset_s(tmp_weight_data, data_size, 0, data_size)) {
     MS_LOG(ERROR) << "memset newWeightData failed";
     delete[] tmp_weight_data;
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_MEMORY_FAILED);
     return;
   }
-  if (this->fmk_type_ == lite::converter::FmkType_TF) {
-    for (int i = 0; i < kernel_num * kernel_size; i++) {
-      tmp_weight_data[i] = weight_data[i] * trans_scale[i % kernel_num];
+  auto weight_data = reinterpret_cast<float *>(weight_tensor->tensor_addr());
+  auto conv_type = GetCNodeType(conv_node);
+  if (conv_type == schema::PrimitiveType_DeConv2D) {
+    auto value_node = conv_node->input(0)->cast<ValueNodePtr>();
+    MS_ASSERT(value_node != nullptr);
+    auto value = value_node->value();
+    MS_ASSERT(value != nullptr);
+    auto primitive = value->cast<PrimitiveCPtr>();
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive);
+    MS_ASSERT(primc != nullptr);
+    if (weight_tensor->tensor_shape().size() != 4) {
+      MS_LOG(ERROR) << "deconv2d weight tensor shape error";
+      delete[] tmp_weight_data;
+      return;
+    }
+    auto group = primc->GetGroup();
+    auto cin_group = weight_tensor->tensor_shape()[0] / group;
+    int area_size = weight_tensor->tensor_shape()[2] * weight_tensor->tensor_shape()[3];
+    int cout_size = kernel_num * area_size;
+    for (int k = 0; k < cin_group; ++k) {
+      for (int i = 0; i < kernel_num; ++i) {
+        auto row_addr = weight_data + k * cout_size + i * area_size;
+        auto new_row_addr = tmp_weight_data + k * cout_size + i * area_size;
+        for (int j = 0; j < area_size; j++) {
+          new_row_addr[j] = row_addr[j] * trans_scale[i];
+        }
+      }
     }
   } else {
-    for (int i = 0; i < kernel_num; i++) {
-      for (int j = 0; j < kernel_size; j++) {
-        tmp_weight_data[i * kernel_size + j] = weight_data[i * kernel_size + j] * trans_scale[i];
+    if (this->fmk_type_ == lite::converter::FmkType_TF) {
+      for (int i = 0; i < weight_shape_size; i++) {
+        tmp_weight_data[i] = weight_data[i] * trans_scale[i % kernel_num];
+      }
+    } else {
+      auto kernel_size = weight_shape_size / kernel_num;
+      for (int i = 0; i < kernel_num; i++) {
+        for (int j = 0; j < kernel_size; j++) {
+          tmp_weight_data[i * kernel_size + j] = weight_data[i * kernel_size + j] * trans_scale[i];
+        }
       }
     }
   }
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
index 379edf9315..c518f30d5b 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
+++ b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h
@@ -20,6 +20,7 @@
 #include <string>
 #include "backend/optimizer/common/optimizer.h"
 #include "tools/converter/converter_flags.h"
+#include "src/param_value_lite.h"
 
 using mindspore::lite::converter::FmkType;
 namespace mindspore::opt {
@@ -32,7 +33,7 @@ class ConvTransformFusion : public PatternProcessPass {
   void GenTransParam(const CNodePtr &, int, float *, float *) const;
   virtual void InitTransParam(const CNodePtr &, int, float *, float *) const = 0;
   void GenNewConvTensor(const FuncGraphPtr &, const CNodePtr &, int, const float *, const float *) const;
-  void CalNewWeightTensor(float *, int, int, const float *) const;
+  void CalNewWeightTensor(const CNodePtr &, const ParamValueLitePtr &, int, const float *) const;
   void CalNewBiasTensor(float *, int, bool, const float *, const float *) const;
   void SetFmkType(FmkType type) { this->fmk_type_ = type; }
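
The core of this change is CalNewWeightTensor: a following per-channel scale (for example a batch norm folded to y = x * scale + offset) is pushed into the (de)convolution weights, one factor per output channel. For Conv2D the weights of output channel i form one contiguous kernel_size block, so the old per-block loop suffices; a DeConv2D weight is laid out as (cin, cout, h, w), so one output channel owns a strided set of h*w planes, one per (grouped) input channel, which is what the row_addr arithmetic above walks. A minimal standalone sketch of the three scalings in plain C++ (the layouts as just described are assumptions of the sketch, not the MindSpore API):

#include <cstddef>
#include <vector>

// (cout, kh, kw, cin) layout: each output channel is one contiguous block,
// so every kernel_size-sized block gets one factor.
void FoldScaleConv(std::vector<float> *w, int cout, const float *scale) {
  const size_t kernel_size = w->size() / cout;
  for (int i = 0; i < cout; ++i) {
    for (size_t j = 0; j < kernel_size; ++j) {
      (*w)[i * kernel_size + j] *= scale[i];
    }
  }
}

// TF-style (kh, kw, cin, cout) layout: output channels interleave, so the
// factor is picked per element -- the `i % kernel_num` indexing in the patch.
void FoldScaleConvTF(std::vector<float> *w, int cout, const float *scale) {
  for (size_t i = 0; i < w->size(); ++i) {
    (*w)[i] *= scale[i % cout];
  }
}

// (cin, cout, kh, kw) deconv layout: for output channel i, scale one
// (kh * kw) plane inside every input-channel slice.
void FoldScaleDeconv(std::vector<float> *w, int cin, int cout, int area,
                     const float *scale) {
  for (int k = 0; k < cin; ++k) {
    for (int i = 0; i < cout; ++i) {
      float *plane = w->data() + (k * cout + i) * area;
      for (int j = 0; j < area; ++j) {
        plane[j] *= scale[i];
      }
    }
  }
}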
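
The activation part of the fusion is attribute rewriting rather than graph surgery: when the preceding conv-like primitive carries no activation yet, the Activation node's type is written into it and the conv node is returned, which splices the standalone activation out of the graph. A toy sketch of that guard (the enum and struct here are illustrative stand-ins, not MindSpore's types):

enum ActivationType { NO_ACTIVATION, RELU, RELU6 };

struct ConvPrimitive {
  ActivationType act = NO_ACTIVATION;
};

// True when the activation was absorbed, i.e. the standalone activation node
// may be dropped; false leaves the graph untouched (already fused once).
bool TryFuseActivation(ConvPrimitive *conv, ActivationType act) {
  if (conv->act != NO_ACTIVATION) {
    return false;
  }
  conv->act = act;
  return true;
}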
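
On the benchmark side, fusion can rename or merge outputs, so a calibration entry may no longer resolve by node or tensor name; the new GetTensorByNodeShape falls back to matching the expected shape against all session outputs and only accepts a unique hit, since an ambiguous match could compare against the wrong output. A sketch of that matching rule over a simplified output map (FakeTensor and the map are stand-ins for the real session types):

#include <map>
#include <string>
#include <vector>

struct FakeTensor {
  std::vector<int> shape;
};

// Return the unique output whose shape equals `want`; nullptr when the match
// is missing or ambiguous, mirroring the single-match check in the patch.
FakeTensor *FindByShape(const std::map<std::string, FakeTensor *> &outputs,
                        const std::vector<int> &want) {
  FakeTensor *match = nullptr;
  int hits = 0;
  for (const auto &kv : outputs) {
    if (kv.second->shape == want) {
      match = kv.second;
      ++hits;
    }
  }
  return hits == 1 ? match : nullptr;
}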