
!11412 [MSLITE] support deconv bn activation fusion

From: @zhengjun10
Reviewed-by:
Signed-off-by:
Tag: v1.2.0-rc1
Merged by mindspore-ci-bot (Gitee), 4 years ago
Parent commit: 6f81d28a88
6 changed files with 99 additions and 26 deletions:
  1. mindspore/lite/tools/benchmark/benchmark.cc (+28 -3)
  2. mindspore/lite/tools/benchmark/benchmark.h (+4 -2)
  3. mindspore/lite/tools/optimizer/common/gllo_utils.cc (+2 -1)
  4. mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc (+9 -0)
  5. mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc (+54 -19)
  6. mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h (+2 -1)

mindspore/lite/tools/benchmark/benchmark.cc (+28 -3)

@@ -212,7 +212,10 @@ int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string &
   std::string line;
   getline(in_file_stream, line);
   std::stringstream line_stream(line);
-  tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(tensor_name);
+  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
+    return RET_OK;
+  }
+  tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, dims);
   if (tensor == nullptr) {
     MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
     return RET_ERROR;
@@ -248,7 +251,7 @@ int Benchmark::CompareOutput() {
   int total_size = 0;
   for (const auto &calib_tensor : benchmark_data_) {
     std::string node_or_tensor_name = calib_tensor.first;
-    tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(node_or_tensor_name);
+    tensor::MSTensor *tensor = GetTensorByNameOrShape(node_or_tensor_name, calib_tensor.second->shape);
     if (tensor == nullptr) {
       MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name;
       return RET_ERROR;
@@ -284,13 +287,35 @@ int Benchmark::CompareOutput() {
   return RET_OK;
 }
 
-tensor::MSTensor *Benchmark::GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name) {
+tensor::MSTensor *Benchmark::GetTensorByNodeShape(const std::vector<size_t> &node_shape) {
+  std::vector<tensor::MSTensor *> match_tensors;
+  std::vector<int> shape_vector;
+  (void)std::transform(node_shape.begin(), node_shape.end(), std::back_inserter(shape_vector),
+                       [](const size_t &value) { return static_cast<int>(value); });
+  auto tensors = session_->GetOutputs();
+  for (auto &out_tensor_pair : tensors) {
+    if (out_tensor_pair.second->shape() == shape_vector) {
+      match_tensors.emplace_back(out_tensor_pair.second);
+    }
+  }
+  if (match_tensors.empty() || match_tensors.size() != 1) {
+    MS_LOG(ERROR) << "get tensor by node shape failed";
+    return nullptr;
+  }
+  return match_tensors.front();
+}
+
+tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_tensor_name,
+                                                    const std::vector<size_t> &dims) {
   tensor::MSTensor *tensor = nullptr;
   auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name);
   if (tensors.empty() || tensors.size() != 1) {
     MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name
                  << " or node has more than one output tensor, switch to GetOutputByTensorName";
     tensor = session_->GetOutputByTensorName(node_or_tensor_name);
+    if (tensor == nullptr) {
+      return GetTensorByNodeShape(dims);
+    }
   } else {
     tensor = tensors.front();
   }

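A note on why the benchmark lookup changed: once BatchNorm/activation nodes are
fused away, the node name that the calibration data was recorded under may no
longer exist in the exported graph, so the comparison now falls back from node
name, to tensor name, to a unique match on output shape. A minimal standalone
sketch of that last fallback (toy Tensor type and container, not the MSLite
session API):

#include <algorithm>
#include <iterator>
#include <map>
#include <string>
#include <vector>

// Toy stand-in for an output tensor: a name plus a shape.
struct Tensor {
  std::string name;
  std::vector<int> shape;
};

// Accept a shape match only if it is unique among the session outputs; an
// ambiguous match is treated as a failure (nullptr), mirroring the
// match_tensors.size() != 1 check in GetTensorByNodeShape above.
Tensor *FindUniqueByShape(std::map<std::string, Tensor> &outputs, const std::vector<size_t> &dims) {
  std::vector<int> shape;
  (void)std::transform(dims.begin(), dims.end(), std::back_inserter(shape),
                       [](size_t v) { return static_cast<int>(v); });
  Tensor *match = nullptr;
  for (auto &kv : outputs) {
    if (kv.second.shape == shape) {
      if (match != nullptr) {
        return nullptr;  // second candidate: shape alone cannot disambiguate
      }
      match = &kv.second;
    }
  }
  return match;  // nullptr when nothing matched
}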

mindspore/lite/tools/benchmark/benchmark.h (+4 -2)

@@ -75,7 +75,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
     AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
     AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU");
     AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
-            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, defalut value: 1", 1);
+            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
     // MarkPerformance
     AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10);
     AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2);
@@ -153,7 +153,9 @@ class MS_API Benchmark {
 
   int CompareOutput();
 
-  tensor::MSTensor *GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name);
+  tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);
+
+  tensor::MSTensor *GetTensorByNodeShape(const std::vector<size_t> &node_shape);
 
   int CompareStringData(const std::string &name, tensor::MSTensor *tensor);




mindspore/lite/tools/optimizer/common/gllo_utils.cc (+2 -1)

@@ -480,7 +480,8 @@ bool IsParamNode(const BaseRef &n) {
 bool IsConvNode(const BaseRef &n) {
   if (utils::isa<CNodePtr>(n) || utils::isa<ValueNodePtr>(n)) {
     auto type = opt::GetCNodeType(n);
-    return type == schema::PrimitiveType_Conv2D || type == schema::PrimitiveType_DepthwiseConv2D;
+    return type == schema::PrimitiveType_Conv2D || type == schema::PrimitiveType_DepthwiseConv2D ||
+           type == schema::PrimitiveType_DeConv2D;
   }
   return false;
 }

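IsConvNode is the anchor predicate the optimizer's fusion patterns match
against, so this one-line widening is what lets the transform and activation
fusions fire on transposed convolutions at all. A toy sketch of the predicate's
role (hypothetical enum standing in for schema::PrimitiveType):

#include <cassert>

// Hypothetical stand-in for schema::PrimitiveType.
enum class PrimType { Conv2D, DepthwiseConv2D, DeConv2D, Activation };

// Mirrors the widened check: all three conv flavours now count as "conv".
bool IsConvLike(PrimType type) {
  return type == PrimType::Conv2D || type == PrimType::DepthwiseConv2D ||
         type == PrimType::DeConv2D;
}

int main() {
  assert(IsConvLike(PrimType::DeConv2D));     // deconv anchors now match
  assert(!IsConvLike(PrimType::Activation));  // non-conv nodes still rejected
}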

mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc (+9 -0)

@@ -18,6 +18,7 @@
 #include <memory>
 #include "src/ops/primitive_c.h"
 #include "src/ops/conv2d.h"
+#include "src/ops/deconv2d.h"
 #include "src/ops/depthwise_conv2d.h"
 #include "src/ops/activation.h"
 #include "schema/inner/model_generated.h"
@@ -82,6 +83,14 @@ const AnfNodePtr ConvActivationFusion::Process(const FuncGraphPtr &func_graph, c
       primc->SetActivationType(act_primitivec->GetType());
       return pre_node;
     }
+  } else if (node_type == schema::PrimitiveType_DeConv2D) {
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive_c));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive_c);
+    MS_ASSERT(primc != nullptr);
+    if (primc->GetActivationType() == schema::ActivationType_NO_ACTIVATION) {
+      primc->SetActivationType(act_primitivec->GetType());
+      return pre_node;
+    }
   } else {
     MS_LOG(ERROR) << "conv activation pass match only conv2d or depthwise_conv2d ";
   }

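The DeConv2D branch mirrors the existing Conv2D/DepthwiseConv2D logic: when the
producing primitive carries no activation yet, the standalone Activation node's
type is copied onto it and the pass returns the producer, which splices the
activation node out of the graph. A toy sketch of that rewrite (hypothetical
structs, not the real ANF/primitive API):

#include <memory>

enum class ActType { None, Relu, Relu6 };

// Toy conv/deconv primitive carrying a fused-activation attribute.
struct ConvNode {
  ActType act = ActType::None;
};

// Toy standalone activation node whose input is the conv/deconv.
struct ActNode {
  ActType act = ActType::Relu;
  std::shared_ptr<ConvNode> input;
};

// Returns the node that should replace act_node in the graph: the producer
// when fusion succeeded, or nullptr when the producer already carries an
// activation and the standalone node must be kept.
std::shared_ptr<ConvNode> FuseActivation(const ActNode &act_node) {
  if (act_node.input->act != ActType::None) {
    return nullptr;  // already fused once; cannot stack activations
  }
  act_node.input->act = act_node.act;  // fold the activation into the conv
  return act_node.input;
}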

mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc (+54 -19)

@@ -18,6 +18,7 @@
 #include <memory>
 #include "src/ops/primitive_c.h"
 #include "src/ops/conv2d.h"
+#include "src/ops/deconv2d.h"
 #include "src/ops/depthwise_conv2d.h"
 #include "src/param_value_lite.h"
 #include "schema/inner/model_generated.h"
@@ -30,8 +31,7 @@ constexpr size_t kConvWeightIndex = 2;
 constexpr size_t kConvBiasIndex = 3;
 constexpr size_t kConvNoBiasLen = 3;
 constexpr size_t kConvWithBiasLen = 4;
-
-int Get_Kenrnel_nums(const CNodePtr &conv_node) {
+int GetOutChannels(const CNodePtr &conv_node) {
   MS_ASSERT(conv_node != nullptr);
   auto value_primitive = conv_node->input(0);
   auto value_node = value_primitive->cast<ValueNodePtr>();
@@ -47,6 +47,11 @@ int Get_Kenrnel_nums(const CNodePtr &conv_node) {
     auto primc = utils::cast<std::shared_ptr<mindspore::lite::Conv2D>>(primitive);
     MS_ASSERT(primc != nullptr);
     return primc->GetChannelOut();
+  } else if (type == schema::PrimitiveType_DeConv2D) {
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive);
+    MS_ASSERT(primc != nullptr);
+    return primc->GetChannelOut();
   } else if (type == schema::PrimitiveType_DepthwiseConv2D) {
     MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DepthwiseConv2D>>(primitive));
     auto primc = utils::cast<std::shared_ptr<mindspore::lite::DepthwiseConv2D>>(primitive);
@@ -78,7 +83,7 @@ const AnfNodePtr ConvTransformFusion::Process(const FuncGraphPtr &func_graph, co
   }
 
   auto abstr = transform_node->abstract();
-  int kernel_nums = Get_Kenrnel_nums(conv_node);
+  int kernel_nums = GetOutChannels(conv_node);
   if (kernel_nums <= 0) {
     MS_LOG(INFO) << "Unsupported conv node, " << conv_node->DebugString();
     return node;
@@ -143,26 +148,23 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     return;
   }
   if (!conv_weight_node->isa<Parameter>()) {
-    MS_LOG(ERROR) << "scale weight node not paramter node";
+    MS_LOG(ERROR) << "scale weight node not parameter node";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
   if (conv_bias_node != nullptr && !conv_bias_node->isa<Parameter>()) {
-    MS_LOG(ERROR) << "scale bias node not paramter node";
+    MS_LOG(ERROR) << "scale bias node not parameter node";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-
   auto conv_weight_param = conv_weight_node->cast<ParameterPtr>()->default_param();
   auto weight_tensor = std::dynamic_pointer_cast<ParamValueLite>(conv_weight_param);
-  auto weight_data = reinterpret_cast<float *>(weight_tensor->tensor_addr());
   if (kernel_num <= 0) {
     MS_LOG(ERROR) << "kernel num less than 0";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-  auto kernel_size = weight_tensor->tensor_shape_size() / kernel_num;
-  CalNewWeightTensor(weight_data, kernel_num, kernel_size, trans_scale);
+  CalNewWeightTensor(conv_node, weight_tensor, kernel_num, trans_scale);
   float *bias_data = nullptr;
   // conv has bias,bias_flag true
   bool bias_flag = false;
@@ -185,31 +187,64 @@
     conv_node->add_input(bias_node);
   }
 }
-void ConvTransformFusion::CalNewWeightTensor(float *weight_data, int kernel_num, int kernel_size,
-                                             const float *trans_scale) const {
+void ConvTransformFusion::CalNewWeightTensor(const CNodePtr &conv_node, const ParamValueLitePtr &weight_tensor,
+                                             int kernel_num, const float *trans_scale) const {
   MS_ASSERT(weight_data != nullptr);
   MS_ASSERT(trans_scale != nullptr);
-  auto tmp_weight_data = new (std::nothrow) float[kernel_num * kernel_size];
+  auto weight_shape_size = weight_tensor->tensor_shape_size();
+  auto tmp_weight_data = new (std::nothrow) float[weight_shape_size];
   if (tmp_weight_data == nullptr) {
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_MEMORY_FAILED);
     return;
   }
   MS_ASSERT(new_weight_data != nullptr);
-  auto data_size = kernel_num * kernel_size * sizeof(float);
+  auto data_size = weight_shape_size * sizeof(float);
   if (0 != memset_s(tmp_weight_data, data_size, 0, data_size)) {
     MS_LOG(ERROR) << "memset newWeightData failed";
     delete[] tmp_weight_data;
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_MEMORY_FAILED);
     return;
   }
-  if (this->fmk_type_ == lite::converter::FmkType_TF) {
-    for (int i = 0; i < kernel_num * kernel_size; i++) {
-      tmp_weight_data[i] = weight_data[i] * trans_scale[i % kernel_num];
+  auto weight_data = reinterpret_cast<float *>(weight_tensor->tensor_addr());
+  auto conv_type = GetCNodeType(conv_node);
+  if (conv_type == schema::PrimitiveType_DeConv2D) {
+    auto value_node = conv_node->input(0)->cast<ValueNodePtr>();
+    MS_ASSERT(value_node != nullptr);
+    auto value = value_node->value();
+    MS_ASSERT(value != nullptr);
+    auto primitive = value->cast<PrimitivePtr>();
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive);
+    MS_ASSERT(primc != nullptr);
+    if (weight_tensor->tensor_shape().size() != 4) {
+      MS_LOG(ERROR) << "deconv2d weight tensor shape error";
+      delete[] tmp_weight_data;
+      return;
+    }
+    auto group = primc->GetGroup();
+    auto cin_group = weight_tensor->tensor_shape()[0] / group;
+    int area_size = weight_tensor->tensor_shape()[2] * weight_tensor->tensor_shape()[3];
+    int cout_size = kernel_num * area_size;
+    for (int k = 0; k < cin_group; ++k) {
+      for (int i = 0; i < kernel_num; ++i) {
+        auto row_addr = weight_data + k * cout_size + i * area_size;
+        auto new_row_addr = tmp_weight_data + k * cout_size + i * area_size;
+        for (int j = 0; j < area_size; j++) {
+          new_row_addr[j] = row_addr[j] * trans_scale[i];
+        }
+      }
     }
   } else {
-    for (int i = 0; i < kernel_num; i++) {
-      for (int j = 0; j < kernel_size; j++) {
-        tmp_weight_data[i * kernel_size + j] = weight_data[i * kernel_size + j] * trans_scale[i];
+    if (this->fmk_type_ == lite::converter::FmkType_TF) {
+      for (int i = 0; i < weight_shape_size; i++) {
+        tmp_weight_data[i] = weight_data[i] * trans_scale[i % kernel_num];
+      }
+    } else {
+      auto kernel_size = weight_shape_size / kernel_num;
+      for (int i = 0; i < kernel_num; i++) {
+        for (int j = 0; j < kernel_size; j++) {
+          tmp_weight_data[i * kernel_size + j] = weight_data[i * kernel_size + j] * trans_scale[i];
        }
       }
     }
   }

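The layout detail behind the new CalNewWeightTensor signature: a Conv2D weight
keeps the output channel outermost, so folding BatchNorm/scale multiplies
kernel_num contiguous blocks by trans_scale[i]; the indexing above
(shape[0] / group input channels, area_size = H * W elements per row) implies a
DeConv2D weight stores the input channel outermost, so the same per-output-
channel scale must be applied to strided rows instead. The fold itself is the
usual BatchNorm identity, scale[c] = gamma[c] / sqrt(var[c] + eps), applied to
every weight that feeds output channel c. A standalone sketch under that
[Cin, Cout, H, W] layout assumption (group handling omitted):

#include <cmath>
#include <vector>

// Fold the BN scale into a transposed-convolution weight laid out as
// w[cin][cout][h * w]. Afterwards bn(deconv(x, w)) == deconv(x, w') up to
// the bias term, which the pass adjusts separately (CalNewBiasTensor).
void FoldBnIntoDeconvWeight(std::vector<float> &weight, int cin, int cout, int area,
                            const std::vector<float> &gamma, const std::vector<float> &var,
                            float eps) {
  for (int c = 0; c < cout; ++c) {
    const float scale = gamma[c] / std::sqrt(var[c] + eps);  // per-output-channel factor
    for (int ci = 0; ci < cin; ++ci) {
      // Rows for output channel c are strided by cout * area across input channels.
      float *row = weight.data() + (static_cast<size_t>(ci) * cout + c) * area;
      for (int a = 0; a < area; ++a) {
        row[a] *= scale;
      }
    }
  }
}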

mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h (+2 -1)

@@ -20,6 +20,7 @@
 #include <string>
 #include "backend/optimizer/common/optimizer.h"
 #include "tools/converter/converter_flags.h"
+#include "src/param_value_lite.h"
 
 using mindspore::lite::converter::FmkType;
 namespace mindspore::opt {
@@ -32,7 +33,7 @@ class ConvTransformFusion : public PatternProcessPass {
   void GenTransParam(const CNodePtr &, int, float *, float *) const;
   virtual void InitTransParam(const CNodePtr &, int, float *, float *) const = 0;
   void GenNewConvTensor(const FuncGraphPtr &, const CNodePtr &, int, const float *, const float *) const;
-  void CalNewWeightTensor(float *, int, int, const float *) const;
+  void CalNewWeightTensor(const CNodePtr &, const ParamValueLitePtr &, int, const float *) const;
   void CalNewBiasTensor(float *, int, bool, const float *, const float *) const;
   void SetFmkType(FmkType type) { this->fmk_type_ = type; }


