
!11412 [MSLITE] support deconv bn activation fusion

From: @zhengjun10
Reviewed-by:
Signed-off-by:
Tag: v1.2.0-rc1
Merged by mindspore-ci-bot (Gitee), 4 years ago
Parent commit: 6f81d28a88
6 changed files with 99 additions and 26 deletions:
  1. mindspore/lite/tools/benchmark/benchmark.cc (+28 -3)
  2. mindspore/lite/tools/benchmark/benchmark.h (+4 -2)
  3. mindspore/lite/tools/optimizer/common/gllo_utils.cc (+2 -1)
  4. mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc (+9 -0)
  5. mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc (+54 -19)
  6. mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h (+2 -1)

mindspore/lite/tools/benchmark/benchmark.cc (+28 -3)

@@ -212,7 +212,10 @@ int Benchmark::ReadTensorData(std::ifstream &in_file_stream, const std::string &
   std::string line;
   getline(in_file_stream, line);
   std::stringstream line_stream(line);
-  tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(tensor_name);
+  if (this->benchmark_data_.find(tensor_name) != this->benchmark_data_.end()) {
+    return RET_OK;
+  }
+  tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, dims);
   if (tensor == nullptr) {
     MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
     return RET_ERROR;
@@ -248,7 +251,7 @@ int Benchmark::CompareOutput() {
   int total_size = 0;
   for (const auto &calib_tensor : benchmark_data_) {
     std::string node_or_tensor_name = calib_tensor.first;
-    tensor::MSTensor *tensor = GetTensorByNodeOrTensorName(node_or_tensor_name);
+    tensor::MSTensor *tensor = GetTensorByNameOrShape(node_or_tensor_name, calib_tensor.second->shape);
     if (tensor == nullptr) {
       MS_LOG(ERROR) << "Get tensor failed, tensor name: " << node_or_tensor_name;
       return RET_ERROR;
@@ -284,13 +287,35 @@ int Benchmark::CompareOutput() {
   return RET_OK;
 }
 
-tensor::MSTensor *Benchmark::GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name) {
+tensor::MSTensor *Benchmark::GetTensorByNodeShape(const std::vector<size_t> &node_shape) {
+  std::vector<tensor::MSTensor *> match_tensors;
+  std::vector<int> shape_vector;
+  (void)std::transform(node_shape.begin(), node_shape.end(), std::back_inserter(shape_vector),
+                       [](const size_t &value) { return static_cast<int>(value); });
+  auto tensors = session_->GetOutputs();
+  for (auto &out_tensor_pair : tensors) {
+    if (out_tensor_pair.second->shape() == shape_vector) {
+      match_tensors.emplace_back(out_tensor_pair.second);
+    }
+  }
+  if (match_tensors.empty() || match_tensors.size() != 1) {
+    MS_LOG(ERROR) << "get tensor by node shape failed";
+    return nullptr;
+  }
+  return match_tensors.front();
+}
+
+tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_tensor_name,
+                                                    const std::vector<size_t> &dims) {
   tensor::MSTensor *tensor = nullptr;
   auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name);
   if (tensors.empty() || tensors.size() != 1) {
     MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name
                  << " or node has more than one output tensor, switch to GetOutputByTensorName";
     tensor = session_->GetOutputByTensorName(node_or_tensor_name);
+    if (tensor == nullptr) {
+      return GetTensorByNodeShape(dims);
+    }
   } else {
     tensor = tensors.front();
   }

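A note on why the benchmark lookup changed: once BatchNorm/activation nodes are
fused away, the node name that the calibration data was recorded under may no
longer exist in the exported graph, so the comparison now falls back from node
name, to tensor name, to a unique match on output shape. A minimal standalone
sketch of that last fallback (toy Tensor type and container, not the MSLite
session API):

#include <algorithm>
#include <iterator>
#include <map>
#include <string>
#include <vector>

// Toy stand-in for an output tensor: a name plus a shape.
struct Tensor {
  std::string name;
  std::vector<int> shape;
};

// Accept a shape match only if it is unique among the session outputs; an
// ambiguous match is treated as a failure (nullptr), mirroring the
// match_tensors.size() != 1 check in GetTensorByNodeShape above.
Tensor *FindUniqueByShape(std::map<std::string, Tensor> &outputs, const std::vector<size_t> &dims) {
  std::vector<int> shape;
  (void)std::transform(dims.begin(), dims.end(), std::back_inserter(shape),
                       [](size_t v) { return static_cast<int>(v); });
  Tensor *match = nullptr;
  for (auto &kv : outputs) {
    if (kv.second.shape == shape) {
      if (match != nullptr) {
        return nullptr;  // second candidate: shape alone cannot disambiguate
      }
      match = &kv.second;
    }
  }
  return match;  // nullptr when nothing matched
}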

mindspore/lite/tools/benchmark/benchmark.h (+4 -2)

@@ -75,7 +75,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
     AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
     AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU");
     AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
-            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, defalut value: 1", 1);
+            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
     // MarkPerformance
     AddFlag(&BenchmarkFlags::loop_count_, "loopCount", "Run loop count", 10);
     AddFlag(&BenchmarkFlags::num_threads_, "numThreads", "Run threads number", 2);
@@ -153,7 +153,9 @@ class MS_API Benchmark {
 
   int CompareOutput();
 
-  tensor::MSTensor *GetTensorByNodeOrTensorName(const std::string &node_or_tensor_name);
+  tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);
+
+  tensor::MSTensor *GetTensorByNodeShape(const std::vector<size_t> &node_shape);
 
   int CompareStringData(const std::string &name, tensor::MSTensor *tensor);




mindspore/lite/tools/optimizer/common/gllo_utils.cc (+2 -1)

@@ -480,7 +480,8 @@ bool IsParamNode(const BaseRef &n) {
 bool IsConvNode(const BaseRef &n) {
   if (utils::isa<CNodePtr>(n) || utils::isa<ValueNodePtr>(n)) {
     auto type = opt::GetCNodeType(n);
-    return type == schema::PrimitiveType_Conv2D || type == schema::PrimitiveType_DepthwiseConv2D;
+    return type == schema::PrimitiveType_Conv2D || type == schema::PrimitiveType_DepthwiseConv2D ||
+           type == schema::PrimitiveType_DeConv2D;
   }
   return false;
 }

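IsConvNode is the anchor predicate the optimizer's fusion patterns match
against, so this one-line widening is what lets the transform and activation
fusions fire on transposed convolutions at all. A toy sketch of the predicate's
role (hypothetical enum standing in for schema::PrimitiveType):

#include <cassert>

// Hypothetical stand-in for schema::PrimitiveType.
enum class PrimType { Conv2D, DepthwiseConv2D, DeConv2D, Activation };

// Mirrors the widened check: all three conv flavours now count as "conv".
bool IsConvLike(PrimType type) {
  return type == PrimType::Conv2D || type == PrimType::DepthwiseConv2D ||
         type == PrimType::DeConv2D;
}

int main() {
  assert(IsConvLike(PrimType::DeConv2D));     // deconv anchors now match
  assert(!IsConvLike(PrimType::Activation));  // non-conv nodes still rejected
}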

mindspore/lite/tools/optimizer/fusion/conv_activation_fusion.cc (+9 -0)

@@ -18,6 +18,7 @@
 #include <memory>
 #include "src/ops/primitive_c.h"
 #include "src/ops/conv2d.h"
+#include "src/ops/deconv2d.h"
 #include "src/ops/depthwise_conv2d.h"
 #include "src/ops/activation.h"
 #include "schema/inner/model_generated.h"
@@ -82,6 +83,14 @@ const AnfNodePtr ConvActivationFusion::Process(const FuncGraphPtr &func_graph, c
       primc->SetActivationType(act_primitivec->GetType());
       return pre_node;
     }
+  } else if (node_type == schema::PrimitiveType_DeConv2D) {
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive_c));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive_c);
+    MS_ASSERT(primc != nullptr);
+    if (primc->GetActivationType() == schema::ActivationType_NO_ACTIVATION) {
+      primc->SetActivationType(act_primitivec->GetType());
+      return pre_node;
+    }
   } else {
     MS_LOG(ERROR) << "conv activation pass match only conv2d or depthwise_conv2d ";
   }

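The DeConv2D branch mirrors the existing Conv2D/DepthwiseConv2D logic: when the
producing primitive carries no activation yet, the standalone Activation node's
type is copied onto it and the pass returns the producer, which splices the
activation node out of the graph. A toy sketch of that rewrite (hypothetical
structs, not the real ANF/primitive API):

#include <memory>

enum class ActType { None, Relu, Relu6 };

// Toy conv/deconv primitive carrying a fused-activation attribute.
struct ConvNode {
  ActType act = ActType::None;
};

// Toy standalone activation node whose input is the conv/deconv.
struct ActNode {
  ActType act = ActType::Relu;
  std::shared_ptr<ConvNode> input;
};

// Returns the node that should replace act_node in the graph: the producer
// when fusion succeeded, or nullptr when the producer already carries an
// activation and the standalone node must be kept.
std::shared_ptr<ConvNode> FuseActivation(const ActNode &act_node) {
  if (act_node.input->act != ActType::None) {
    return nullptr;  // already fused once; cannot stack activations
  }
  act_node.input->act = act_node.act;  // fold the activation into the conv
  return act_node.input;
}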

mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc (+54 -19)

@@ -18,6 +18,7 @@
 #include <memory>
 #include "src/ops/primitive_c.h"
 #include "src/ops/conv2d.h"
+#include "src/ops/deconv2d.h"
 #include "src/ops/depthwise_conv2d.h"
 #include "src/param_value_lite.h"
 #include "schema/inner/model_generated.h"
@@ -30,8 +31,7 @@ constexpr size_t kConvWeightIndex = 2;
 constexpr size_t kConvBiasIndex = 3;
 constexpr size_t kConvNoBiasLen = 3;
 constexpr size_t kConvWithBiasLen = 4;
-
-int Get_Kenrnel_nums(const CNodePtr &conv_node) {
+int GetOutChannels(const CNodePtr &conv_node) {
   MS_ASSERT(conv_node != nullptr);
   auto value_primitive = conv_node->input(0);
   auto value_node = value_primitive->cast<ValueNodePtr>();
@@ -47,6 +47,11 @@ int Get_Kenrnel_nums(const CNodePtr &conv_node) {
     auto primc = utils::cast<std::shared_ptr<mindspore::lite::Conv2D>>(primitive);
     MS_ASSERT(primc != nullptr);
     return primc->GetChannelOut();
+  } else if (type == schema::PrimitiveType_DeConv2D) {
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive);
+    MS_ASSERT(primc != nullptr);
+    return primc->GetChannelOut();
   } else if (type == schema::PrimitiveType_DepthwiseConv2D) {
     MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DepthwiseConv2D>>(primitive));
     auto primc = utils::cast<std::shared_ptr<mindspore::lite::DepthwiseConv2D>>(primitive);
@@ -78,7 +83,7 @@ const AnfNodePtr ConvTransformFusion::Process(const FuncGraphPtr &func_graph, co
   }
 
   auto abstr = transform_node->abstract();
-  int kernel_nums = Get_Kenrnel_nums(conv_node);
+  int kernel_nums = GetOutChannels(conv_node);
   if (kernel_nums <= 0) {
     MS_LOG(INFO) << "Unsupported conv node, " << conv_node->DebugString();
     return node;
@@ -143,26 +148,23 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     return;
   }
   if (!conv_weight_node->isa<Parameter>()) {
-    MS_LOG(ERROR) << "scale weight node not paramter node";
+    MS_LOG(ERROR) << "scale weight node not parameter node";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
   if (conv_bias_node != nullptr && !conv_bias_node->isa<Parameter>()) {
-    MS_LOG(ERROR) << "scale bias node not paramter node";
+    MS_LOG(ERROR) << "scale bias node not parameter node";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-
   auto conv_weight_param = conv_weight_node->cast<ParameterPtr>()->default_param();
   auto weight_tensor = std::dynamic_pointer_cast<ParamValueLite>(conv_weight_param);
-  auto weight_data = reinterpret_cast<float *>(weight_tensor->tensor_addr());
   if (kernel_num <= 0) {
     MS_LOG(ERROR) << "kernel num less than 0";
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-  auto kernel_size = weight_tensor->tensor_shape_size() / kernel_num;
-  CalNewWeightTensor(weight_data, kernel_num, kernel_size, trans_scale);
+  CalNewWeightTensor(conv_node, weight_tensor, kernel_num, trans_scale);
   float *bias_data = nullptr;
   // conv has bias,bias_flag true
   bool bias_flag = false;
@@ -185,31 +187,64 @@
     conv_node->add_input(bias_node);
   }
 }
-void ConvTransformFusion::CalNewWeightTensor(float *weight_data, int kernel_num, int kernel_size,
-                                             const float *trans_scale) const {
+void ConvTransformFusion::CalNewWeightTensor(const CNodePtr &conv_node, const ParamValueLitePtr &weight_tensor,
+                                             int kernel_num, const float *trans_scale) const {
   MS_ASSERT(weight_data != nullptr);
   MS_ASSERT(trans_scale != nullptr);
-  auto tmp_weight_data = new (std::nothrow) float[kernel_num * kernel_size];
+  auto weight_shape_size = weight_tensor->tensor_shape_size();
+  auto tmp_weight_data = new (std::nothrow) float[weight_shape_size];
   if (tmp_weight_data == nullptr) {
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_MEMORY_FAILED);
     return;
   }
   MS_ASSERT(new_weight_data != nullptr);
-  auto data_size = kernel_num * kernel_size * sizeof(float);
+  auto data_size = weight_shape_size * sizeof(float);
   if (0 != memset_s(tmp_weight_data, data_size, 0, data_size)) {
     MS_LOG(ERROR) << "memset newWeightData failed";
     delete[] tmp_weight_data;
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_MEMORY_FAILED);
     return;
   }
-  if (this->fmk_type_ == lite::converter::FmkType_TF) {
-    for (int i = 0; i < kernel_num * kernel_size; i++) {
-      tmp_weight_data[i] = weight_data[i] * trans_scale[i % kernel_num];
+  auto weight_data = reinterpret_cast<float *>(weight_tensor->tensor_addr());
+  auto conv_type = GetCNodeType(conv_node);
+  if (conv_type == schema::PrimitiveType_DeConv2D) {
+    auto value_node = conv_node->input(0)->cast<ValueNodePtr>();
+    MS_ASSERT(value_node != nullptr);
+    auto value = value_node->value();
+    MS_ASSERT(value != nullptr);
+    auto primitive = value->cast<PrimitivePtr>();
+    MS_ASSERT(utils::isa<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive));
+    auto primc = utils::cast<std::shared_ptr<mindspore::lite::DeConv2D>>(primitive);
+    MS_ASSERT(primc != nullptr);
+    if (weight_tensor->tensor_shape().size() != 4) {
+      MS_LOG(ERROR) << "deconv2d weight tensor shape error";
+      delete[] tmp_weight_data;
+      return;
+    }
+    auto group = primc->GetGroup();
+    auto cin_group = weight_tensor->tensor_shape()[0] / group;
+    int area_size = weight_tensor->tensor_shape()[2] * weight_tensor->tensor_shape()[3];
+    int cout_size = kernel_num * area_size;
+    for (int k = 0; k < cin_group; ++k) {
+      for (int i = 0; i < kernel_num; ++i) {
+        auto row_addr = weight_data + k * cout_size + i * area_size;
+        auto new_row_addr = tmp_weight_data + k * cout_size + i * area_size;
+        for (int j = 0; j < area_size; j++) {
+          new_row_addr[j] = row_addr[j] * trans_scale[i];
+        }
+      }
     }
   } else {
-    for (int i = 0; i < kernel_num; i++) {
-      for (int j = 0; j < kernel_size; j++) {
-        tmp_weight_data[i * kernel_size + j] = weight_data[i * kernel_size + j] * trans_scale[i];
+    if (this->fmk_type_ == lite::converter::FmkType_TF) {
+      for (int i = 0; i < weight_shape_size; i++) {
+        tmp_weight_data[i] = weight_data[i] * trans_scale[i % kernel_num];
+      }
+    } else {
+      auto kernel_size = weight_shape_size / kernel_num;
+      for (int i = 0; i < kernel_num; i++) {
+        for (int j = 0; j < kernel_size; j++) {
+          tmp_weight_data[i * kernel_size + j] = weight_data[i * kernel_size + j] * trans_scale[i];
        }
       }
     }
   }

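The layout detail behind the new CalNewWeightTensor signature: a Conv2D weight
keeps the output channel outermost, so folding BatchNorm/scale multiplies
kernel_num contiguous blocks by trans_scale[i]; the indexing above
(shape[0] / group input channels, area_size = H * W elements per row) implies a
DeConv2D weight stores the input channel outermost, so the same per-output-
channel scale must be applied to strided rows instead. The fold itself is the
usual BatchNorm identity, scale[c] = gamma[c] / sqrt(var[c] + eps), applied to
every weight that feeds output channel c. A standalone sketch under that
[Cin, Cout, H, W] layout assumption (group handling omitted):

#include <cmath>
#include <vector>

// Fold the BN scale into a transposed-convolution weight laid out as
// w[cin][cout][h * w]. Afterwards bn(deconv(x, w)) == deconv(x, w') up to
// the bias term, which the pass adjusts separately (CalNewBiasTensor).
void FoldBnIntoDeconvWeight(std::vector<float> &weight, int cin, int cout, int area,
                            const std::vector<float> &gamma, const std::vector<float> &var,
                            float eps) {
  for (int c = 0; c < cout; ++c) {
    const float scale = gamma[c] / std::sqrt(var[c] + eps);  // per-output-channel factor
    for (int ci = 0; ci < cin; ++ci) {
      // Rows for output channel c are strided by cout * area across input channels.
      float *row = weight.data() + (static_cast<size_t>(ci) * cout + c) * area;
      for (int a = 0; a < area; ++a) {
        row[a] *= scale;
      }
    }
  }
}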

mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.h (+2 -1)

@@ -20,6 +20,7 @@
 #include <string>
 #include "backend/optimizer/common/optimizer.h"
 #include "tools/converter/converter_flags.h"
+#include "src/param_value_lite.h"
 
 using mindspore::lite::converter::FmkType;
 namespace mindspore::opt {
@@ -32,7 +33,7 @@ class ConvTransformFusion : public PatternProcessPass {
   void GenTransParam(const CNodePtr &, int, float *, float *) const;
   virtual void InitTransParam(const CNodePtr &, int, float *, float *) const = 0;
   void GenNewConvTensor(const FuncGraphPtr &, const CNodePtr &, int, const float *, const float *) const;
-  void CalNewWeightTensor(float *, int, int, const float *) const;
+  void CalNewWeightTensor(const CNodePtr &, const ParamValueLitePtr &, int, const float *) const;
   void CalNewBiasTensor(float *, int, bool, const float *, const float *) const;
   void SetFmkType(FmkType type) { this->fmk_type_ = type; }


