You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, may include dashes ('-'), and can be up to 35 characters long.

batchmatmul_fusion.cc 8.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "tools/optimizer/fusion/batchmatmul_fusion.h"
  17. #include <memory>
  18. #include <vector>
  19. #include "src/ops/primitive_c.h"
  20. #include "src/param_value_lite.h"
  21. #include "schema/inner/model_generated.h"
  22. #include "utils/utils.h"
  23. #include "tools/optimizer/common/gllo_utils.h"
  24. #include "securec/include/securec.h"
  25. namespace mindspore::opt {
  26. namespace {
  27. bool IsStackNode(const BaseRef &n) {
  28. if (utils::isa<CNodePtr>(n) || utils::isa<ValueNodePtr>(n)) {
  29. auto type = opt::GetCNodeType(n);
  30. return type == schema::PrimitiveType_Stack;
  31. }
  32. return false;
  33. }
  34. bool IsFullConnectNode(const BaseRef &n) {
  35. if (utils::isa<CNodePtr>(n) || utils::isa<ValueNodePtr>(n)) {
  36. auto type = opt::GetCNodeType(n);
  37. return type == schema::PrimitiveType_FullConnection;
  38. }
  39. return false;
  40. }
  41. void *GetInputAddr(const AnfNodePtr &node, size_t input_index) {
  42. MS_ASSERT(node != nullptr);
  43. if (!node->isa<CNode>()) {
  44. MS_LOG(ERROR) << "GetInputAddr not cnode";
  45. return nullptr;
  46. }
  47. auto cnode = node->cast<CNodePtr>();
  48. if (input_index >= cnode->inputs().size()) {
  49. MS_LOG(ERROR) << "input index error";
  50. return nullptr;
  51. }
  52. if (cnode->input(input_index)->isa<Parameter>()) {
  53. auto param_input = cnode->input(input_index)->cast<ParameterPtr>();
  54. auto param_value = std::dynamic_pointer_cast<ParamValueLite>(param_input->default_param());
  55. if (param_value == nullptr) {
  56. MS_LOG(ERROR) << "param not paramValueLite";
  57. return nullptr;
  58. }
  59. return param_value->tensor_addr();
  60. }
  61. MS_LOG(ERROR) << "input not paramter";
  62. return nullptr;
  63. }
  64. STATUS GetRightMatmulInputParamter(const CNodePtr &stack_node, const ParameterPtr &rmatmul_input) {
  65. MS_ASSERT(stack_node != nullptr);
  66. MS_ASSERT(right_matmul_input != nullptr);
  67. auto joint_fullconnect_size = stack_node->inputs().size() - 1;
  68. auto fc = stack_node->input(1)->cast<CNodePtr>();
  69. auto fc_weight = fc->input(2)->cast<ParameterPtr>();
  70. auto fc_weight_param = std::dynamic_pointer_cast<ParamValueLite>(fc_weight->default_param());
  71. auto tensor_size = fc_weight_param->tensor_size();
  72. auto rmatmul_input_shape = fc_weight_param->tensor_shape();
  73. auto new_tensor_data = new (std::nothrow) int8_t[joint_fullconnect_size * tensor_size];
  74. if (new_tensor_data == nullptr) {
  75. MS_LOG(ERROR) << "tensor_data is nullptr";
  76. return RET_ERROR;
  77. }
  78. for (size_t i = 1; i < joint_fullconnect_size + 1; i++) {
  79. auto tensor_addr = GetInputAddr(stack_node->input(i), 2);
  80. if (tensor_addr == nullptr) {
  81. MS_LOG(ERROR) << "input tensor addr nullptr";
  82. return RET_ERROR;
  83. }
  84. if (EOK != memcpy_s(new_tensor_data + (i - 1) * tensor_size, tensor_size, tensor_addr, tensor_size)) {
  85. MS_LOG(ERROR) << "memcpy_s data failed";
  86. return RET_ERROR;
  87. }
  88. }
  89. rmatmul_input_shape.insert(rmatmul_input_shape.begin(), joint_fullconnect_size);
  90. auto type_ptr = TypeIdToType(fc_weight_param->tensor_type());
  91. auto abstract_tensor = std::make_shared<abstract::AbstractTensor>(type_ptr, rmatmul_input_shape);
  92. rmatmul_input->set_abstract(abstract_tensor);
  93. rmatmul_input->set_name(stack_node->fullname_with_scope() + "right_parameter");
  94. ParamValueLitePtr param_value = std::make_shared<ParamValueLite>();
  95. MS_ASSERT(param_value != nullptr);
  96. param_value->set_tensor_shape(rmatmul_input_shape);
  97. param_value->set_tensor_type(fc_weight_param->tensor_type());
  98. param_value->set_format(fc_weight_param->format());
  99. param_value->set_tensor_addr(new_tensor_data);
  100. param_value->set_tensor_size(joint_fullconnect_size * tensor_size);
  101. rmatmul_input->set_default_param(param_value);
  102. return RET_OK;
  103. }
  104. } // namespace
  105. const BaseRef BatchMatMulFusion::DefinePattern() const {
  106. auto pack_var = std::make_shared<CondVar>(IsStackNode);
  107. auto left_fullconnect_var = std::make_shared<CondVar>(IsFullConnectNode);
  108. auto right_fullconnect_var = std::make_shared<CondVar>(IsFullConnectNode);
  109. auto other_fullconnect_var = std::make_shared<SeqVar>();
  110. return VectorRef({pack_var, left_fullconnect_var, right_fullconnect_var, other_fullconnect_var});
  111. }
  112. // slice +fullconnect ->batchmatmul
  113. const AnfNodePtr BatchMatMulFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
  114. const EquivPtr &) const {
  115. MS_ASSERT(func_graph != nullptr);
  116. MS_ASSERT(node != nullptr);
  117. auto stack_cnode = node->cast<CNodePtr>();
  118. // check stack node all inputs must fullconnect
  119. for (size_t i = 1; i < stack_cnode->inputs().size(); i++) {
  120. auto input_node = stack_cnode->input(i);
  121. if (!IsFullConnectNode(input_node)) {
  122. MS_LOG(WARNING) << "batchmatmulfusion stack node all inputs must fullconnect type";
  123. return nullptr;
  124. }
  125. }
  126. auto fullconnect_node = stack_cnode->input(1);
  127. MS_ASSERT(fullconnnect_node != nullptr);
  128. auto fullconnect_cnode = fullconnect_node->cast<CNodePtr>();
  129. MS_ASSERT(fullconnect_cnode->inputs().size() == 3);
  130. auto left_slice_node = fullconnect_cnode->input(1);
  131. auto left_slice_cnode = left_slice_node->cast<CNodePtr>();
  132. auto left_matmul_input = left_slice_cnode->input(1);
  133. auto right_reshape_node = fullconnect_cnode->input(2);
  134. auto matmul_primitive = std::make_unique<schema::PrimitiveT>();
  135. std::unique_ptr<schema::MatMulT> attr = std::make_unique<schema::MatMulT>();
  136. matmul_primitive->value.type = schema::PrimitiveType_MatMul;
  137. matmul_primitive->value.value = attr.release();
  138. auto matmul_cvalue = lite::PrimitiveC::Create(matmul_primitive.release());
  139. // get matmul quantParams
  140. std::vector<schema::QuantParamT> jointed_quant_params;
  141. for (int i = 1; i < 9; i++) {
  142. auto fullconnect_node2 = stack_cnode->input(i)->cast<CNodePtr>();
  143. auto fc_prim = GetValueNode<std::shared_ptr<lite::PrimitiveC>>(fullconnect_node2->input(0));
  144. auto fc_input_quantParams = fc_prim->GetInputQuantParams();
  145. if (fc_input_quantParams.size() > 1 && !fc_input_quantParams[1].empty()) {
  146. jointed_quant_params.push_back(fc_input_quantParams[1][0]);
  147. }
  148. }
  149. auto fc_prim = GetValueNode<std::shared_ptr<lite::PrimitiveC>>(fullconnect_cnode->input(0));
  150. auto rmatmul_quant_params = fc_prim->GetInputQuantParams();
  151. rmatmul_quant_params.pop_back();
  152. rmatmul_quant_params.pop_back();
  153. // no bias quantParams
  154. rmatmul_quant_params.emplace_back(jointed_quant_params);
  155. matmul_cvalue->SetInputQuantParams(rmatmul_quant_params);
  156. matmul_cvalue->SetOutputQuantParams(fc_prim->GetOutputQuantParams());
  157. auto matmul_value_node = NewValueNode(std::shared_ptr<lite::PrimitiveC>(matmul_cvalue));
  158. std::vector<AnfNodePtr> matmul_inputs = {matmul_value_node, left_matmul_input};
  159. // batchmatmul right node may be const
  160. if (right_reshape_node->isa<Parameter>()) {
  161. // return stack_cnode;
  162. auto rmatmul_paramter = func_graph->add_parameter();
  163. if (GetRightMatmulInputParamter(stack_cnode, rmatmul_paramter) != RET_OK) {
  164. MS_LOG(ERROR) << "GetRightMatmulInputParamter failed";
  165. return node;
  166. }
  167. auto prim = GetValueNode<std::shared_ptr<lite::PrimitiveC>>(matmul_value_node);
  168. prim->GetPrimitiveT()->value.AsMatMul()->transposeB = true;
  169. matmul_inputs.push_back(rmatmul_paramter);
  170. } else {
  171. auto right_reshape_cnode = right_reshape_node->cast<CNodePtr>();
  172. MS_ASSERT(right_reshape_cnode->inputs().size() > 1);
  173. auto right_transpose_node = right_reshape_cnode->input(1);
  174. auto right_transpose_cnode = right_transpose_node->cast<CNodePtr>();
  175. auto right_slice_node = right_transpose_cnode->input(1);
  176. auto right_slice_cnode = right_slice_node->cast<CNodePtr>();
  177. auto right_matmul_input = right_slice_cnode->input(1);
  178. matmul_inputs.push_back(right_matmul_input);
  179. }
  180. auto matmul_cnode = func_graph->NewCNode(matmul_inputs);
  181. matmul_cnode->set_fullname_with_scope("matmul_" + stack_cnode->fullname_with_scope());
  182. MS_LOG(INFO) << "stack node:" << stack_cnode->fullname_with_scope() << " batchmatmul fusion success";
  183. return matmul_cnode;
  184. }
  185. } // namespace mindspore::opt