Merge pull request !727 from Chong/costtags/v0.3.0-alpha
| @@ -446,51 +446,8 @@ StrategyRec CostPooling::ChoseStr(const std::vector<double> &cost_op, StrategyRe | |||
| return str; | |||
| } | |||
| // Get optimal strategy for Add | |||
| StrategyRec CostAdd::GetOptimalStr(const Graph::NodeType &node, | |||
| const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy, | |||
| const Graph &graph) { | |||
| int tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n); | |||
| int tensor_c = static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c); | |||
| int tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h); | |||
| int tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w); | |||
| std::vector<double> cost_op; | |||
| std::vector<std::vector<float>> mode; | |||
| if (tensor_n < 2) { | |||
| cost_op.push_back(DOUBLE_MAX); | |||
| } else { | |||
| cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, | |||
| mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph)); | |||
| } | |||
| if (tensor_c < 2) { | |||
| cost_op.push_back(DOUBLE_MAX); | |||
| } else { | |||
| cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, | |||
| mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph)); | |||
| } | |||
| if (tensor_h < 2) { | |||
| cost_op.push_back(DOUBLE_MAX); | |||
| } else { | |||
| cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, | |||
| mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph)); | |||
| } | |||
| if (tensor_w < 2) { | |||
| cost_op.push_back(DOUBLE_MAX); | |||
| } else { | |||
| cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, | |||
| mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph)); | |||
| } | |||
| return ChoseStr(cost_op, node.apply.str); | |||
| } | |||
| // Chose strategy for Add | |||
| StrategyRec CostAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) { | |||
| StrategyRec CostTensorAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) { | |||
| uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin(); | |||
| if (cost_op[min_position] > (DOUBLE_MAX - 0.1)) { | |||
| return str; | |||
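Editor's note (not part of the patch): every GetOptimalStr above follows the same recipe: one candidate cost per tensor dimension, DOUBLE_MAX when that dimension is already too small to cut (size < 2), and ChoseStr then picks the cheapest candidate; judging by the 0.5 entries in the mode vectors, the winning dimension's stride is halved. A minimal standalone C++ sketch of that selection step, using simplified stand-in types rather than the real StrategyRec and Graph structures:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>

namespace {
constexpr double kDoubleMax = std::numeric_limits<double>::max();

// Simplified stand-in for the per-tensor partition strategy (str_n/c/h/w).
struct Strategy4D {
  float str_n = 1, str_c = 1, str_h = 1, str_w = 1;
};

// Pick the cheapest dimension to cut and halve its stride, mirroring ChoseStr.
Strategy4D ChooseCheapestCut(const std::vector<double> &cost_op, Strategy4D str) {
  const size_t min_position =
      std::min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
  if (cost_op[min_position] > (kDoubleMax - 0.1)) {
    return str;  // every dimension is already too small to split further
  }
  switch (min_position) {
    case 0: str.str_n /= 2; break;
    case 1: str.str_c /= 2; break;
    case 2: str.str_h /= 2; break;
    case 3: str.str_w /= 2; break;
  }
  return str;
}
}  // namespace

int main() {
  // Candidate costs for cutting N, C, H, W; H is unsplittable here.
  std::vector<double> cost_op = {4.0, 2.0, kDoubleMax, 3.0};
  Strategy4D str = ChooseCheapestCut(cost_op, {});
  std::cout << str.str_n << " " << str.str_c << " "
            << str.str_h << " " << str.str_w << "\n";  // prints: 1 0.5 1 1
  return 0;
}

In the real classes the chosen cut is also recorded in cut flags and propagated to the input-tensor strategies; the sketch keeps only the dimension choice.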
| @@ -540,49 +497,6 @@ StrategyRec CostReshape::GetOptimalStr(const Graph::NodeType &node) const { retu | |||
| StrategyRec CostReshape::ChoseStr(StrategyRec str) const { return str; } | |||
| // Get optimal strategy for Biasadd | |||
| StrategyRec CostBiasAdd::GetOptimalStr(const Graph::NodeType &node, | |||
| const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy, | |||
| const Graph &graph) { | |||
| int tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n); | |||
| int tensor_c = static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c); | |||
| int tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h); | |||
| int tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w); | |||
| std::vector<double> cost_op; | |||
| std::vector<std::vector<float>> mode; | |||
| if (tensor_n < 2) { | |||
| cost_op.push_back(DOUBLE_MAX); | |||
| } else { | |||
| cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, | |||
| mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph)); | |||
| } | |||
| if (tensor_c < 2) { | |||
| cost_op.push_back(DOUBLE_MAX); | |||
| } else { | |||
| cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, | |||
| mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph)); | |||
| } | |||
| if (tensor_h < 2) { | |||
| cost_op.push_back(DOUBLE_MAX); | |||
| } else { | |||
| cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, | |||
| mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph)); | |||
| } | |||
| if (tensor_w < 2) { | |||
| cost_op.push_back(DOUBLE_MAX); | |||
| } else { | |||
| cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy, | |||
| mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph)); | |||
| } | |||
| return ChoseStr(cost_op, node.apply.str); | |||
| } | |||
| // Chose strategy for BiasAdd | |||
| StrategyRec CostBiasAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) { | |||
| uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin(); | |||
| @@ -629,7 +543,7 @@ StrategyRec CostBiasAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRe | |||
| return str; | |||
| } | |||
| // Get optimal strategy for Common OPs: ReLU and Softmax | |||
| // Get optimal strategy for Common OPs | |||
| StrategyRec CostCommon::GetOptimalStr(const Graph::NodeType &node, | |||
| const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy, | |||
| const Graph &graph) { | |||
| @@ -157,21 +157,6 @@ class CostPooling { | |||
| double cost_in_ = 0; | |||
| }; // class CostPooling is used to compute the cost of Pooling operator. | |||
| // class CostAdd is used to compute the cost of Add operator. | |||
| class CostAdd { | |||
| public: | |||
| StrategyRec GetOptimalStr(const Graph::NodeType &node, | |||
| const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy, | |||
| const Graph &graph); | |||
| double GetMinCostIn() const { return cost_in_; } | |||
| private: | |||
| StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str); | |||
| double cost_in_ = 0; | |||
| }; // class CostAdd is used to compute the cost of Add operator. | |||
| // class CostReshape is used to compute the cost of Reshape operator. | |||
| class CostReshape { | |||
| public: | |||
| @@ -185,35 +170,41 @@ class CostReshape { | |||
| double cost_in_ = 0; | |||
| }; // class CostReshape is used to compute the cost of Reshape operator. | |||
| // class CostBiasAdd is used to compute the cost of BiasAdd operator. | |||
| class CostBiasAdd { | |||
| // class CostCommon is used to compute the cost of an element-wise operator | |||
| class CostCommon { | |||
| public: | |||
| StrategyRec GetOptimalStr(const Graph::NodeType &node, | |||
| const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy, | |||
| const Graph &graph); | |||
| virtual StrategyRec GetOptimalStr(const Graph::NodeType &node, | |||
| const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy, | |||
| const Graph &graph); | |||
| double GetMinCostIn() const { return cost_in_; } | |||
| virtual double GetMinCostIn() const { return cost_in_; } | |||
| private: | |||
| StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str); | |||
| protected: | |||
| virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str); | |||
| double cost_in_ = 0; | |||
| }; // class CostBiasAdd is used to compute the cost of BiasAdd operator. | |||
| // class CostCommon is used to compute the cost of the element independent operator. | |||
| class CostCommon { | |||
| public: | |||
| StrategyRec GetOptimalStr(const Graph::NodeType &node, | |||
| const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy, | |||
| const Graph &graph); | |||
| double GetMinCostIn() const { return cost_in_; } | |||
| }; // class CostCommon is used to compute the cost of an element-wise operator | |||
| private: | |||
| // class CostBiasAdd is used to compute the cost of the addition between a tensor and a bias | |||
| class CostBiasAdd : public CostCommon { | |||
| StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str); | |||
| double cost_in_ = 0; | |||
| }; // class CostCommon is used to compute the cost of Softmax & || Activation operator. | |||
| }; | |||
| // class CostAdd is used to compute the cost of Add operator. | |||
| class CostTensorAdd : public CostCommon { | |||
| StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str); | |||
| }; | |||
// all the following operations are element-wise and have the same cost
| class CostOneHot : public CostCommon {}; | |||
| class CostReLU : public CostCommon {}; | |||
| class CostLog : public CostCommon {}; | |||
| class CostExp : public CostCommon {}; | |||
| class CostAdd : public CostCommon {}; | |||
| class CostSub : public CostCommon {}; | |||
| class CostMul : public CostCommon {}; | |||
| class CostDiv : public CostCommon {}; | |||
| class CostSqueeze : public CostCommon {}; | |||
| class CostCast : public CostCommon {}; | |||
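Editor's note: this refactor turns CostCommon into a small polymorphic base. ChoseStr is a protected virtual, so CostBiasAdd and CostTensorAdd override only the strategy-selection step, while the element-wise operators (OneHot, ReLU, Log, Exp, Add, Sub, Mul, Div, Squeeze, Cast) inherit everything unchanged. A compilable toy sketch of that shape; the class and method names below are illustrative, not the real headers:

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Toy stand-in for StrategyRec.
struct Strategy { std::string note; };

class CostCommonSketch {
 public:
  virtual ~CostCommonSketch() = default;
  virtual double GetMinCostIn() const { return cost_in_; }
  Strategy GetOptimalStr(const std::vector<double> &cost_op) { return ChoseStr(cost_op); }

 protected:
  // Derived operators override only this step.
  virtual Strategy ChoseStr(const std::vector<double> &) { return {"element-wise default"}; }
  double cost_in_ = 0;
};

class CostBiasAddSketch : public CostCommonSketch {
 protected:
  Strategy ChoseStr(const std::vector<double> &) override { return {"bias-add specific"}; }
};

// Element-wise operators need no body at all.
class CostReLUSketch : public CostCommonSketch {};

int main() {
  std::vector<std::unique_ptr<CostCommonSketch>> costs;
  costs.push_back(std::make_unique<CostBiasAddSketch>());
  costs.push_back(std::make_unique<CostReLUSketch>());
  for (const auto &c : costs) std::cout << c->GetOptimalStr({1.0}).note << "\n";
  return 0;
}

This is why the diff can list CostOneHot, CostReLU, CostLog and the others as empty subclasses: they exist only so the dispatch code can name the operator.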
| // class BatchNorm is used to compute the cost of BatchNorm operator. | |||
| class CostBatchNorm { | |||
| @@ -38,6 +38,12 @@ void GenerateStrategy(std::shared_ptr<Graph> graph, bool mask_special_ops, | |||
| for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { | |||
| stra.push_back(PrepareStrategy(graph, ops, iter_ops, iter_op_inputs)); | |||
| } | |||
// OneHot's scalar parameters were removed by entire_costgraph, so we have to complete them here.
| if (ops[iter_ops]->type() == ONEHOT) { | |||
| std::vector<int32_t> s_Onehot = {}; | |||
| stra.push_back(s_Onehot); | |||
| stra.push_back(s_Onehot); | |||
| } | |||
| StrategyPtr sp = std::make_shared<Strategy>(0, stra); | |||
| ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost()); | |||
| } | |||
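Editor's note: the OneHot branch assumes the final Strategy needs one partition vector per original operator input, so two empty vectors stand in for the scalar inputs that entire_costgraph stripped. A small sketch of how such a strategy list is assembled, with plain vectors in place of the real Strategy class:

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // One partition vector per operator input; entries are cut counts per dimension.
  std::vector<std::vector<int32_t>> stra;

  // Strategy prepared for the remaining tensor input (e.g. the OneHot indices),
  // here "split the first dimension across 4 devices" (illustrative value).
  stra.push_back({4});

  // The scalar inputs were removed from inputs_tensor_info(), so empty partition
  // vectors are appended to keep the per-input count consistent.
  stra.push_back({});
  stra.push_back({});

  std::cout << "inputs covered by the strategy: " << stra.size() << "\n";  // 3
  for (const auto &s : stra) std::cout << "  entries: " << s.size() << "\n";
  return 0;
}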
| @@ -201,12 +207,13 @@ std::vector<int32_t> PrepareStrategy(const std::shared_ptr<Graph> &graph, | |||
| } | |||
| } | |||
// used to respect the strategy checks of auto parallel
| void MaskSpecialOps(std::shared_ptr<Graph> graph) { | |||
| size_t iter_nodes = graph->nodes.size(); | |||
| for (size_t i = 0; i < iter_nodes; i++) { | |||
| Graph::NodeType &node = graph->nodes[i]; | |||
| if (node.apply.op_type == 1) { // For Convolution | |||
| if (node.apply.op_type == kRecConvolution) { // For convolution | |||
| // cover input tensor strategy | |||
| node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast<float>(g_device_manager->DeviceNum()); | |||
| node.apply.arguments[0].tensor_str.str_c = 1; | |||
| @@ -217,19 +224,12 @@ void MaskSpecialOps(std::shared_ptr<Graph> graph) { | |||
| node.apply.arguments[1].tensor_str.str_c = 1; | |||
| node.apply.arguments[1].tensor_str.str_h = 1; | |||
| node.apply.arguments[1].tensor_str.str_w = 1; | |||
| } else if (node.apply.op_type == 8) { // For BN | |||
| node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast<float>(g_device_manager->DeviceNum()); | |||
| node.apply.arguments[0].tensor_str.str_c = 1; | |||
| } else if (node.apply.op_type == kRecBiasAdd || node.apply.op_type == kRecMatMul) { | |||
| // For MatMul and BiasAdd | |||
| node.apply.arguments[0].tensor_str.str_h = 1; | |||
| node.apply.arguments[0].tensor_str.str_w = 1; | |||
| // cover 1-d argument blobs | |||
| node.apply.arguments[1].tensor_str.str_n = 1; | |||
| node.apply.arguments[2].tensor_str.str_c = 1; | |||
| node.apply.arguments[3].tensor_str.str_h = 1; | |||
| node.apply.arguments[4].tensor_str.str_w = 1; | |||
| } else if (node.apply.op_type == 4 || node.apply.op_type == 9) { // For SparseSoftmaxCrossEntropyWithLogits | |||
| node.tensor_parm.tensor_str.str_h = 1.0 / static_cast<float>(g_device_manager->DeviceNum()); | |||
| node.tensor_parm.tensor_str.str_w = 1; | |||
| node.apply.arguments[1].tensor_str.str_h = 1; | |||
| node.apply.arguments[1].tensor_str.str_w = 1; | |||
| } | |||
| } | |||
| } | |||
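Editor's note: MaskSpecialOps pins dimensions before partitioning. For convolution the input batch stride becomes 1/DeviceNum (pure data parallelism) and everything else stays whole, while for MatMul and BiasAdd the reduced axes are kept intact. A self-contained sketch of the convolution case, with an assumed device count in place of g_device_manager:

#include <iostream>

// Simplified stand-in for the per-dimension stride fractions in tensor_str.
struct TensorStr4D {
  float str_n = 1, str_c = 1, str_h = 1, str_w = 1;
};

// Force a pure data-parallel layout on a convolution input:
// the batch dimension is spread over all devices, everything else stays whole.
TensorStr4D MaskConvolutionInput(int device_num) {
  TensorStr4D str;
  str.str_n = 1.0f / static_cast<float>(device_num);
  str.str_c = 1;
  str.str_h = 1;
  str.str_w = 1;
  return str;
}

int main() {
  const int device_num = 8;  // assumed device count; the real value comes from g_device_manager
  TensorStr4D str = MaskConvolutionInput(device_num);
  std::cout << "str_n = " << str.str_n << "\n";  // 0.125, i.e. each device holds 1/8 of the batch
  return 0;
}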
| @@ -27,17 +27,26 @@ | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| enum OperatorType { | |||
| kRecUnkownType, | |||
| kRecMatMul, | |||
| kRecConvolution, | |||
| kRecPooling, | |||
| kRecAdd, | |||
| kRecSoftmax, | |||
| kRecReshape, | |||
| kRecBiasAdd, | |||
| kRecTensorAdd, | |||
| kRecReLU, | |||
| kRecBatchNorm, | |||
| kRecReshape, | |||
| kRecBiasAdd, | |||
| kRecSoftmax, | |||
| kRecSparseSoftmaxCrossEntropyWithLogits, | |||
| kRecUnkownType | |||
| kRecOneHot, | |||
| kRecLog, | |||
| kRecExp, | |||
| kRecAdd, | |||
| kRecSub, | |||
| kRecMul, | |||
| kRecDiv, | |||
| kRecSqueeze, | |||
| kRecCast | |||
| }; | |||
| enum InfoType { kApplication, kConstant }; | |||
| @@ -1,187 +1,187 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "parallel/auto_parallel/rec_core/rec_parse_graph.h" | |||
| #include <algorithm> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "ir/value.h" | |||
| #include "parallel/auto_parallel/rec_core/rec_graph.h" | |||
| #include "parallel/auto_parallel/rec_core/rec_tensor.h" | |||
| #include "parallel/ops_info/operator_info.h" | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| const TensorParam MakeTensor(int n, int c, int h, int w) { | |||
| TensorParam new_tensor; | |||
| new_tensor.tensor_type = kFloat32; | |||
| new_tensor.tensor_shape.shape_n = n; | |||
| new_tensor.tensor_shape.shape_c = c; | |||
| new_tensor.tensor_shape.shape_h = h; | |||
| new_tensor.tensor_shape.shape_w = w; | |||
| const TensorParam &tensor = new_tensor; | |||
| return tensor; | |||
| } | |||
| Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops, size_t iter_ops) { | |||
| Graph::NodeType NewOp; | |||
| NewOp.name = ops[iter_ops]->name(); | |||
| NewOp.info = InfoType::kApplication; | |||
| auto op_type = ops[iter_ops]->type(); | |||
| auto idx = DictOpType.find(op_type); | |||
| if (idx == DictOpType.end()) { | |||
| NewOp.apply.op_type = OperatorType::kRecUnkownType; | |||
| MS_LOG(INFO) << "Unknown operator type."; | |||
| } else { | |||
| NewOp.apply.op_type = DictOpType.at(op_type); | |||
| } | |||
| if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 4) { | |||
| NewOp.tensor_parm = MakeTensor( | |||
| ops[iter_ops]->outputs_tensor_info()[0].shape()[0], ops[iter_ops]->outputs_tensor_info()[0].shape()[1], | |||
| ops[iter_ops]->outputs_tensor_info()[0].shape()[2], ops[iter_ops]->outputs_tensor_info()[0].shape()[3]); | |||
| } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 2) { | |||
| NewOp.tensor_parm = Fill2DTensor(ops, iter_ops, NewOp); | |||
| } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 1) { | |||
| NewOp.tensor_parm = MakeTensor(1, 1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0]); | |||
| } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 0) { | |||
| NewOp.tensor_parm = MakeTensor(1, 1, 1, 1); | |||
| } else { | |||
| MS_LOG(ERROR) << "Tensor's shape is unknown."; | |||
| } | |||
| NewOp.apply = CompleteOperatorInputs(ops, iter_ops, NewOp); | |||
| return NewOp; | |||
| } | |||
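Editor's note: MakeNewOperator normalizes every output shape to the 4-D (N, C, H, W) form the cost model expects. Rank-1 and rank-0 outputs are padded with leading ones, and 2-D MatMul outputs go through Fill2DTensor, which also looks at the transpose flags. A standalone sketch of the plain padding rule (Fill2DTensor's MatMul special case is not reproduced here):

#include <array>
#include <cstdint>
#include <iostream>
#include <vector>

// Pad an arbitrary-rank shape to 4-D (N, C, H, W), keeping the trailing axes
// and filling the leading ones with 1, as the graph parser does for ranks 0-2.
std::array<int64_t, 4> PadTo4D(const std::vector<int64_t> &shape) {
  std::array<int64_t, 4> out = {1, 1, 1, 1};
  const size_t rank = shape.size() > 4 ? 4 : shape.size();
  for (size_t i = 0; i < rank; ++i) {
    out[4 - rank + i] = shape[i];
  }
  return out;
}

int main() {
  std::vector<std::vector<int64_t>> shapes = {{32, 3, 224, 224}, {128, 1000}, {10}, {}};
  for (const auto &shape : shapes) {
    auto p = PadTo4D(shape);
    std::cout << p[0] << " " << p[1] << " " << p[2] << " " << p[3] << "\n";
  }
  return 0;
}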
| TensorParam Fill2DTensor(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops, | |||
| Graph::NodeType NewTensor) { | |||
| if (NewTensor.apply.op_type == OperatorType::kRecMatMul) { | |||
| auto attrs = ops[iter_ops]->attrs(); | |||
| bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value(); | |||
| bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value(); | |||
| if (transpose_a) { | |||
| NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[1], | |||
| ops[iter_ops]->inputs_tensor_info()[0].shape()[0]); | |||
| } else if (transpose_b) { | |||
| NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[1], | |||
| ops[iter_ops]->inputs_tensor_info()[0].shape()[0]); | |||
| } else { | |||
| NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[0], | |||
| ops[iter_ops]->inputs_tensor_info()[0].shape()[1]); | |||
| } | |||
| } else { | |||
| NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[0], | |||
| ops[iter_ops]->inputs_tensor_info()[0].shape()[1]); | |||
| } | |||
| return NewTensor.tensor_parm; | |||
| } | |||
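Editor's note: for MatMul operands the 2-D shape is packed into the (h, w) slots with the two axes swapped whenever the matching transpose flag is set, so the cost model appears to always see the logical (rows, cols) layout. A tiny sketch of that swap; only the flag semantics mirror TRANSPOSE_A and TRANSPOSE_B, the rest is simplified:

#include <cstdint>
#include <iostream>
#include <utility>

// Pack a 2-D shape into the (h, w) slots of the 4-D tensor description,
// swapping the axes when the matrix is marked as transposed.
std::pair<int64_t, int64_t> PackMatMulOperand(int64_t d0, int64_t d1, bool transposed) {
  if (transposed) {
    return {d1, d0};  // stored shape is (d0, d1), logical shape is (d1, d0)
  }
  return {d0, d1};
}

int main() {
  // A (128, 64) weight used with transpose_b = true behaves as a (64, 128) matrix.
  auto hw = PackMatMulOperand(128, 64, /*transposed=*/true);
  std::cout << "h = " << hw.first << ", w = " << hw.second << "\n";  // h = 64, w = 128
  return 0;
}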
| OperatorRec CompleteOperatorInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops, | |||
| Graph::NodeType NewTensor) { | |||
| for (size_t iter_input_tensors = 0; iter_input_tensors < ops[iter_ops]->inputs_tensor_info().size(); | |||
| iter_input_tensors++) { | |||
| if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 4) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[2], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[3]); | |||
| } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 2) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = Complete2DInputs(ops, iter_ops, iter_input_tensors, NewTensor); | |||
| } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 1) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); | |||
| } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 0) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = MakeTensor(1, 1, 1, 1); | |||
| } else { | |||
| MS_LOG(ERROR) << "Tensor's shape is unknown."; | |||
| } | |||
| } | |||
| return NewTensor.apply; | |||
| } | |||
| TensorParam Complete2DInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops, | |||
| const size_t iter_input_tensors, Graph::NodeType NewTensor) { | |||
| if (NewTensor.apply.op_type == OperatorType::kRecMatMul) { | |||
| auto attrs = ops[iter_ops]->attrs(); | |||
| bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value(); | |||
| bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value(); | |||
| if (transpose_a && (iter_input_tensors == 0)) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); | |||
| } else if (transpose_b && (iter_input_tensors == 1)) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); | |||
| } else { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]); | |||
| } | |||
| } else { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]); | |||
| } | |||
| return NewTensor.apply.arguments[iter_input_tensors]; | |||
| } | |||
| std::shared_ptr<Graph> ParseGraph(const std::vector<std::shared_ptr<OperatorInfo>> &ops, | |||
| const std::vector<std::vector<std::string>> &input_tensor_names) { | |||
| std::shared_ptr<Graph> graph(new Graph); | |||
| if (ops.size() > SIZE_MAX / 2) { | |||
| MS_LOG(EXCEPTION) << "Total number of operators is bigger than " << SIZE_MAX / 2; | |||
| } | |||
| for (size_t iter_ops = 0; iter_ops < ops.size(); iter_ops++) { | |||
| Graph::NodeType NewOp = MakeNewOperator(ops, iter_ops); | |||
| graph->nodes.push_back(NewOp); | |||
| } | |||
| MakeEdge(input_tensor_names, graph); | |||
| return graph; | |||
| } | |||
| void MakeEdge(const std::vector<std::vector<std::string>> &input_tensor_names, std::shared_ptr<Graph> graph) { | |||
| for (size_t iter_i = 0; iter_i < input_tensor_names.size(); iter_i++) { | |||
| for (size_t iter_j = 1; iter_j < input_tensor_names[iter_i].size(); iter_j++) { | |||
| size_t head_node_index = GetIndexInInputTensorNames(input_tensor_names, input_tensor_names[iter_i][iter_j]); | |||
| if (head_node_index < SIZE_MAX / 2 && head_node_index != iter_i) { | |||
| graph->nodes[iter_i].node_in.push_back(head_node_index); | |||
| graph->nodes[head_node_index].node_out.push_back(iter_i); | |||
| } | |||
| } | |||
| } | |||
| } | |||
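Editor's note: MakeEdge rebuilds data-flow edges purely from names. input_tensor_names[i][0] appears to be the output name of operator i and the remaining entries are the tensors it consumes, so an edge is added whenever an input name matches some other row's first entry. A self-contained sketch of the same lookup over plain strings:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Row layout assumed from the parser: [own output name, input name, input name, ...].
size_t FindProducer(const std::vector<std::vector<std::string>> &names, const std::string &input) {
  for (size_t index = 0; index < names.size(); ++index) {
    if (names[index][0] == input) return index;
  }
  return SIZE_MAX;  // no producer found (e.g. a graph parameter)
}

int main() {
  std::vector<std::vector<std::string>> input_tensor_names = {
      {"conv1", "x"},             // node 0 produces "conv1" from parameter "x"
      {"relu1", "conv1"},         // node 1 consumes node 0's output
      {"add1", "relu1", "conv1"}  // node 2 consumes nodes 1 and 0
  };
  for (size_t i = 0; i < input_tensor_names.size(); ++i) {
    for (size_t j = 1; j < input_tensor_names[i].size(); ++j) {
      size_t head = FindProducer(input_tensor_names, input_tensor_names[i][j]);
      if (head != SIZE_MAX && head != i) {
        std::cout << "edge " << head << " -> " << i << "\n";
      }
    }
  }
  return 0;
}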
| size_t GetIndexInInputTensorNames(const std::vector<std::vector<std::string>> &input_tensor_name, | |||
| const std::string &input_name) { | |||
| for (size_t index = 0; index < input_tensor_name.size(); index++) { | |||
| if (input_tensor_name[index][0] == input_name) { | |||
| return index; | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "Get index failed, using SIZE_MAX insted"; | |||
| return SIZE_MAX; | |||
| } | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "parallel/auto_parallel/rec_core/rec_parse_graph.h" | |||
| #include <algorithm> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include "ir/value.h" | |||
| #include "parallel/auto_parallel/rec_core/rec_graph.h" | |||
| #include "parallel/auto_parallel/rec_core/rec_tensor.h" | |||
| #include "parallel/ops_info/operator_info.h" | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| const TensorParam MakeTensor(int n, int c, int h, int w) { | |||
| TensorParam new_tensor; | |||
| new_tensor.tensor_type = kFloat32; | |||
| new_tensor.tensor_shape.shape_n = n; | |||
| new_tensor.tensor_shape.shape_c = c; | |||
| new_tensor.tensor_shape.shape_h = h; | |||
| new_tensor.tensor_shape.shape_w = w; | |||
| const TensorParam &tensor = new_tensor; | |||
| return tensor; | |||
| } | |||
| Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops, size_t iter_ops) { | |||
| Graph::NodeType NewOp; | |||
| NewOp.name = ops[iter_ops]->name(); | |||
| NewOp.info = InfoType::kApplication; | |||
| auto op_type = ops[iter_ops]->type(); | |||
| auto idx = DictOpType.find(op_type); | |||
| if (idx == DictOpType.end()) { | |||
| NewOp.apply.op_type = OperatorType::kRecUnkownType; | |||
| MS_LOG(INFO) << "Unknown operator type."; | |||
| } else { | |||
| NewOp.apply.op_type = DictOpType.at(op_type); | |||
| } | |||
| if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 4) { | |||
| NewOp.tensor_parm = MakeTensor( | |||
| ops[iter_ops]->outputs_tensor_info()[0].shape()[0], ops[iter_ops]->outputs_tensor_info()[0].shape()[1], | |||
| ops[iter_ops]->outputs_tensor_info()[0].shape()[2], ops[iter_ops]->outputs_tensor_info()[0].shape()[3]); | |||
| } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 2) { | |||
| NewOp.tensor_parm = Fill2DTensor(ops, iter_ops, NewOp); | |||
| } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 1) { | |||
| NewOp.tensor_parm = MakeTensor(1, 1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0]); | |||
| } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 0) { | |||
| NewOp.tensor_parm = MakeTensor(1, 1, 1, 1); | |||
| } else { | |||
| MS_LOG(ERROR) << "Tensor's shape is unknown."; | |||
| } | |||
| NewOp.apply = CompleteOperatorInputs(ops, iter_ops, NewOp); | |||
| return NewOp; | |||
| } | |||
| TensorParam Fill2DTensor(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops, | |||
| Graph::NodeType NewTensor) { | |||
| if (NewTensor.apply.op_type == OperatorType::kRecMatMul) { | |||
| auto attrs = ops[iter_ops]->attrs(); | |||
| bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value(); | |||
| bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value(); | |||
| if (transpose_a) { | |||
| NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[1], | |||
| ops[iter_ops]->outputs_tensor_info()[0].shape()[0]); | |||
| } else if (transpose_b) { | |||
| NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[1], | |||
| ops[iter_ops]->outputs_tensor_info()[0].shape()[0]); | |||
| } else { | |||
| NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0], | |||
| ops[iter_ops]->outputs_tensor_info()[0].shape()[1]); | |||
| } | |||
| } else { | |||
| NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0], | |||
| ops[iter_ops]->outputs_tensor_info()[0].shape()[1]); | |||
| } | |||
| return NewTensor.tensor_parm; | |||
| } | |||
| OperatorRec CompleteOperatorInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops, | |||
| Graph::NodeType NewTensor) { | |||
| for (size_t iter_input_tensors = 0; iter_input_tensors < ops[iter_ops]->inputs_tensor_info().size(); | |||
| iter_input_tensors++) { | |||
| if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 4) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[2], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[3]); | |||
| } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 2) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = Complete2DInputs(ops, iter_ops, iter_input_tensors, NewTensor); | |||
| } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 1) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); | |||
| } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 0) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = MakeTensor(1, 1, 1, 1); | |||
| } else { | |||
| MS_LOG(ERROR) << "Tensor's shape is unknown."; | |||
| } | |||
| } | |||
| return NewTensor.apply; | |||
| } | |||
| TensorParam Complete2DInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops, | |||
| const size_t iter_input_tensors, Graph::NodeType NewTensor) { | |||
| if (NewTensor.apply.op_type == OperatorType::kRecMatMul) { | |||
| auto attrs = ops[iter_ops]->attrs(); | |||
| bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value(); | |||
| bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value(); | |||
| if (transpose_a && (iter_input_tensors == 0)) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); | |||
| } else if (transpose_b && (iter_input_tensors == 1)) { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]); | |||
| } else { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]); | |||
| } | |||
| } else { | |||
| NewTensor.apply.arguments[iter_input_tensors] = | |||
| MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0], | |||
| ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]); | |||
| } | |||
| return NewTensor.apply.arguments[iter_input_tensors]; | |||
| } | |||
| std::shared_ptr<Graph> ParseGraph(const std::vector<std::shared_ptr<OperatorInfo>> &ops, | |||
| const std::vector<std::vector<std::string>> &input_tensor_names) { | |||
| std::shared_ptr<Graph> graph(new Graph); | |||
| if (ops.size() > SIZE_MAX / 2) { | |||
| MS_LOG(EXCEPTION) << "Total number of operators is bigger than " << SIZE_MAX / 2; | |||
| } | |||
| for (size_t iter_ops = 0; iter_ops < ops.size(); iter_ops++) { | |||
| Graph::NodeType NewOp = MakeNewOperator(ops, iter_ops); | |||
| graph->nodes.push_back(NewOp); | |||
| } | |||
| MakeEdge(input_tensor_names, graph); | |||
| return graph; | |||
| } | |||
| void MakeEdge(const std::vector<std::vector<std::string>> &input_tensor_names, std::shared_ptr<Graph> graph) { | |||
| for (size_t iter_i = 0; iter_i < input_tensor_names.size(); iter_i++) { | |||
| for (size_t iter_j = 1; iter_j < input_tensor_names[iter_i].size(); iter_j++) { | |||
| size_t head_node_index = GetIndexInInputTensorNames(input_tensor_names, input_tensor_names[iter_i][iter_j]); | |||
| if (head_node_index < SIZE_MAX / 2 && head_node_index != iter_i) { | |||
| graph->nodes[iter_i].node_in.push_back(head_node_index); | |||
| graph->nodes[head_node_index].node_out.push_back(iter_i); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| size_t GetIndexInInputTensorNames(const std::vector<std::vector<std::string>> &input_tensor_name, | |||
| const std::string &input_name) { | |||
| for (size_t index = 0; index < input_tensor_name.size(); index++) { | |||
| if (input_tensor_name[index][0] == input_name) { | |||
| return index; | |||
| } | |||
| } | |||
| MS_LOG(INFO) << "Get index failed, using SIZE_MAX insted"; | |||
| return SIZE_MAX; | |||
| } | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| @@ -31,15 +31,23 @@ namespace parallel { | |||
| const std::map<std::string, OperatorType> DictOpType{ | |||
| {MATMUL, OperatorType::kRecMatMul}, | |||
| {CONV2D, OperatorType::kRecConvolution}, | |||
| {MAXPOOL, OperatorType::kRecPooling}, | |||
| {MAXPOOLV2, OperatorType::kRecPooling}, | |||
| {SIMPLE_MEAN, OperatorType::kRecPooling}, | |||
| {TENSOR_ADD, OperatorType::kRecAdd}, | |||
| {TENSOR_ADD, OperatorType::kRecTensorAdd}, | |||
| {RESHAPE, OperatorType::kRecReshape}, | |||
| {BIAS_ADD, OperatorType::kRecBiasAdd}, | |||
| {RELU, OperatorType::kRecReLU}, | |||
| {BATCH_NORM, OperatorType::kRecBatchNorm}, | |||
| {SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits}, | |||
| }; | |||
| {ONEHOT, OperatorType::kRecOneHot}, | |||
| {LOG, OperatorType::kRecLog}, | |||
| {EXP, OperatorType::kRecExp}, | |||
| {SUB, OperatorType::kRecSub}, | |||
| {MUL, OperatorType::kRecMul}, | |||
| {DIV, OperatorType::kRecDiv}, | |||
| {SQUEEZE, OperatorType::kRecSqueeze}, | |||
| {CAST, OperatorType::kRecCast}}; | |||
| const TensorParam MakeTensor(int n, int c, int h, int w); | |||
| @@ -48,14 +48,14 @@ double GetWeights(const Graph::NodeType &node) { | |||
| auto cost_ptr = std::make_shared<CostPooling>(); | |||
| return cost_ptr->GetMinCostIn(); | |||
| } else if (op.op_type == OperatorType::kRecAdd) { | |||
| // For Add | |||
| auto cost_ptr = std::make_shared<CostAdd>(); | |||
| } else if (op.op_type == OperatorType::kRecTensorAdd) { | |||
| // For TensorAdd | |||
| auto cost_ptr = std::make_shared<CostTensorAdd>(); | |||
| return cost_ptr->GetMinCostIn(); | |||
| } else if (op.op_type == OperatorType::kRecSoftmax || op.op_type == OperatorType::kRecReLU || | |||
| } else if (op.op_type == OperatorType::kRecReLU || op.op_type == OperatorType::kRecSoftmax || | |||
| op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) { | |||
| // For Softmax & || Activation | |||
| // For Activation and Softmax | |||
| auto cost_ptr = std::make_shared<CostCommon>(); | |||
| return cost_ptr->GetMinCostIn(); | |||
| @@ -73,6 +73,15 @@ double GetWeights(const Graph::NodeType &node) { | |||
| // For BatchNorm | |||
| auto cost_ptr = std::make_shared<CostBatchNorm>(); | |||
| return cost_ptr->GetMinCostIn(); | |||
| } else if (op.op_type == OperatorType::kRecOneHot || op.op_type == OperatorType::kRecLog || | |||
| op.op_type == OperatorType::kRecExp || op.op_type == OperatorType::kRecAdd || | |||
| op.op_type == OperatorType::kRecSub || op.op_type == OperatorType::kRecMul || | |||
| op.op_type == OperatorType::kRecDiv || op.op_type == OperatorType::kRecSqueeze || | |||
| op.op_type == OperatorType::kRecCast) { | |||
| // For element-wise op | |||
| auto cost_ptr = std::make_shared<CostCommon>(); | |||
| return cost_ptr->GetMinCostIn(); | |||
| } else if (op.op_type == OperatorType::kRecUnkownType) { | |||
| // For unknown type | |||
| @@ -117,47 +126,57 @@ StrategyRec PartitionNode(const Graph::NodeType &node, | |||
| std::shared_ptr<Graph> graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| if (node.apply.op_type == 0) { | |||
| if (node.apply.op_type == OperatorType::kRecMatMul) { | |||
| // For MatMul | |||
| auto cost_ptr = std::make_shared<CostMatMul>(); | |||
| return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); | |||
| } else if (node.apply.op_type == 1) { | |||
| } else if (node.apply.op_type == OperatorType::kRecConvolution) { | |||
| // For Convolution | |||
| auto cost_ptr = std::make_shared<CostConvolution>(); | |||
| return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); | |||
| } else if (node.apply.op_type == 2) { | |||
| } else if (node.apply.op_type == OperatorType::kRecPooling) { | |||
| // For Pooling | |||
| auto cost_ptr = std::make_shared<CostPooling>(); | |||
| return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); | |||
| } else if (node.apply.op_type == 3) { | |||
| // For Add | |||
| auto cost_ptr = std::make_shared<CostAdd>(); | |||
| } else if (node.apply.op_type == OperatorType::kRecTensorAdd) { | |||
| // For TensorAdd | |||
| auto cost_ptr = std::make_shared<CostTensorAdd>(); | |||
| return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); | |||
| } else if (node.apply.op_type == 4 || node.apply.op_type == 7 || node.apply.op_type == 9) { | |||
| } else if (node.apply.op_type == OperatorType::kRecReLU || node.apply.op_type == OperatorType::kRecSoftmax || | |||
| node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) { | |||
| // For Softmax & Activation | |||
| auto cost_ptr = std::make_shared<CostCommon>(); | |||
| return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); | |||
| } else if (node.apply.op_type == 5) { | |||
| } else if (node.apply.op_type == OperatorType::kRecReshape) { | |||
| // For Reshape | |||
| auto cost_ptr = std::make_shared<CostReshape>(); | |||
| return cost_ptr->GetOptimalStr(node); | |||
| } else if (node.apply.op_type == 6) { | |||
| } else if (node.apply.op_type == OperatorType::kRecBiasAdd) { | |||
| // For BiasAdd | |||
| auto cost_ptr = std::make_shared<CostBiasAdd>(); | |||
| return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); | |||
| } else if (node.apply.op_type == 8) { | |||
| } else if (node.apply.op_type == OperatorType::kRecBatchNorm) { | |||
| // For BatchNorm | |||
| auto cost_ptr = std::make_shared<CostBatchNorm>(); | |||
| return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); | |||
| } else if (node.apply.op_type == 10) { | |||
| } else if (node.apply.op_type == OperatorType::kRecOneHot || node.apply.op_type == OperatorType::kRecLog || | |||
| node.apply.op_type == OperatorType::kRecExp || node.apply.op_type == OperatorType::kRecAdd || | |||
| node.apply.op_type == OperatorType::kRecSub || node.apply.op_type == OperatorType::kRecMul || | |||
| node.apply.op_type == OperatorType::kRecDiv || node.apply.op_type == OperatorType::kRecSqueeze || | |||
| node.apply.op_type == OperatorType::kRecCast) { | |||
| // For element-wise op | |||
| auto cost_ptr = std::make_shared<CostCommon>(); | |||
| return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph); | |||
| } else if (node.apply.op_type == OperatorType::kRecUnkownType) { | |||
| // For unknown type | |||
| StrategyRec default_strategy; | |||
| return default_strategy; | |||
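Editor's note: the dispatch now keys on OperatorType values instead of the old magic numbers (0, 1, 3, ...), which keeps it readable and resilient to enum reordering. A tiny sketch of the same pattern with a reduced stand-in enum and toy cost classes:

#include <iostream>
#include <memory>

// Reduced stand-in for the OperatorType enumeration used in the dispatch.
enum class OpType { kUnknown, kMatMul, kConvolution, kTensorAdd, kBiasAdd };

struct CostBase { virtual ~CostBase() = default; virtual const char *Name() const = 0; };
struct CostMatMulSketch : CostBase { const char *Name() const override { return "CostMatMul"; } };
struct CostTensorAddSketch : CostBase { const char *Name() const override { return "CostTensorAdd"; } };
struct CostDefaultSketch : CostBase { const char *Name() const override { return "default"; } };

// Dispatch on the enum instead of magic numbers such as 0, 1 or 3.
std::unique_ptr<CostBase> MakeCost(OpType type) {
  switch (type) {
    case OpType::kMatMul:    return std::make_unique<CostMatMulSketch>();
    case OpType::kTensorAdd: return std::make_unique<CostTensorAddSketch>();
    default:                 return std::make_unique<CostDefaultSketch>();
  }
}

int main() {
  std::cout << MakeCost(OpType::kTensorAdd)->Name() << "\n";  // CostTensorAdd
  return 0;
}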