From: @jinyaohui (tags/v1.2.0-rc1)
| @@ -23,7 +23,7 @@ | |||
| {"op_name": "ExpandDims", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]]], "imply_type": "AiCPU"} | |||
| {"op_name": "Randperm", "inputs": [{"index": 0, "name": "n", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "max_length", "type": "int"}, {"name": "pad", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int64", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["uint64", "DefaultFormat"]]], "imply_type": "AiCPU"} | |||
| {"op_name": "RandomChoiceWithMask", "inputs": [{"index": 0, "name": "x", "param_type": "required"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}, {"index": 1, "name": "mask", "param_type": "required"}], "attr": [{"name": "count", "type": "int"}, {"name": "seed", "type": "int"}, {"name": "seed2", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AiCPU"} | |||
| {"op_name": "Pack", "inputs": [{"index": 0, "name": "x", "param_type": "dynamic"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "axis", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AiCPU"} | |||
| {"op_name": "Stack", "inputs": [{"index": 0, "name": "x", "param_type": "dynamic"}], "outputs": [{"index": 0, "name": "y", "param_type": "required"}], "attr": [{"name": "axis", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["float64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "AiCPU"} | |||
| {"op_name": "UniformCandidateSampler", "inputs": [{"index": 0, "name": "true_classes", "param_type": "required"}], "outputs": [{"index": 0, "name": "sampled_candidates", "param_type": "required"}, {"index": 1, "name": "true_expected_count", "param_type": "required"}, {"index": 2, "name": "true_expected_count", "param_type": "required"}], "attr": [{"name": "num_true", "type": "int"}, {"name": "num_sampled", "type": "int"}, {"name": "unique", "type": "bool"}, {"name": "range_max", "type": "int"}, {"name": "seed", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AiCPU"} | |||
| {"op_name": "LogUniformCandidateSampler", "inputs": [{"index": 0, "name": "true_classes", "param_type": "required"}], "outputs": [{"index": 0, "name": "sampled_candidates", "param_type": "required"}, {"index": 1, "name": "true_expected_count", "param_type": "required"}, {"index": 2, "name": "true_expected_count", "param_type": "required"}], "attr": [{"name": "num_true", "type": "int"}, {"name": "num_sampled", "type": "int"}, {"name": "unique", "type": "bool"}, {"name": "range_max", "type": "int"}, {"name": "seed", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "AiCPU"} | |||
| {"op_name": "ComputeAccidentalHits", "inputs": [{"index": 0, "name": "true_classes", "param_type": "required"}, {"index": 1, "name": "sampled_candidates", "param_type": "required"}], "outputs": [{"index": 0, "name": "indices", "param_type": "required"}, {"index": 1, "name": "ids", "param_type": "required"}, {"index": 2, "name": "weights", "param_type": "required"}], "attr": [{"name": "num_true", "type": "int"}], "fusion_type": "OPAQUE", "dtype_format": [[["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float64", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["float64", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["int64", "DefaultFormat"], ["float16", "DefaultFormat"]]], "imply_type": "AiCPU"} | |||
| @@ -338,8 +338,8 @@ | |||
| {"op_name": "BNTrainingUpdateV2", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float16", ""], ["float32", ""], ["float32", ""]], [["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""], ["float32", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_update_v2.so", "compute_cost": 10, "kernel_name": "bn_training_update_v2", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "dynamicFormat"} | |||
| {"op_name": "BNTrainingUpdateV3", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "square_sum", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "scale", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "offset", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all", "reshape_type": "NC"}, {"index": 1, "name": "batch_mean", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "batch_variance", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 3, "name": "reserve_1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 4, "name": "reserve_2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "epsilon", "param_type": "required", "type": "float", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float16", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "bn_training_update_v3.so", "compute_cost": 10, "kernel_name": "bn_training_update_v3", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "SquareSumAll", "inputs": [{"index": 0, "name": "x1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "x2", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y1", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "y2", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "OPAQUE", "dtype_format": [[["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"], ["float32", "FracZ"]], [["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"], ["float32", "C1HWNCoC0"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "square_sum_all.so", "compute_cost": 10, "kernel_name": "square_sum_all", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "Pack", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "NDHWC"], ["int8", "NDHWC"]], [["int16", "NDHWC"], ["int16", "NDHWC"]], [["int32", "NDHWC"], ["int32", "NDHWC"]], [["int64", "NDHWC"], ["int64", "NDHWC"]], [["uint8", "NDHWC"], ["uint8", "NDHWC"]], [["uint16", "NDHWC"], ["uint16", "NDHWC"]], [["uint32", "NDHWC"], ["uint32", "NDHWC"]], [["uint64", "NDHWC"], ["uint64", "NDHWC"]], [["float16", "NDHWC"], ["float16", "NDHWC"]], [["float32", "NDHWC"], ["float32", "NDHWC"]], [["bool", "NDHWC"], ["bool", "NDHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "pack.so", "compute_cost": 10, "kernel_name": "pack", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": true, "op_pattern": ""} | |||
| {"op_name": "Unpack", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "attr": [{"name": "num", "param_type": "optional", "type": "int", "value": "all"}, {"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unpack.so", "compute_cost": 10, "kernel_name": "unpack", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "dynamicFormat"} | |||
| {"op_name": "Stack", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "axis", "param_type": "optional", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["int16", "DefaultFormat"], ["int16", "DefaultFormat"]], [["int32", "DefaultFormat"], ["int32", "DefaultFormat"]], [["int64", "DefaultFormat"], ["int64", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["uint16", "DefaultFormat"], ["uint16", "DefaultFormat"]], [["uint32", "DefaultFormat"], ["uint32", "DefaultFormat"]], [["uint64", "DefaultFormat"], ["uint64", "DefaultFormat"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["bool", "DefaultFormat"], ["bool", "DefaultFormat"]], [["int8", "NDHWC"], ["int8", "NDHWC"]], [["int16", "NDHWC"], ["int16", "NDHWC"]], [["int32", "NDHWC"], ["int32", "NDHWC"]], [["int64", "NDHWC"], ["int64", "NDHWC"]], [["uint8", "NDHWC"], ["uint8", "NDHWC"]], [["uint16", "NDHWC"], ["uint16", "NDHWC"]], [["uint32", "NDHWC"], ["uint32", "NDHWC"]], [["uint64", "NDHWC"], ["uint64", "NDHWC"]], [["float16", "NDHWC"], ["float16", "NDHWC"]], [["float32", "NDHWC"], ["float32", "NDHWC"]], [["bool", "NDHWC"], ["bool", "NDHWC"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "pack.so", "compute_cost": 10, "kernel_name": "pack", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": true, "op_pattern": ""} | |||
| {"op_name": "Unstack", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "dynamic", "shape": "all"}], "attr": [{"name": "num", "param_type": "optional", "type": "int", "value": "all"}, {"name": "axis", "param_type": "required", "type": "int", "value": "all"}], "fusion_type": "OPAQUE", "dtype_format": [[["", ""], ["", ""]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "unpack.so", "compute_cost": 10, "kernel_name": "unpack", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": "dynamicFormat"} | |||
| {"op_name": "ScatterUpdate", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 2, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_update.so", "compute_cost": 10, "kernel_name": "scatter_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "ScatterUpdate", "inputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "indices", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "updates", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "var", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [{"name": "use_locking", "param_type": "optional", "type": "bool", "value": "all"}], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "DefaultFormat"], ["int32", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "DefaultFormat"], ["int32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]], [["int8", "DefaultFormat"], ["int32", "DefaultFormat"], ["int8", "DefaultFormat"], ["int8", "DefaultFormat"]], [["uint8", "DefaultFormat"], ["int32", "DefaultFormat"], ["uint8", "DefaultFormat"], ["uint8", "DefaultFormat"]], [["bool", "DefaultFormat"], ["int32", "DefaultFormat"], ["bool", "DefaultFormat"], ["bool", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "scatter_update.so", "compute_cost": 10, "kernel_name": "scatter_update", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": true, "need_check_supported": false, "op_pattern": ""} | |||
| {"op_name": "PReLU", "inputs": [{"index": 0, "name": "x", "need_compile": false, "param_type": "required", "shape": "all"}, {"index": 1, "name": "weight", "need_compile": false, "param_type": "required", "shape": "all"}], "outputs": [{"index": 0, "name": "y", "need_compile": false, "param_type": "required", "shape": "all"}], "attr": [], "fusion_type": "ELEMWISE", "dtype_format": [[["float16", "NC1HWC0"], ["float16", "NC1HWC0"], ["float16", "NC1HWC0"]], [["float16", "DefaultFormat"], ["float16", "DefaultFormat"], ["float16", "DefaultFormat"]], [["float32", "NC1HWC0"], ["float32", "NC1HWC0"], ["float32", "NC1HWC0"]], [["float32", "DefaultFormat"], ["float32", "DefaultFormat"], ["float32", "DefaultFormat"]]], "imply_type": "TBE", "async_flag": false, "binfile_name": "prelu.so", "compute_cost": 10, "kernel_name": "prelu", "partial_flag": true, "reshape_type": "", "dynamic_format": false, "dynamic_shape": false, "need_check_supported": false, "op_pattern": ""} | |||
| @@ -142,6 +142,9 @@ bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std:: | |||
| if (node_name_ == kTopK) { | |||
| node_name_ = kTopKV2; | |||
| } | |||
| if (node_name_ == kStack) { | |||
| node_name_ = kPack; | |||
| } | |||
| MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_ | |||
| << ", args_size:" << args_.length(); | |||
| if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()), | |||
| @@ -182,6 +185,10 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr> | |||
| node_name_ = kTopKV2; | |||
| } | |||
| if (node_name_ == kStack) { | |||
| node_name_ = kPack; | |||
| } | |||
| AicpuTaskInfoPtr task_info_ptr = | |||
| make_shared<ge::model_runner::AicpuTaskInfo>(kernel_name_, stream_id, node_so_, node_name_, node_def_str_, | |||
| ext_info_, input_data_addrs, output_data_addrs, NeedDump()); | |||
| @@ -43,6 +43,7 @@ constexpr auto kSeed1 = "Seed1"; | |||
| constexpr auto kSeed2 = "seed2"; | |||
| constexpr auto kTopK = "TopK"; | |||
| constexpr auto kTopKV2 = "TopKV2"; | |||
| constexpr auto kStack = "Stack"; | |||
| constexpr auto kEditDistance = "EditDistance"; | |||
| constexpr auto kGatherD = "GatherD"; | |||
| constexpr auto kIdentity = "Identity"; | |||
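Editor's note: the Launch and GenTask hunks above rewrite node_name_ from kStack to kPack, so the front end keeps the new op name while Ascend reuses the existing AICPU kernel binary. A minimal user-level sketch (assuming an Ascend device context is configured):

import numpy as np
import mindspore.ops.operations as P
from mindspore import Tensor

x1 = Tensor(np.array([1, 2], np.int32))
x2 = Tensor(np.array([3, 4], np.int32))
y = P.Stack(axis=0)((x1, x2))  # shape (2, 2); dispatched to the "Pack" AICPU kernel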
| @@ -44,38 +44,38 @@ class PackCpuFwdKernel : public CPUKernel { | |||
| std::unique_ptr<T *[]> inputs_host_; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), | |||
| PackCpuFwdKernel, int8_t) | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| PackCpuFwdKernel, int16_t) | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| PackCpuFwdKernel, int32_t) | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| PackCpuFwdKernel, int64_t) | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), | |||
| PackCpuFwdKernel, uint8_t) | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), | |||
| PackCpuFwdKernel, bool) | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16), | |||
| PackCpuFwdKernel, uint16_t) | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32), | |||
| PackCpuFwdKernel, uint32_t) | |||
| MS_REG_CPU_KERNEL_T(Pack, | |||
| MS_REG_CPU_KERNEL_T(Stack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), | |||
| PackCpuFwdKernel, uint64_t) | |||
| MS_REG_CPU_KERNEL_T( | |||
| Pack, KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| Stack, KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| PackCpuFwdKernel, float16) | |||
| MS_REG_CPU_KERNEL_T( | |||
| Pack, KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| Stack, KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| PackCpuFwdKernel, float) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -49,38 +49,38 @@ class UnpackCPUKernel : public CPUKernel { | |||
| T **outputs_host_{nullptr}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| }; | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), | |||
| UnpackCPUKernel, int8_t); | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| UnpackCPUKernel, int16_t); | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| UnpackCPUKernel, int); | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| UnpackCPUKernel, int64_t); | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), | |||
| UnpackCPUKernel, bool); | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), | |||
| UnpackCPUKernel, uint8_t); | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16), | |||
| UnpackCPUKernel, uint16_t); | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32), | |||
| UnpackCPUKernel, uint32_t); | |||
| MS_REG_CPU_KERNEL_T(Unpack, | |||
| MS_REG_CPU_KERNEL_T(Unstack, | |||
| KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), | |||
| UnpackCPUKernel, uint64_t); | |||
| MS_REG_CPU_KERNEL_T( | |||
| Unpack, KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| Unstack, KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| UnpackCPUKernel, float16); | |||
| MS_REG_CPU_KERNEL_T( | |||
| Unpack, KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| Unstack, KernelAttr().SetAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| UnpackCPUKernel, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
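Editor's note: as with Pack/Stack, the CPU kernel is re-registered under the public name Unstack while the UnpackCPUKernel implementation is unchanged. A minimal sketch of the op's semantics (assuming a CPU context):

import numpy as np
import mindspore.ops.operations as P
from mindspore import Tensor

x = Tensor(np.arange(6).reshape(2, 3).astype(np.float32))
a, b = P.Unstack(axis=0)(x)  # two tensors of shape (3,), one per slice along axis 0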
| @@ -18,38 +18,38 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_ONE(Pack, | |||
| MS_REG_GPU_KERNEL_ONE(Stack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), | |||
| PackGpuFwdKernel, int8_t) | |||
| MS_REG_GPU_KERNEL_ONE(Pack, | |||
| MS_REG_GPU_KERNEL_ONE(Stack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| PackGpuFwdKernel, int16_t) | |||
| MS_REG_GPU_KERNEL_ONE(Pack, | |||
| MS_REG_GPU_KERNEL_ONE(Stack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| PackGpuFwdKernel, int) | |||
| MS_REG_GPU_KERNEL_ONE(Pack, | |||
| MS_REG_GPU_KERNEL_ONE(Stack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| PackGpuFwdKernel, int64_t) | |||
| MS_REG_GPU_KERNEL_ONE(Pack, | |||
| MS_REG_GPU_KERNEL_ONE(Stack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), | |||
| PackGpuFwdKernel, uint8_t) | |||
| MS_REG_GPU_KERNEL_ONE(Pack, | |||
| MS_REG_GPU_KERNEL_ONE(Stack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), | |||
| PackGpuFwdKernel, bool) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Pack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16), | |||
| Stack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16), | |||
| PackGpuFwdKernel, uint16_t) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Pack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32), | |||
| Stack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32), | |||
| PackGpuFwdKernel, uint32_t) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Pack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), | |||
| Stack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), | |||
| PackGpuFwdKernel, uint64_t) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Pack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| Stack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| PackGpuFwdKernel, half) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Pack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| Stack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| PackGpuFwdKernel, float) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -18,38 +18,38 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_GPU_KERNEL_ONE(Unpack, | |||
| MS_REG_GPU_KERNEL_ONE(Unstack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), | |||
| UnpackGpuFwdKernel, int8_t) | |||
| MS_REG_GPU_KERNEL_ONE(Unpack, | |||
| MS_REG_GPU_KERNEL_ONE(Unstack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), | |||
| UnpackGpuFwdKernel, int16_t) | |||
| MS_REG_GPU_KERNEL_ONE(Unpack, | |||
| MS_REG_GPU_KERNEL_ONE(Unstack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), | |||
| UnpackGpuFwdKernel, int) | |||
| MS_REG_GPU_KERNEL_ONE(Unpack, | |||
| MS_REG_GPU_KERNEL_ONE(Unstack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), | |||
| UnpackGpuFwdKernel, int64_t) | |||
| MS_REG_GPU_KERNEL_ONE(Unpack, | |||
| MS_REG_GPU_KERNEL_ONE(Unstack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), | |||
| UnpackGpuFwdKernel, uint8_t) | |||
| MS_REG_GPU_KERNEL_ONE(Unpack, | |||
| MS_REG_GPU_KERNEL_ONE(Unstack, | |||
| KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool), | |||
| UnpackGpuFwdKernel, bool) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Unpack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16), | |||
| Unstack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16), | |||
| UnpackGpuFwdKernel, uint16_t) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Unpack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32), | |||
| Unstack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32), | |||
| UnpackGpuFwdKernel, uint32_t) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Unpack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), | |||
| Unstack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), | |||
| UnpackGpuFwdKernel, uint64_t) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Unpack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| Unstack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), | |||
| UnpackGpuFwdKernel, half) | |||
| MS_REG_GPU_KERNEL_ONE( | |||
| Unpack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| Unstack, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), | |||
| UnpackGpuFwdKernel, float) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -26,7 +26,7 @@ AnfNodePtr CreateNewPack(const FuncGraphPtr &func_graph, const CNodePtr &origin_ | |||
| size_t offset) { | |||
| MS_EXCEPTION_IF_NULL(func_graph); | |||
| MS_EXCEPTION_IF_NULL(origin_pack_cnode); | |||
| std::vector<AnfNodePtr> new_pack_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimPack->name()))}; | |||
| std::vector<AnfNodePtr> new_pack_inputs{NewValueNode(std::make_shared<Primitive>(prim::kPrimStack->name()))}; | |||
| for (size_t i = begin_index; i < begin_index + offset; ++i) { | |||
| new_pack_inputs.push_back(origin_pack_cnode->input(i)); | |||
| } | |||
| @@ -68,7 +68,7 @@ AnfNodePtr CreateNewPack(const FuncGraphPtr &func_graph, const CNodePtr &origin_ | |||
| const BaseRef PackFission::DefinePattern() const { | |||
| VarPtr Xs = std::make_shared<SeqVar>(); | |||
| return VectorRef({prim::kPrimPack, Xs}); | |||
| return VectorRef({prim::kPrimStack, Xs}); | |||
| } | |||
| const AnfNodePtr PackFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const { | |||
| @@ -24,7 +24,7 @@ constexpr size_t kPackInputsDivisor = 63; | |||
| class PackFission : public PatternProcessPass { | |||
| public: | |||
| explicit PackFission(bool multigraph = true) | |||
| : PatternProcessPass("pack_fission", multigraph), inputs_divisor_(kPackInputsDivisor) {} | |||
| : PatternProcessPass("stack_fission", multigraph), inputs_divisor_(kPackInputsDivisor) {} | |||
| ~PackFission() override = default; | |||
| const BaseRef DefinePattern() const override; | |||
| const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override; | |||
| @@ -83,7 +83,7 @@ Graph::NodeType MakeNewOperator(const std::vector<std::shared_ptr<OperatorInfo>> | |||
| OperatorRec CompleteOperatorInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops, | |||
| Graph::NodeType NewTensor) { | |||
| size_t input_tensor_size = ops[iter_ops]->inputs_tensor_info().size(); | |||
| if (ops[iter_ops]->type() == PACK) { | |||
| if (ops[iter_ops]->type() == STACK) { | |||
| input_tensor_size = 1; | |||
| } | |||
| if (input_tensor_size > MAX_INPUT_NUM) { | |||
| @@ -145,7 +145,7 @@ const std::map<std::string, OperatorType> DictOpType{ | |||
| {ASSIGN_SUB, OperatorType::kRecElmWiseOp}, | |||
| {"AssignAdd", OperatorType::kRecElmWiseOp}, | |||
| {DROPOUT_DO_MASK, OperatorType::kRecElmWiseOp}, | |||
| {PACK, OperatorType::kRecElmWiseOp}}; | |||
| {STACK, OperatorType::kRecElmWiseOp}}; | |||
| const TensorParam MakeTensor(int64_t n, int64_t c, int64_t h, int64_t w); | |||
| @@ -187,7 +187,7 @@ REGISTER(BroadcastToInfo); | |||
| REGISTER(StridedSliceInfo); | |||
| REGISTER(SliceInfo); | |||
| REGISTER(DropoutInfo); | |||
| REGISTER(PackInfo); | |||
| REGISTER(StackInfo); | |||
| REGISTER(ConcatInfo); | |||
| REGISTER(SplitInfo); | |||
| REGISTER(UniqueInfo); | |||
| @@ -23,7 +23,7 @@ | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| const std::set<std::string> BATCH_PARALLEL_BLACK_LIST = {PACK, TENSOR_SCATTER_UPDATE, MIN_MAX_UPDATE_PER_LAYER}; | |||
| const std::set<std::string> BATCH_PARALLEL_BLACK_LIST = {STACK, TENSOR_SCATTER_UPDATE, MIN_MAX_UPDATE_PER_LAYER}; | |||
| bool IsInBatchParallelBlackList(const PrimitivePtr &prim) { | |||
| MS_EXCEPTION_IF_NULL(prim); | |||
| @@ -378,7 +378,7 @@ constexpr char STOP_GRADIENT[] = "stop_gradient"; | |||
| // Batch parallel black list | |||
| constexpr char TENSOR_SCATTER_UPDATE[] = "TensorScatterUpdate"; | |||
| constexpr char MIN_MAX_UPDATE_PER_LAYER[] = "MinMaxUpdatePerLayer"; | |||
| constexpr char PACK[] = "Pack"; | |||
| constexpr char STACK[] = "Stack"; | |||
| constexpr size_t LAST_INDEX(size_t s) { return s - 1; } | |||
| constexpr size_t SECOND_FROM_END(size_t s) { return s - 2; } | |||
| @@ -28,7 +28,7 @@ | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| Status PackInfo::GetAttrs() { | |||
| Status StackInfo::GetAttrs() { | |||
| int axis = 0; | |||
| auto axis_iter = attrs_.find(AXIS); | |||
| if (axis_iter != attrs_.end()) { | |||
| @@ -57,7 +57,7 @@ Status PackInfo::GetAttrs() { | |||
| return SUCCESS; | |||
| } | |||
| Status PackInfo::CheckStrategy(const StrategyPtr &strategy) { | |||
| Status StackInfo::CheckStrategy(const StrategyPtr &strategy) { | |||
| MS_EXCEPTION_IF_NULL(strategy); | |||
| if (CheckStrategyValue(strategy, inputs_shape_) != SUCCESS) { | |||
| MS_LOG(ERROR) << name_ << ": Invalid strategy"; | |||
| @@ -83,7 +83,7 @@ Status PackInfo::CheckStrategy(const StrategyPtr &strategy) { | |||
| return SUCCESS; | |||
| } | |||
| Status PackInfo::InferDevMatrixShape() { | |||
| Status StackInfo::InferDevMatrixShape() { | |||
| MS_EXCEPTION_IF_NULL(strategy_); | |||
| std::vector<Dimensions> stra = strategy_->GetInputDim(); | |||
| if (stra.empty()) { | |||
| @@ -95,7 +95,7 @@ Status PackInfo::InferDevMatrixShape() { | |||
| return SUCCESS; | |||
| } | |||
| Status PackInfo::InferTensorMap() { | |||
| Status StackInfo::InferTensorMap() { | |||
| TensorMap in_tensor_map; | |||
| TensorMap out_tensor_map; | |||
| @@ -119,7 +119,7 @@ Status PackInfo::InferTensorMap() { | |||
| return SUCCESS; | |||
| } | |||
| Status PackInfo::InferMirrorOps() { | |||
| Status StackInfo::InferMirrorOps() { | |||
| mirror_ops_.clear(); | |||
| if (inputs_tensor_map_.empty()) { | |||
| MS_LOG(ERROR) << name_ << ": The inputs tensor map is empty"; | |||
| @@ -147,7 +147,7 @@ Status PackInfo::InferMirrorOps() { | |||
| return SUCCESS; | |||
| } | |||
| Status PackInfo::InferTensorInfo() { | |||
| Status StackInfo::InferTensorInfo() { | |||
| if (inputs_shape_.empty() || outputs_shape_.empty() || inputs_tensor_map_.empty() || outputs_tensor_map_.empty()) { | |||
| MS_LOG(ERROR) << name_ << ": Invalid args"; | |||
| return FAILED; | |||
| @@ -173,15 +173,15 @@ Status PackInfo::InferTensorInfo() { | |||
| return SUCCESS; | |||
| } | |||
| void PackInfo::ReComputeBatchSplitFlagList() { | |||
| void StackInfo::ReComputeBatchSplitFlagList() { | |||
| for (size_t i = 0; i < inputs_shape_.size(); i++) { | |||
| split_flag_list_[i] = true; | |||
| } | |||
| } | |||
| Status PackInfo::SetCostUnderStrategy(const StrategyPtr &strategy) { return SetCostUnderStrategyBase(strategy); } | |||
| Status StackInfo::SetCostUnderStrategy(const StrategyPtr &strategy) { return SetCostUnderStrategyBase(strategy); } | |||
| Status PackInfo::GenerateStrategies(int64_t stage_id) { | |||
| Status StackInfo::GenerateStrategies(int64_t stage_id) { | |||
| if (InferAttrs() != SUCCESS) { | |||
| MS_LOG(ERROR) << name_ << ": Infer attrs failed"; | |||
| return FAILED; | |||
| @@ -231,7 +231,7 @@ Status PackInfo::GenerateStrategies(int64_t stage_id) { | |||
| return SUCCESS; | |||
| } | |||
| Status PackInfo::Init(const StrategyPtr &strategy) { | |||
| Status StackInfo::Init(const StrategyPtr &strategy) { | |||
| if (InitWithAutoRepeatCalc(strategy) != SUCCESS) { | |||
| MS_LOG(ERROR) << name_ << ": Init failed."; | |||
| return FAILED; | |||
| @@ -240,7 +240,7 @@ Status PackInfo::Init(const StrategyPtr &strategy) { | |||
| return SUCCESS; | |||
| } | |||
| Status PackInfo::InitForCostModel(const StrategyPtr &strategy) { | |||
| Status StackInfo::InitForCostModel(const StrategyPtr &strategy) { | |||
| if (InitForCostModelWithAutoRepeatCalc(strategy) != SUCCESS) { | |||
| MS_LOG(ERROR) << name_ << ": Init for cost model failed."; | |||
| return FAILED; | |||
| @@ -29,12 +29,12 @@ | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| class PackInfo : public OperatorInfo { | |||
| class StackInfo : public OperatorInfo { | |||
| public: | |||
| PackInfo(const std::string &operator_name, const Shapes &inputs_shape, const Shapes &outputs_shape, | |||
| const PrimitiveAttrs &attrs) | |||
| StackInfo(const std::string &operator_name, const Shapes &inputs_shape, const Shapes &outputs_shape, | |||
| const PrimitiveAttrs &attrs) | |||
| : OperatorInfo(operator_name, inputs_shape, outputs_shape, attrs, std::make_shared<PackCost>()) {} | |||
| ~PackInfo() override = default; | |||
| ~StackInfo() override = default; | |||
| Status Init(const StrategyPtr &strategy) override; | |||
| Status InitForCostModel(const StrategyPtr &strategy) override; | |||
| @@ -55,7 +55,7 @@ class PackInfo : public OperatorInfo { | |||
| size_t axis_ = 0; | |||
| }; | |||
| using PackInfoPtr = std::shared_ptr<PackInfo>; | |||
| using StackInfoPtr = std::shared_ptr<StackInfo>; | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| @@ -154,7 +154,7 @@ bool IsSplittableOperator(const std::string &op_name) { | |||
| {MATMUL, TRANSPOSE, GELU, TANH, SOFTMAX, SUB, MUL, DIV, RESHAPE, GREATER, LOG_SOFTMAX, ACTIVATION, PRELU, | |||
| FLOORDIV, L2_NORMALIZE, ADD, MAXPOOL, MAXPOOLV2, VIRTUAL_DATA_SET, RELU, ONEHOT, DROPOUT_DO_MASK, | |||
| REDUCE_MAX, REDUCE_MIN, ARGMAXWITHVALUE, ARGMINWITHVALUE, REDUCE_SUM, CONV2D, FUSE_BATCH_NORM, POOLING, | |||
| MAX_POOL_WITH_ARGMAX, SIMPLE_MEAN, FLATTEN, BATCH_NORM, LAYER_NORM, BIAS_ADD, ASSIGN_SUB, COS, ACOS, EXP, PACK, | |||
| MAX_POOL_WITH_ARGMAX, SIMPLE_MEAN, FLATTEN, BATCH_NORM, LAYER_NORM, BIAS_ADD, ASSIGN_SUB, COS, ACOS, EXP, STACK, | |||
| LOG, REDUCE_MEAN, REAL_DIV, SIGMOID, POW, MAXIMUM, MINIMUM, EQUAL, NOT_EQUAL, LOGICALNOT, GATHERV2, SQRT, CONCAT, | |||
| STRIDEDSLICE, GET_NEXT, CAST, NEG, SQUARE, BATCH_MATMUL, EXPAND_DIMS, SQUEEZE, SPARSE_GATHERV2, TILE, DROPOUT, | |||
| SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, SIGMOID_CROSS_ENTROPY_WITH_LOGITS, SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, | |||
| @@ -158,7 +158,6 @@ constexpr const char kNameSlice[] = "Slice"; | |||
| constexpr const char kNameAddN[] = "AddN"; | |||
| constexpr const char kNameLess[] = "Less"; | |||
| constexpr const char kNameGreater[] = "Greater"; | |||
| constexpr const char kNamePack[] = "Pack"; | |||
| constexpr const char kNameUnpack[] = "Unpack"; | |||
| constexpr const char kNameMerge[] = "Merge"; | |||
| constexpr const char kNameGeSwitch[] = "GeSwitch"; | |||
| @@ -29,7 +29,7 @@ INPUT_MAP(Pack) = EMPTY_INPUT_MAP; | |||
| DYN_INPUT_MAP(Pack) = {{1, DYN_INPUT_DESC(x)}}; | |||
| ATTR_MAP(Pack) = {{"num", ATTR_DESC(N, AnyTraits<int64_t>())}, {"axis", ATTR_DESC(axis, AnyTraits<int64_t>())}}; | |||
| OUTPUT_MAP(Pack) = {{0, OUTPUT_DESC(y)}}; | |||
| REG_ADPT_DESC(Pack, kNamePack, ADPT_DESC(Pack)) | |||
| REG_ADPT_DESC(Pack, prim::kStack, ADPT_DESC(Pack)) | |||
| // ConcatD | |||
| INPUT_MAP(ConcatD) = EMPTY_INPUT_MAP; | |||
| @@ -28,7 +28,7 @@ REG_ADPT_DESC(Flatten, prim::kPrimFlatten->name(), ADPT_DESC(Flatten)) | |||
| INPUT_MAP(Unpack) = {{1, INPUT_DESC(x)}}; | |||
| ATTR_MAP(Unpack) = {{"axis", ATTR_DESC(axis, AnyTraits<int64_t>())}, {"num", ATTR_DESC(num, AnyTraits<int64_t>())}}; | |||
| DYN_OUTPUT_MAP(Unpack) = {{0, DYN_OUTPUT_DESC(y)}}; | |||
| REG_ADPT_DESC(Unpack, kNameUnpack, ADPT_DESC(Unpack)) | |||
| REG_ADPT_DESC(Unpack, prim::kUnstack, ADPT_DESC(Unpack)) | |||
| // ExtractImagePatches | |||
| INPUT_MAP(ExtractImagePatches) = {{1, INPUT_DESC(x)}}; | |||
| @@ -108,8 +108,8 @@ AbstractBasePtr InferImplArrayToScalar(const AnalysisEnginePtr &, const Primitiv | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplBroadCastShape(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplPack(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplStack(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| AbstractBasePtr InferImplMakeTuple(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list); | |||
| @@ -114,8 +114,8 @@ AbstractBasePtr InferImplTile(const AnalysisEnginePtr &, const PrimitivePtr &pri | |||
| return std::make_shared<AbstractTensor>(arg->element(), std::make_shared<Shape>(result_shp)); | |||
| } | |||
| AbstractBasePtr InferImplPack(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list) { | |||
| AbstractBasePtr InferImplStack(const AnalysisEnginePtr &, const PrimitivePtr &primitive, | |||
| const AbstractBasePtrList &args_spec_list) { | |||
| // Inputs: a tuple of tensors. | |||
| const std::string op_name = primitive->name(); | |||
| CheckArgsSize(op_name, args_spec_list, 1); | |||
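Editor's note: the shape rule behind InferImplStack is that stacking n same-shaped tensors inserts a new dimension of size n at the given axis (an assumption based on Stack semantics; the full function body is not shown in this hunk). A small standalone sketch with a hypothetical helper name:

def stack_infer_shape(n, shape, axis):
    """Hypothetical helper: output shape of stacking n tensors of `shape` at `axis`."""
    rank_out = len(shape) + 1
    pos = axis if axis >= 0 else axis + rank_out
    out = list(shape)
    out.insert(pos, n)
    return tuple(out)

assert stack_infer_shape(5, (8, 4), 1) == (8, 5, 4)
assert stack_infer_shape(3, (8, 4), -1) == (8, 4, 3)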
| @@ -78,7 +78,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() { | |||
| {prim::kPrimScalarToArray, {InferImplScalarToArray, true}}, | |||
| {prim::kPrimArrayToScalar, {InferImplArrayToScalar, true}}, | |||
| {prim::kPrimBroadcastShape, {InferImplBroadCastShape, true}}, | |||
| {prim::kPrimPack, {InferImplPack, true}}, | |||
| {prim::kPrimStack, {InferImplStack, true}}, | |||
| {prim::kPrimPad, {InferImplPad, true}}, | |||
| {prim::kPrimUnique, {InferImplUnique, true}}, | |||
| {prim::kPrimUniqueGrad, {InferImplUniqueGrad, true}}, | |||
| @@ -38,6 +38,10 @@ constexpr auto kScalarTrunc = "ScalarTrunc"; | |||
| constexpr auto kScalarFloor = "ScalarFloor"; | |||
| constexpr auto kScalarUadd = "ScalarUadd"; | |||
| constexpr auto kScalarUsub = "ScalarUsub"; | |||
| // Arrays | |||
| constexpr auto kStack = "Stack"; | |||
| constexpr auto kUnstack = "Unstack"; | |||
| constexpr auto kTupleGetItem = "TupleGetItem"; | |||
| // Here list all primitives used in backend or some special primitives used by core. | |||
| @@ -118,8 +122,9 @@ inline const PrimitivePtr kPrimSize = std::make_shared<Primitive>("Size"); | |||
| inline const PrimitivePtr kPrimArgMax = std::make_shared<Primitive>("Argmax"); | |||
| inline const PrimitivePtr kPrimArgMin = std::make_shared<Primitive>("Argmin"); | |||
| inline const PrimitivePtr kPrimPack = std::make_shared<Primitive>("Pack"); | |||
| inline const PrimitivePtr kPrimStack = std::make_shared<Primitive>(kStack); | |||
| inline const PrimitivePtr kPrimUnpack = std::make_shared<Primitive>("Unpack"); | |||
| inline const PrimitivePtr kPrimUnstack = std::make_shared<Primitive>("Unstack"); | |||
| inline const PrimitivePtr kPrimUnstack = std::make_shared<Primitive>(kUnstack); | |||
| inline const PrimitivePtr kPrimUnsortedSegmentMax = std::make_shared<Primitive>("UnsortedSegmentMax"); | |||
| inline const PrimitivePtr kPrimUnsortedSegmentSum = std::make_shared<Primitive>("UnsortedSegmentSum"); | |||
| inline const PrimitivePtr kPrimUnsortedSegmentMin = std::make_shared<Primitive>("UnsortedSegmentMin"); | |||
| @@ -426,7 +431,6 @@ inline const PrimitivePtr kPrimEquivFormat = std::make_shared<Primitive>("EquivF | |||
| inline const PrimitivePtr kPrimLshProjection = std::make_shared<Primitive>("LshProjection"); | |||
| inline const PrimitivePtr kPrimHashtableLookup = std::make_shared<Primitive>("HashtableLookup"); | |||
| inline const PrimitivePtr kPrimCustomPredict = std::make_shared<Primitive>("CustomPredict"); | |||
| inline const PrimitivePtr kPrimStack = std::make_shared<Primitive>("Stack"); | |||
| inline const PrimitivePtr kPrimPriorBox = std::make_shared<Primitive>("PriorBox"); | |||
| inline const PrimitivePtr kPrimQuantDTypeCast = std::make_shared<Primitive>("QuantDTypeCast"); | |||
| inline const PrimitivePtr kPrimWhile = std::make_shared<Primitive>("While"); | |||
| @@ -332,7 +332,7 @@ class MSSSIM(Cell): | |||
| self.reduce_mean = P.ReduceMean() | |||
| self.prod = P.ReduceProd() | |||
| self.pow = P.Pow() | |||
| self.pack = P.Pack(axis=-1) | |||
| self.stack = P.Stack(axis=-1) | |||
| self.concat = P.Concat(axis=1) | |||
| def construct(self, img1, img2): | |||
| @@ -360,7 +360,7 @@ class MSSSIM(Cell): | |||
| img1, img2 = _downsample(img1, img2, self.avg_pool) | |||
| mcs = mcs[0:-1:1] | |||
| mcs_and_ssim = self.pack(mcs + (self.relu(sim),)) | |||
| mcs_and_ssim = self.stack(mcs + (self.relu(sim),)) | |||
| mcs_and_ssim = self.pow(mcs_and_ssim, self.weight_tensor) | |||
| ms_ssim = self.prod(mcs_and_ssim, -1) | |||
| loss = self.reduce_mean(ms_ssim, -1) | |||
| @@ -15,7 +15,7 @@ | |||
| """Time Distributed.""" | |||
| from mindspore.ops.primitive import constexpr, Primitive | |||
| from mindspore.ops import Reshape, Transpose, Pack, Unpack | |||
| from mindspore.ops import Reshape, Transpose, Stack, Unstack | |||
| from mindspore.common import Tensor | |||
| from mindspore._checkparam import Validator | |||
| from ..cell import Cell | |||
| @@ -130,13 +130,13 @@ class TimeDistributed(Cell): | |||
| outputs_shape_new += outputs.shape[reshape_pos + 1:] | |||
| return self.reshape(outputs, outputs_shape_new) | |||
| unpack = Unpack(time_axis) | |||
| inputs = unpack(inputs) | |||
| unstack = Unstack(time_axis) | |||
| inputs = unstack(inputs) | |||
| y = () | |||
| for item in inputs: | |||
| outputs = self.layer(item) | |||
| _check_data(isinstance(outputs, Tensor)) | |||
| _check_expand_dims_axis(time_axis, outputs.ndim) | |||
| y += (outputs,) | |||
| y = Pack(time_axis)(y) | |||
| y = Stack(time_axis)(y) | |||
| return y | |||
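Editor's note: TimeDistributed is the classic unstack-map-stack pattern along the time axis. A minimal sketch with a hypothetical per-step layer:

import numpy as np
import mindspore.nn as nn
import mindspore.ops.operations as P
from mindspore import Tensor

layer = nn.Dense(4, 2)                            # hypothetical per-timestep layer
inputs = Tensor(np.ones((8, 5, 4), np.float32))   # (batch, time, feature)
steps = P.Unstack(axis=1)(inputs)                 # 5 tensors of shape (8, 4)
y = P.Stack(axis=1)(tuple(layer(s) for s in steps))  # back to (8, 5, 2)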
| @@ -84,6 +84,8 @@ def get_bprop_dtype(self): | |||
| dout_cast = C.MultitypeFuncGraph("dout_cast") | |||
| @dout_cast.register("Tensor", "Tensor") | |||
| def dout_cast_tensor(dout, x): | |||
| cast = P.Cast() | |||
| @@ -91,6 +93,7 @@ def dout_cast_tensor(dout, x): | |||
| dx = cast(dout, get_dtype(x)) | |||
| return dx | |||
| @dout_cast.register("Number", "Number") | |||
| def dout_cast_number(dout, x): | |||
| cast = P.Cast() | |||
| @@ -98,6 +101,7 @@ def dout_cast_number(dout, x): | |||
| dx = cast(dout, get_dtype(x)) | |||
| return dx | |||
| @dout_cast.register("RowTensor", "Tensor") | |||
| def dout_cast_row_tensor(dout, x): | |||
| cast = P.Cast() | |||
| @@ -253,6 +257,7 @@ def get_bprop_embedding_lookup(self): | |||
| """Generate bprop for EmbeddingLookup""" | |||
| sub_op = P.Sub() | |||
| reshape_op = P.Reshape() | |||
| def bprop_sparse(x, indices, offset, out, dout): | |||
| x_shp = shape_op(x) | |||
| new_indices = sub_op(indices, offset) | |||
| @@ -264,13 +269,16 @@ def get_bprop_embedding_lookup(self): | |||
| # Reshape the 'actual_dout' on device | |||
| actual_dout = reshape_op(dout, actual_dout_shape_changed) | |||
| return RowTensor(new_indices, actual_dout, x_shp), zeros_like(indices), zeros_like(offset) | |||
| return bprop_sparse | |||
| @constexpr | |||
| def make_begin(shp): | |||
| begin = tuple([0 for _ in shp]) | |||
| return begin | |||
| @bprop_getters.register(P.Padding) | |||
| def get_bprop_padding(self): | |||
| """Grad definition for `Padding` operation.""" | |||
| @@ -299,11 +307,12 @@ def _concat_grad_uniform(input_shapes, input_nums): | |||
| """Helper function for bprop of Concat""" | |||
| is_uniform = True | |||
| for i in range(1, input_nums): | |||
| if input_shapes[i-1] != input_shapes[i]: | |||
| if input_shapes[i - 1] != input_shapes[i]: | |||
| is_uniform = False | |||
| break | |||
| return is_uniform | |||
| @bprop_getters.register(P.Concat) | |||
| def get_bprop_concat(self): | |||
| """Generate bprop for Concat""" | |||
| @@ -325,6 +334,7 @@ def get_bprop_concat(self): | |||
| slice_out = P.Slice()(dout, out_offset[i], input_shapes[i]) | |||
| dx = dx + (slice_out,) | |||
| return (dx,) | |||
| return bprop | |||
| @@ -494,6 +504,7 @@ def _range_op(start, limit, delta, dtype): | |||
| output_tensor = Tensor(list(range(start, limit, delta)), dtype) | |||
| return output_tensor | |||
| @constexpr | |||
| def _get_1d_shape(in_shape): | |||
| """helper function for grad of Sort""" | |||
| @@ -502,6 +513,7 @@ def _get_1d_shape(in_shape): | |||
| out_shape *= i | |||
| return (out_shape,) | |||
| @constexpr | |||
| def _get_transposition(axis, rank): | |||
| """helper function for grad of Sort""" | |||
| @@ -511,6 +523,7 @@ def _get_transposition(axis, rank): | |||
| trans = tuple(transposition.tolist()) | |||
| return trans | |||
| @bprop_getters.register(P.Sort) | |||
| def get_bprop_sort(self): | |||
| """Grad definition for `Sort` operation.""" | |||
| @@ -589,14 +602,14 @@ def get_bprop_range(self): | |||
| return bprop | |||
| @bprop_getters.register(P.Pack) | |||
| def get_bprop_pack(self): | |||
| """Generate bprop for Pack""" | |||
| @bprop_getters.register(P.Stack) | |||
| def get_bprop_stack(self): | |||
| """Generate bprop for Stack""" | |||
| axis = self.axis | |||
| def bprop(x, out, dout): | |||
| pack_grad = P.Unpack(axis) | |||
| out = pack_grad(dout) | |||
| stack_grad = P.Unstack(axis) | |||
| out = stack_grad(dout) | |||
| if is_sub_class(F.typeof(x), ms.list_): | |||
| ret = [] | |||
| for item in out: | |||
| @@ -619,14 +632,15 @@ def get_bprop_reverse_v2(self): | |||
| return bprop | |||
| @bprop_getters.register(P.Unpack) | |||
| def get_bprop_unpack(self): | |||
| """Generate bprop for Unpack""" | |||
| @bprop_getters.register(P.Unstack) | |||
| def get_bprop_unstack(self): | |||
| """Generate bprop for Unstack""" | |||
| axis = self.axis | |||
| def bprop(x, out, dout): | |||
| unpack_grad = P.Pack(axis) | |||
| out = unpack_grad(dout) | |||
| unstack_grad = P.Stack(axis) | |||
| out = unstack_grad(dout) | |||
| return (out,) | |||
| return bprop | |||
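Editor's note: the two bprops above mirror each other because Stack and Unstack are inverses along the same axis, so the gradient of one is simply the other applied to dout. A quick sketch of the inverse relationship:

import numpy as np
import mindspore.ops.operations as P
from mindspore import Tensor

x = Tensor(np.arange(6).reshape(2, 3).astype(np.float32))
parts = P.Unstack(axis=0)(x)   # tuple of two (3,) tensors
y = P.Stack(axis=0)(parts)     # back to the original (2, 3) shape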
| @@ -747,6 +761,7 @@ def get_bprop_scatter_non_aliasing_add_update(self): | |||
| return bprop | |||
| @bprop_getters.register(P.TensorScatterUpdate) | |||
| def get_bprop_tensor_scatter_update(self): | |||
| """Generate bprop for TensorScatterUpdate""" | |||
| @@ -761,7 +776,6 @@ def get_bprop_tensor_scatter_update(self): | |||
| return bprop | |||
| @bprop_getters.register(P.ScatterMax) | |||
| def get_bprop_scatter_max(self): | |||
| """Generate bprop for ScatterMax""" | |||
| @@ -891,7 +905,7 @@ def get_bprop_unsorted_segment_sum(self): | |||
| def bprop(x, segment_ids, num_segments, out, dout): | |||
| return _gather_drop_negatives(dout, segment_ids, None, None)[0], zeros_like(segment_ids), \ | |||
| zeros_like(num_segments) | |||
| zeros_like(num_segments) | |||
| return bprop | |||
| @@ -974,9 +988,11 @@ def get_bprop_batch_to_space(self): | |||
| def get_bprop_space_to_batch_nd(self): | |||
| """Generate bprop for SpaceToBatchND""" | |||
| space_to_batch_nd_grad = P.BatchToSpaceND(self.block_shape, self.paddings) | |||
| def bprop(x, out, dout): | |||
| dx = space_to_batch_nd_grad(dout) | |||
| return (dx,) | |||
| return bprop | |||
| @@ -984,11 +1000,14 @@ def get_bprop_space_to_batch_nd(self): | |||
| def get_bprop_batch_to_space_nd(self): | |||
| """Generate bprop for BatchToSpaceND""" | |||
| batch_to_space_nd_grad = P.SpaceToBatchND(self.block_shape, self.crops) | |||
| def bprop(x, out, dout): | |||
| dx = batch_to_space_nd_grad(dout) | |||
| return (dx,) | |||
| return bprop | |||
| @bprop_getters.register(P.BroadcastTo) | |||
| def get_bprop_broadcast_to(self): | |||
| """Generate bprop for BroadcastTo""" | |||
| @@ -1005,6 +1024,7 @@ def get_bprop_broadcast_to(self): | |||
| reduced_grad = reduce_keep_dim(dout, reduction_axes) | |||
| dx = reshape(reduced_grad, x_shape) | |||
| return (dx,) | |||
| return bprop | |||
| @@ -1016,6 +1036,7 @@ def get_bprop_reverse_sequence(self): | |||
| def bprop(x, seq_lengths, out, dout): | |||
| dx = reverse_sequence_grad(dout, seq_lengths) | |||
| return dx, zeros_like(seq_lengths) | |||
| return bprop | |||
| @@ -1023,9 +1044,11 @@ def get_bprop_reverse_sequence(self): | |||
| def get_bprop_trans_shape(self): | |||
| """Generate bprop for TransShape""" | |||
| op = P.TransShape() | |||
| def bprop(x, shape, out, dout): | |||
| dx = op(dout, shape_op(x)) | |||
| return (dx, zeros_like(shape)) | |||
| return bprop | |||
| @@ -1033,7 +1056,9 @@ def get_bprop_trans_shape(self): | |||
| def get_bprop_unique(self): | |||
| """Generate bprop for Unique""" | |||
| op = G.UniqueGrad() | |||
| def bprop(x, out, dout): | |||
| dx = op(dout, out) | |||
| return (dx,) | |||
| return bprop | |||
| @@ -38,7 +38,7 @@ from .squeeze import _squeeze_aicpu | |||
| from .expand_dims import _expand_dims_aicpu | |||
| from .randperm import _randperm_aicpu | |||
| from .random_choice_with_mask import _random_choice_with_mask_aicpu | |||
| from .pack import _pack_aicpu | |||
| from .stack import _stack_aicpu | |||
| from .uniform_candidate_sampler import _uniform_candidate_sampler_aicpu | |||
| from .log_uniform_candidate_sampler import _log_uniform_candidate_sampler_aicpu | |||
| from .compute_accidental_hits import _compute_accidental_hits_aicpu | |||
| @@ -13,10 +13,10 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Pack op""" | |||
| """Stack op""" | |||
| from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType | |||
| pack_op_info = AiCPURegOp("Pack") \ | |||
| stack_op_info = AiCPURegOp("Stack") \ | |||
| .fusion_type("OPAQUE") \ | |||
| .attr("axis", "int") \ | |||
| .input(0, "x", "dynamic") \ | |||
| @@ -35,7 +35,7 @@ pack_op_info = AiCPURegOp("Pack") \ | |||
| .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \ | |||
| .get_op_info() | |||
| @op_info_register(pack_op_info) | |||
| def _pack_aicpu(): | |||
| """Pack AiCPU register""" | |||
| @op_info_register(stack_op_info) | |||
| def _stack_aicpu(): | |||
| """Stack AiCPU register""" | |||
| return | |||
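For reference, a new AiCPU kernel would follow the same builder pattern as the registration above. A hedged sketch, assuming the builder also exposes an .output method to match the outputs recorded in the op-info table ("MyOp" and _my_op_aicpu are hypothetical names):

from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType

my_op_info = AiCPURegOp("MyOp") \
    .fusion_type("OPAQUE") \
    .attr("axis", "int") \
    .input(0, "x", "dynamic") \
    .output(0, "y", "required") \
    .dtype_format(DataType.F32_Default, DataType.F32_Default) \
    .get_op_info()

@op_info_register(my_op_info)
def _my_op_aicpu():
    """MyOp AiCPU register: importing the module records the op info."""
    return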
| @@ -16,7 +16,7 @@ | |||
| """Pack op""" | |||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | |||
| pack_op_info = TBERegOp("Pack") \ | |||
| stack_op_info = TBERegOp("Stack") \ | |||
| .fusion_type("OPAQUE") \ | |||
| .async_flag(False) \ | |||
| .binfile_name("pack.so") \ | |||
| @@ -52,7 +52,7 @@ pack_op_info = TBERegOp("Pack") \ | |||
| .get_op_info() | |||
| @op_info_register(pack_op_info) | |||
| @op_info_register(stack_op_info) | |||
| def _pack_tbe(): | |||
| """Pack TBE register""" | |||
| return | |||
| @@ -16,7 +16,7 @@ | |||
| """Unpack op""" | |||
| from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType | |||
| unpack_op_info = TBERegOp("Unpack") \ | |||
| unstack_op_info = TBERegOp("Unstack") \ | |||
| .fusion_type("OPAQUE") \ | |||
| .async_flag(False) \ | |||
| .binfile_name("unpack.so") \ | |||
| @@ -32,7 +32,7 @@ unpack_op_info = TBERegOp("Unpack") \ | |||
| .get_op_info() | |||
| @op_info_register(unpack_op_info) | |||
| @op_info_register(unstack_op_info) | |||
| def _unpack_tbe(): | |||
| """Unpack TBE register""" | |||
| return | |||
| @@ -23,7 +23,7 @@ from ....common import dtype as mstype | |||
| from ....common._register_for_tensor import tensor_operator_registry | |||
| hyper_map = base.HyperMap() | |||
| pack = P.Pack(axis=-1) | |||
| pack = P.Stack(axis=-1) | |||
| def _tensor_getitem(self, index): | |||
| @@ -362,7 +362,7 @@ def _generate_updates_from_tuple(data, index, value, op_type): | |||
| value_shapes = hyper_map(F.shape, value) | |||
| shapes_same = const_utils.check_shapes_same(value_shapes, const_utils.TENSOR_SETITEM) | |||
| if shapes_same: | |||
| value = F.pack(value) | |||
| value = F.stack(value) | |||
| return _generate_updates_from_tensor(data, index, value, op_type) | |||
| data_shape = F.shape(data) | |||
| @@ -90,6 +90,7 @@ gather_nd = P.GatherNd() | |||
| scatter_update = P.ScatterUpdate() | |||
| scatter_nd_update = P.ScatterNdUpdate() | |||
| pack = P.Pack() | |||
| stack = P.Stack() | |||
| partial = P.Partial() | |||
| # depend: mount a node to another node | |||
| depend = P.Depend() | |||
| @@ -20,7 +20,7 @@ A collection of operators to build neural networks or to compute functions. | |||
| """ | |||
| from .image_ops import (CropAndResize) | |||
| from .array_ops import (Argmax, Argmin, Cast, Concat, Pack, Unpack, | |||
| from .array_ops import (Argmax, Argmin, Cast, Concat, Pack, Stack, Unpack, Unstack, | |||
| Diag, DiagPart, DType, ExpandDims, Eye, | |||
| Fill, Ones, Zeros, GatherNd, GatherV2, Gather, SparseGatherV2, InvertPermutation, | |||
| IsInstance, IsSubClass, ArgMaxWithValue, OnesLike, ZerosLike, | |||
| @@ -169,7 +169,9 @@ __all__ = [ | |||
| 'UniqueWithPad', | |||
| 'Concat', | |||
| 'Pack', | |||
| 'Stack', | |||
| 'Unpack', | |||
| 'Unstack', | |||
| 'Tile', | |||
| 'BiasAdd', | |||
| 'Gelu', | |||
| @@ -2252,8 +2252,8 @@ class ParallelConcat(PrimitiveWithInfer): | |||
| return out | |||
| def _get_pack_shape(x_shape, x_type, axis, prim_name): | |||
| """for pack output shape""" | |||
| def _get_stack_shape(x_shape, x_type, axis, prim_name): | |||
| """for stack output shape""" | |||
| validator.check_value_type("shape", x_shape, [tuple, list], prim_name) | |||
| validator.check_int(len(x_shape), 1, Rel.GE, "len of input_x", prim_name) | |||
| validator.check_subclass("input_x[0]", x_type[0], mstype.tensor, prim_name) | |||
| @@ -2271,24 +2271,24 @@ def _get_pack_shape(x_shape, x_type, axis, prim_name): | |||
| return out_shape | |||
| class Pack(PrimitiveWithInfer): | |||
| class Stack(PrimitiveWithInfer): | |||
| r""" | |||
| Packs a list of tensors in specified axis. | |||
| Stacks a list of tensors along the specified axis. | |||
| Packs the list of input tensors with the same rank `R`, output is a tensor of rank `(R+1)`. | |||
| Stacks a list of input tensors, each with the same rank `R`, into a tensor of rank `(R+1)`. | |||
| Given `N` input tensors, each of shape :math:`(x_1, x_2, ..., x_R)`. | |||
| If :math:`0 \le axis`, the shape of the output tensor is :math:`(x_1, x_2, ..., x_{axis}, N, x_{axis+1}, ..., x_R)`. | |||
| Args: | |||
| axis (int): Dimension to pack. Default: 0. | |||
| axis (int): Dimension to stack. Default: 0. | |||
| Negative values wrap around. The range is [-(R+1), R+1). | |||
| Inputs: | |||
| - **input_x** (Union[tuple, list]) - A Tuple or list of Tensor objects with the same shape and type. | |||
| Outputs: | |||
| Tensor. A packed Tensor with the same type as `input_x`. | |||
| Tensor. A stacked Tensor with the same type as `input_x`. | |||
| Raises: | |||
| TypeError: If the data types of elements in `input_x` are not the same. | |||
| @@ -2302,8 +2302,8 @@ class Pack(PrimitiveWithInfer): | |||
| Examples: | |||
| >>> data1 = Tensor(np.array([0, 1]).astype(np.float32)) | |||
| >>> data2 = Tensor(np.array([2, 3]).astype(np.float32)) | |||
| >>> pack = ops.Pack() | |||
| >>> output = pack([data1, data2]) | |||
| >>> stack = ops.Stack() | |||
| >>> output = stack([data1, data2]) | |||
| >>> print(output) | |||
| [[0. 1.] | |||
| [2. 3.]] | |||
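A numpy equivalence check of the documented semantics (illustrative only): N inputs of rank R yield a rank R+1 result with a new dimension of size N at `axis`, and negative axes wrap around:

import numpy as np

data1, data2 = np.array([0., 1.]), np.array([2., 3.])
assert np.array_equal(np.stack([data1, data2], axis=0), [[0., 1.], [2., 3.]])
assert np.array_equal(np.stack([data1, data2], axis=-1), [[0., 2.], [1., 3.]])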
| @@ -2311,7 +2311,7 @@ class Pack(PrimitiveWithInfer): | |||
| @prim_attr_register | |||
| def __init__(self, axis=0): | |||
| """Initialize Pack""" | |||
| """Initialize Stack""" | |||
| validator.check_value_type("axis", axis, [int], self.name) | |||
| self.axis = axis | |||
| @@ -2319,18 +2319,39 @@ class Pack(PrimitiveWithInfer): | |||
| x_shape = value['shape'] | |||
| x_type = value['dtype'] | |||
| self.add_prim_attr('num', len(x_shape)) | |||
| all_shape = _get_pack_shape(x_shape, x_type, self.axis, self.name) | |||
| all_shape = _get_stack_shape(x_shape, x_type, self.axis, self.name) | |||
| out = {'shape': all_shape, | |||
| 'dtype': x_type[0], | |||
| 'value': None} | |||
| return out | |||
| def Pack(axis=0): | |||
| """ | |||
| Packs a list of tensors along the specified axis. | |||
| class Unpack(PrimitiveWithInfer): | |||
| r""" | |||
| The usage of Pack is deprecated. Please use Stack. | |||
| """ | |||
| logger.warning("WARN_DEPRECATED: The usage of Pack is deprecated. Please use Stack.") | |||
| return Stack(axis) | |||
| def Unpack(axis=0): | |||
| """ | |||
| Unpacks tensor in specified axis. | |||
| Unpacks a tensor of rank `R` along axis dimension, output tensors will have rank `(R-1)`. | |||
| The usage of Unpack is deprecated. Please use Unstack. | |||
| """ | |||
| logger.warning("WARN_DEPRECATED: The usage of Unpack is deprecated. Please use Unstack.") | |||
| return Unstack(axis) | |||
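Per the shims above, the old names keep working during the deprecation window: calling them logs WARN_DEPRECATED and returns the renamed primitive, so existing call sites behave unchanged:

pack_op = P.Pack(axis=0)      # warns, then returns P.Stack(axis=0)
unpack_op = P.Unpack(axis=0)  # warns, then returns P.Unstack(axis=0)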
| class Unstack(PrimitiveWithInfer): | |||
| r""" | |||
| Unstacks a tensor along the specified axis. | |||
| Unstacks a tensor of rank `R` along the axis dimension; each output tensor has rank `(R-1)`. | |||
| Given a tensor of shape :math:`(x_1, x_2, ..., x_R)`. If :math:`0 \le axis`, | |||
| the shape of each output tensor is :math:`(x_1, x_2, ..., x_{axis}, x_{axis+2}, ..., x_R)`. | |||
| @@ -2343,7 +2364,7 @@ class Unpack(PrimitiveWithInfer): | |||
| Inputs: | |||
| - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. | |||
| A tensor to be unpacked and the rank of the tensor must be greater than 0. | |||
| A tensor to be unstacked; its rank must be greater than 0. | |||
| Outputs: | |||
| A tuple of tensors, each with the same shape. | |||
| @@ -2355,9 +2376,9 @@ class Unpack(PrimitiveWithInfer): | |||
| ``Ascend`` ``GPU`` ``CPU`` | |||
| Examples: | |||
| >>> unpack = ops.Unpack() | |||
| >>> unstack = ops.Unstack() | |||
| >>> input_x = Tensor(np.array([[1, 1, 1, 1], [2, 2, 2, 2]])) | |||
| >>> output = unpack(input_x) | |||
| >>> output = unstack(input_x) | |||
| >>> print(output) | |||
| (Tensor(shape=[4], dtype=Int32, value= [1, 1, 1, 1]), | |||
| Tensor(shape=[4], dtype=Int32, value= [2, 2, 2, 2])) | |||
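A hedged numpy equivalent of the example above: unstacking yields x.shape[axis] tensors, each with that dimension removed:

import numpy as np

def unstack(x, axis=0):
    # numpy stand-in for Unstack: iterate over `axis`, dropping that dimension
    return tuple(np.moveaxis(x, axis, 0))

output = unstack(np.array([[1, 1, 1, 1], [2, 2, 2, 2]]))
# (array([1, 1, 1, 1]), array([2, 2, 2, 2])), matching the example output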
| @@ -2365,7 +2386,7 @@ class Unpack(PrimitiveWithInfer): | |||
| @prim_attr_register | |||
| def __init__(self, axis=0): | |||
| """Initialize Unpack""" | |||
| """Initialize Unstack""" | |||
| validator.check_value_type("axis", axis, [int], self.name) | |||
| self.axis = axis | |||
| @@ -2382,7 +2403,7 @@ class Unpack(PrimitiveWithInfer): | |||
| self.add_prim_attr('num', output_num) | |||
| output_valid_check = x_shape[self.axis] - output_num | |||
| validator.check_int(output_valid_check, 0, Rel.EQ, | |||
| "The dimension which to unpack divides output_num", self.name) | |||
| "The dimension which to unstack divides output_num", self.name) | |||
| out_shapes = [] | |||
| out_dtypes = [] | |||
| out_shape = x_shape[:self.axis] + x_shape[self.axis + 1:] | |||
| @@ -265,7 +265,7 @@ class Attention(nn.Cell): | |||
| past_key = self.transpose(layer_past[0], (0, 1, 3, 2)) | |||
| key = self.concat_k((past_key, key)) | |||
| value = self.concat_v(past_value, value) | |||
| layer_present = P.Pack()([self.transpose(key, (0, 1, 3, 2)), value]) | |||
| layer_present = P.Stack()([self.transpose(key, (0, 1, 3, 2)), value]) | |||
| attention = self._attn(query, key, value, attention_mask) | |||
| attention_merge = self.merge_heads(attention) | |||
| output = self.projection(attention_merge) | |||
| @@ -25,11 +25,11 @@ context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") | |||
| class Net(nn.Cell): | |||
| def __init__(self, x, axis): | |||
| super(Net, self).__init__() | |||
| self.pack = P.Pack(axis) | |||
| self.stack = P.Stack(axis) | |||
| self.x = x | |||
| def construct(self): | |||
| return self.pack(self.x) | |||
| return self.stack(self.x) | |||
| def test_net_bool(): | |||
| @@ -28,7 +28,7 @@ from mindspore.common.parameter import Parameter | |||
| class PackNet(nn.Cell): | |||
| def __init__(self, nptype): | |||
| super(PackNet, self).__init__() | |||
| self.pack = P.Pack(axis=2) | |||
| self.stack = P.Stack(axis=2) | |||
| self.data_np = np.array([0] * 16).astype(nptype) | |||
| self.data_np = np.reshape(self.data_np, (2, 2, 2, 2)) | |||
| self.x1 = Parameter(initializer( | |||
| @@ -38,7 +38,7 @@ class PackNet(nn.Cell): | |||
| @ms_function | |||
| def construct(self): | |||
| return self.pack((self.x1, self.x2)) | |||
| return self.stack((self.x1, self.x2)) | |||
| def pack(nptype): | |||
| @@ -29,7 +29,7 @@ class Net(nn.Cell): | |||
| def __init__(self, nptype): | |||
| super(Net, self).__init__() | |||
| self.unpack = P.Unpack(axis=3) | |||
| self.unstack = P.Unstack(axis=3) | |||
| self.data_np = np.array([[[[[0, 0], | |||
| [-2, -1]], | |||
| [[0, 0], | |||
| @@ -70,7 +70,7 @@ class Net(nn.Cell): | |||
| @ms_function | |||
| def construct(self): | |||
| return self.unpack(self.x1) | |||
| return self.unstack(self.x1) | |||
| def unpack(nptype): | |||
| @@ -125,7 +125,7 @@ def unpack_pynative(nptype): | |||
| x1 = Tensor(x1) | |||
| expect = (np.reshape(np.array([0] * 36).astype(nptype), (3, 3, 2, 2)), | |||
| np.arange(-2, 34, 1).reshape(3, 3, 2, 2).astype(nptype)) | |||
| output = P.Unpack(axis=3)(x1) | |||
| output = P.Unstack(axis=3)(x1) | |||
| for i, exp in enumerate(expect): | |||
| assert (output[i].asnumpy() == exp).all() | |||
| @@ -25,11 +25,11 @@ from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter | |||
| class PackNet(nn.Cell): | |||
| class StackNet(nn.Cell): | |||
| def __init__(self, nptype): | |||
| super(PackNet, self).__init__() | |||
| super(StackNet, self).__init__() | |||
| self.pack = P.Pack(axis=2) | |||
| self.stack = P.Stack(axis=2) | |||
| self.data_np = np.array([0] * 16).astype(nptype) | |||
| self.data_np = np.reshape(self.data_np, (2, 2, 2, 2)) | |||
| self.x1 = Parameter(initializer( | |||
| @@ -39,13 +39,13 @@ class PackNet(nn.Cell): | |||
| @ms_function | |||
| def construct(self): | |||
| return self.pack((self.x1, self.x2)) | |||
| return self.stack((self.x1, self.x2)) | |||
| def pack(nptype): | |||
| def stack(nptype): | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='GPU') | |||
| pack_ = PackNet(nptype) | |||
| output = pack_() | |||
| stack_ = StackNet(nptype) | |||
| output = stack_() | |||
| expect = np.array([[[[[0, 0], | |||
| [0, 0]], | |||
| [[0, 1], | |||
| @@ -64,7 +64,7 @@ def pack(nptype): | |||
| [14, 15]]]]]).astype(nptype) | |||
| assert (output.asnumpy() == expect).all() | |||
| def pack_pynative(nptype): | |||
| def stack_pynative(nptype): | |||
| context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU') | |||
| x1 = np.array([0] * 16).astype(nptype) | |||
| x1 = np.reshape(x1, (2, 2, 2, 2)) | |||
| @@ -86,77 +86,77 @@ def pack_pynative(nptype): | |||
| [0, 0]], | |||
| [[12, 13], | |||
| [14, 15]]]]]).astype(nptype) | |||
| output = P.Pack(axis=2)((x1, x2)) | |||
| output = P.Stack(axis=2)((x1, x2)) | |||
| assert (output.asnumpy() == expect).all() | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_graph_float32(): | |||
| pack(np.float32) | |||
| def test_stack_graph_float32(): | |||
| stack(np.float32) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_graph_float16(): | |||
| pack(np.float16) | |||
| def test_stack_graph_float16(): | |||
| stack(np.float16) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_graph_int32(): | |||
| pack(np.int32) | |||
| def test_stack_graph_int32(): | |||
| stack(np.int32) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_graph_int16(): | |||
| pack(np.int16) | |||
| def test_stack_graph_int16(): | |||
| stack(np.int16) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_graph_uint8(): | |||
| pack(np.uint8) | |||
| def test_stack_graph_uint8(): | |||
| stack(np.uint8) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_graph_bool(): | |||
| pack(np.bool) | |||
| def test_stack_graph_bool(): | |||
| stack(np.bool) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_pynative_float32(): | |||
| pack_pynative(np.float32) | |||
| def test_stack_pynative_float32(): | |||
| stack_pynative(np.float32) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_pynative_float16(): | |||
| pack_pynative(np.float16) | |||
| def test_stack_pynative_float16(): | |||
| stack_pynative(np.float16) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_pynative_int32(): | |||
| pack_pynative(np.int32) | |||
| def test_stack_pynative_int32(): | |||
| stack_pynative(np.int32) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_pynative_int16(): | |||
| pack_pynative(np.int16) | |||
| def test_stack_pynative_int16(): | |||
| stack_pynative(np.int16) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_pynative_uint8(): | |||
| pack_pynative(np.uint8) | |||
| def test_stack_pynative_uint8(): | |||
| stack_pynative(np.uint8) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_pack_pynative_bool(): | |||
| pack_pynative(np.bool) | |||
| def test_stack_pynative_bool(): | |||
| stack_pynative(np.bool) | |||
| @@ -25,11 +25,11 @@ from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter | |||
| class UnpackNet(nn.Cell): | |||
| class UnstackNet(nn.Cell): | |||
| def __init__(self, nptype): | |||
| super(UnpackNet, self).__init__() | |||
| super(UnstackNet, self).__init__() | |||
| self.unpack = P.Unpack(axis=3) | |||
| self.unstack = P.Unstack(axis=3) | |||
| self.data_np = np.array([[[[[0, 0], | |||
| [0, 1]], | |||
| [[0, 0], | |||
| @@ -50,20 +50,21 @@ class UnpackNet(nn.Cell): | |||
| @ms_function | |||
| def construct(self): | |||
| return self.unpack(self.x1) | |||
| return self.unstack(self.x1) | |||
| def unpack(nptype): | |||
| def unstack(nptype): | |||
| context.set_context(mode=context.GRAPH_MODE, device_target='GPU') | |||
| unpack_ = UnpackNet(nptype) | |||
| output = unpack_() | |||
| unstack_ = UnstackNet(nptype) | |||
| output = unstack_() | |||
| expect = (np.reshape(np.array([0] * 16).astype(nptype), (2, 2, 2, 2)), | |||
| np.arange(2 * 2 * 2 * 2).reshape(2, 2, 2, 2).astype(nptype)) | |||
| for i, exp in enumerate(expect): | |||
| assert (output[i].asnumpy() == exp).all() | |||
| def unpack_pynative(nptype): | |||
| def unstack_pynative(nptype): | |||
| context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU') | |||
| x1 = np.array([[[[[0, 0], | |||
| [0, 1]], | |||
| @@ -84,79 +85,91 @@ def unpack_pynative(nptype): | |||
| x1 = Tensor(x1) | |||
| expect = (np.reshape(np.array([0] * 16).astype(nptype), (2, 2, 2, 2)), | |||
| np.arange(2 * 2 * 2 * 2).reshape(2, 2, 2, 2).astype(nptype)) | |||
| output = P.Unpack(axis=3)(x1) | |||
| output = P.Unstack(axis=3)(x1) | |||
| for i, exp in enumerate(expect): | |||
| assert (output[i].asnumpy() == exp).all() | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_graph_float32(): | |||
| unpack(np.float32) | |||
| def test_unstack_graph_float32(): | |||
| unstack(np.float32) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_graph_float16(): | |||
| unpack(np.float16) | |||
| def test_unstack_graph_float16(): | |||
| unstack(np.float16) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_graph_int32(): | |||
| unpack(np.int32) | |||
| def test_unstack_graph_int32(): | |||
| unstack(np.int32) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_graph_int16(): | |||
| unpack(np.int16) | |||
| def test_unstack_graph_int16(): | |||
| unstack(np.int16) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_graph_uint8(): | |||
| unpack(np.uint8) | |||
| def test_unstack_graph_uint8(): | |||
| unstack(np.uint8) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_graph_bool(): | |||
| unpack(np.bool) | |||
| def test_unstack_graph_bool(): | |||
| unstack(np.bool) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_pynative_float32(): | |||
| unpack_pynative(np.float32) | |||
| def test_unstack_pynative_float32(): | |||
| unstack_pynative(np.float32) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_pynative_float16(): | |||
| unpack_pynative(np.float16) | |||
| def test_unstack_pynative_float16(): | |||
| unstack_pynative(np.float16) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_pynative_int32(): | |||
| unpack_pynative(np.int32) | |||
| def test_unstack_pynative_int32(): | |||
| unstack_pynative(np.int32) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_pynative_int16(): | |||
| unpack_pynative(np.int16) | |||
| def test_unstack_pynative_int16(): | |||
| unstack_pynative(np.int16) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_pynative_uint8(): | |||
| unpack_pynative(np.uint8) | |||
| def test_unstack_pynative_uint8(): | |||
| unstack_pynative(np.uint8) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.env_onecard | |||
| def test_unpack_pynative_bool(): | |||
| unpack_pynative(np.bool) | |||
| def test_unstack_pynative_bool(): | |||
| unstack_pynative(np.bool) | |||
| @@ -26,14 +26,14 @@ namespace mindspore { | |||
| namespace opt { | |||
| class TestHWPackFission : public BackendCommon { | |||
| public: | |||
| TestHWPackFission() : get_py_fun_("gtest_input.pre_activate.pack_fission_test", true) {} | |||
| TestHWPackFission() : get_py_fun_("gtest_input.pre_activate.stack_fission_test", true) {} | |||
| ~TestHWPackFission() override = default; | |||
| UT::PyFuncGraphFetcher get_py_fun_; | |||
| }; | |||
| TEST_F(TestHWPackFission, test_pack_fission_divided_by_3) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_pack_fission", "before"); | |||
| TEST_F(TestHWPackFission, test_stack_fission_divided_by_3) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_stack_fission", "before"); | |||
| EXPECT_NE(g, nullptr); | |||
| std::vector<int64_t> shp{2, 32, 224, 224}; | |||
| auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp); | |||
| @@ -51,13 +51,13 @@ TEST_F(TestHWPackFission, test_pack_fission_divided_by_3) { | |||
| optimizer->AddPassManager(pm); | |||
| FuncGraphPtr new_graph = optimizer->Optimize(kg); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_pack_fission", "after_divided_by_3"); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_stack_fission", "after_divided_by_3"); | |||
| EXPECT_NE(g_after, nullptr); | |||
| EXPECT_TRUE(CheckEqualGraph(g_after, new_graph)); | |||
| } | |||
| TEST_F(TestHWPackFission, test_pack_fission_divided_by_4) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_pack_fission", "before"); | |||
| TEST_F(TestHWPackFission, test_stack_fission_divided_by_4) { | |||
| FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_stack_fission", "before"); | |||
| EXPECT_NE(g, nullptr); | |||
| std::vector<int64_t> shp{2, 32, 224, 224}; | |||
| auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp); | |||
| @@ -75,7 +75,7 @@ TEST_F(TestHWPackFission, test_pack_fission_divided_by_4) { | |||
| optimizer->AddPassManager(pm); | |||
| FuncGraphPtr new_graph = optimizer->Optimize(kg); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_pack_fission", "after_divided_by_4"); | |||
| FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_stack_fission", "after_divided_by_4"); | |||
| EXPECT_NE(g_after, nullptr); | |||
| EXPECT_TRUE(CheckEqualGraph(g_after, new_graph)); | |||
| } | |||
| @@ -16,7 +16,7 @@ | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import Primitive | |||
| pack = P.Pack() | |||
| stack = P.Stack() | |||
| concat = P.Concat() | |||
| make_tuple = Primitive('make_tuple') | |||
| @@ -32,26 +32,26 @@ class FnDict: | |||
| return self.fnDict[name] | |||
| def test_pack_fission(tag): | |||
| def test_stack_fission(tag): | |||
| """ test_adam_apply_one_with_decay_rule """ | |||
| fns = FnDict() | |||
| @fns | |||
| def before(input0, input1, input2, input3, input4, input5, input6, input7, input8): | |||
| return pack((input0, input1, input2, input3, input4, input5, input6, input7, input8)) | |||
| return stack((input0, input1, input2, input3, input4, input5, input6, input7, input8)) | |||
| @fns | |||
| def after_divided_by_3(input0, input1, input2, input3, input4, input5, input6, input7, input8): | |||
| pack1 = pack(input0, input1, input2) | |||
| pack2 = pack(input3, input4, input5) | |||
| pack3 = pack(input6, input7, input8) | |||
| return make_tuple(concat(pack1, pack2, pack3)) | |||
| stack1 = stack(input0, input1, input2) | |||
| stack2 = stack(input3, input4, input5) | |||
| stack3 = stack(input6, input7, input8) | |||
| return make_tuple(concat(stack1, stack2, stack3)) | |||
| @fns | |||
| def after_divided_by_4(input0, input1, input2, input3, input4, input5, input6, input7, input8): | |||
| pack1 = pack(input0, input1, input2, input3) | |||
| pack2 = pack(input4, input5, input6, input7) | |||
| pack3 = pack(input8) | |||
| return make_tuple(concat(pack1, pack2, pack3)) | |||
| stack1 = stack(input0, input1, input2, input3) | |||
| stack2 = stack(input4, input5, input6, input7) | |||
| stack3 = stack(input8) | |||
| return make_tuple(concat(stack1, stack2, stack3)) | |||
| return fns[tag] | |||
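The rewrite these graphs encode is valid because stacking all inputs at once equals stacking them in chunks and concatenating along the same axis. A hedged numpy check (default axis 0, as in the test):

import numpy as np

inputs = [np.full((2, 2), i, dtype=np.float32) for i in range(9)]
whole = np.stack(inputs)                                  # shape (9, 2, 2)
chunks = [np.stack(inputs[0:3]), np.stack(inputs[3:6]), np.stack(inputs[6:9])]
assert np.array_equal(np.concatenate(chunks), whole)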
| @@ -258,19 +258,19 @@ class SpaceToBatchNet(Cell): | |||
| class PackNet(Cell): | |||
| def __init__(self): | |||
| super(PackNet, self).__init__() | |||
| self.pack = P.Pack() | |||
| self.stack = P.Stack() | |||
| def construct(self, x): | |||
| return self.pack((x, x)) | |||
| return self.stack((x, x)) | |||
| class UnpackNet(Cell): | |||
| def __init__(self): | |||
| super(UnpackNet, self).__init__() | |||
| self.unpack = P.Unpack() | |||
| self.unstack = P.Unstack() | |||
| def construct(self, x): | |||
| return self.unpack(x) | |||
| return self.unstack(x) | |||
| class SpaceToDepthNet(Cell): | |||
| def __init__(self): | |||
| super(SpaceToDepthNet, self).__init__() | |||
| @@ -115,9 +115,9 @@ class NetForConcat4(nn.Cell): | |||
| return self.concat((x1, x2, x3)) | |||
| class NetForPackInput(nn.Cell): | |||
| class NetForStackInput(nn.Cell): | |||
| def __init__(self, op): | |||
| super(NetForPackInput, self).__init__() | |||
| super(NetForStackInput, self).__init__() | |||
| self.op = op | |||
| self.mul = P.Mul() | |||
| @@ -2272,32 +2272,32 @@ test_case_array_ops = [ | |||
| Tensor(np.array([1], np.float32)), | |||
| Tensor(np.array([1], np.float32))], | |||
| 'desc_bprop': [[3,]]}), | |||
| ('Pack_0', { | |||
| 'block': NetForPackInput(P.Pack()), | |||
| ('Stack_0', { | |||
| 'block': NetForStackInput(P.Stack()), | |||
| 'desc_inputs': [[2, 2], [2, 2], [2, 2]], | |||
| 'desc_bprop': [[3, 2, 2]], | |||
| }), | |||
| ('Pack_1', { | |||
| 'block': NetForPackInput(P.Pack(axis=-2)), | |||
| ('Stack_1', { | |||
| 'block': NetForStackInput(P.Stack(axis=-2)), | |||
| 'desc_inputs': [[3, 2, 3], [3, 2, 3], [3, 2, 3]], | |||
| 'desc_bprop': [[3, 2, 3, 3]], | |||
| }), | |||
| ('Pack_2', { | |||
| 'block': NetForPackInput(P.Pack()), | |||
| ('Stack_2', { | |||
| 'block': NetForStackInput(P.Stack()), | |||
| 'desc_inputs': [[128, 128], [128, 128]], | |||
| 'desc_bprop': [[2, 128, 128]], | |||
| }), | |||
| ('Pack_3', { | |||
| 'block': NetForPackInput(P.Pack()), | |||
| ('Stack_3', { | |||
| 'block': NetForStackInput(P.Stack()), | |||
| 'desc_inputs': [[2, 2]], | |||
| 'desc_bprop': [[1, 2, 2]]}), | |||
| ('Unpack_0', { | |||
| 'block': NetForUnpackInput(P.Unpack(axis=0)), | |||
| 'block': NetForUnpackInput(P.Unstack(axis=0)), | |||
| 'desc_inputs': [[2, 4]], | |||
| 'desc_bprop': [[4], [4]], | |||
| }), | |||
| ('Unpack_1', { | |||
| 'block': NetForUnpackInput(P.Unpack(axis=-1)), | |||
| 'block': NetForUnpackInput(P.Unstack(axis=-1)), | |||
| 'desc_inputs': [Tensor(np.array([[1, 1, 1]], np.float32))], | |||
| 'desc_bprop': [[1], [1], [1]], | |||
| }), | |||
| @@ -26,7 +26,7 @@ from mindspore.nn import Dense, Flatten | |||
| class Net(nn.Cell): | |||
| def __init__(self, weight1, weight2, axis=0, strategy1=None, strategy2=None, is_parameter=True): | |||
| super(Net, self).__init__() | |||
| self.pack = P.Pack(axis=axis).shard(strategy1) | |||
| self.pack = P.Stack(axis=axis).shard(strategy1) | |||
| self.mul = P.Mul().shard(strategy2) | |||
| if is_parameter: | |||
| self.weight1 = Parameter(weight1, "w1") | |||
| @@ -43,7 +43,7 @@ class Net(nn.Cell): | |||
| class Net1(nn.Cell): | |||
| def __init__(self, weight1, weight2, axis=0, strategy1=None, strategy2=None): | |||
| super(Net1, self).__init__() | |||
| self.pack = P.Pack(axis=axis).shard(strategy1) | |||
| self.pack = P.Stack(axis=axis).shard(strategy1) | |||
| self.mul = P.Mul().shard(strategy2) | |||
| self.weight1 = Parameter(weight1, "w1") | |||
| self.weight2 = Parameter(weight2, "w2") | |||
| @@ -57,7 +57,7 @@ class Net1(nn.Cell): | |||
| class Net2(nn.Cell): | |||
| def __init__(self, weight1, weight2, weight3, axis=0, strategy1=None, strategy2=None, is_parameter=True): | |||
| super(Net2, self).__init__() | |||
| self.pack = P.Pack(axis=axis).shard(strategy1) | |||
| self.pack = P.Stack(axis=axis).shard(strategy1) | |||
| self.mul = P.Mul().shard(strategy2) | |||
| if is_parameter: | |||
| self.weight1 = Parameter(weight1, "w1") | |||
| @@ -85,7 +85,7 @@ class PackConstantNet1(nn.Cell): | |||
| bias_init=Tensor(bias_np), | |||
| has_bias=True) | |||
| self.mul = P.Mul() | |||
| self.pack = P.Pack(axis) | |||
| self.pack = P.Stack(axis) | |||
| if strategy is not None: | |||
| self.pack.shard(strategy) | |||
| @@ -112,7 +112,7 @@ class PackConstantNet2(nn.Cell): | |||
| bias_init=Tensor(bias_np), | |||
| has_bias=True) | |||
| self.mul = P.Mul() | |||
| self.pack = P.Pack(axis) | |||
| self.pack = P.Stack(axis) | |||
| if strategy is not None: | |||
| self.pack.shard(strategy) | |||