From f51d5fc411104153c8169a1b6313b89412faa64e Mon Sep 17 00:00:00 2001
From: wsc <wangshaocong1@huawei.com>
Date: Mon, 20 Apr 2020 16:34:00 +0800
Subject: [PATCH 001/242] Add interface to get the attributes of network

---
 mindspore/nn/cell.py              | 9 ++++++++-
 mindspore/nn/wrap/cell_wrapper.py | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py
index 5507d12af8..853abff0b6 100755
--- a/mindspore/nn/cell.py
+++ b/mindspore/nn/cell.py
@@ -56,7 +56,7 @@ class Cell:
         >>>    def construct(self, x):
         >>>        return self.relu(x)
     """
-    def __init__(self, auto_prefix=True):
+    def __init__(self, auto_prefix=True, flags=None):
         self._params = OrderedDict()
         self._cells = OrderedDict()
         self.training = False
@@ -74,6 +74,8 @@ class Cell:
         if _get_parallel_mode() in ["auto_parallel", "semi_auto_parallel"]:
             self._get_construct_inputs_number_and_name()
         self._parallel_inputs_run = None
+        if flags:
+            self.add_flags(**flags)
 
     @property
     def create_time(self):
@@ -603,6 +605,11 @@ class Cell:
             cell.add_flags_recursive(**flags)
         return self
 
+    def get_flags(self):
+        if not hasattr(self, "_mindspore_flags"):
+            self._mindspore_flags = {}
+        return self._mindspore_flags
+
     def to_float(self, dst_type):
         """
         Add cast on all inputs of cell and child cells to run with certain float type.
diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py
index 64c382557a..453ddae0fc 100644
--- a/mindspore/nn/wrap/cell_wrapper.py
+++ b/mindspore/nn/wrap/cell_wrapper.py
@@ -226,7 +226,7 @@ class DataWrapper(Cell):
     """
 
     def __init__(self, network, dataset_types, dataset_shapes, queue_name):
-        super(DataWrapper, self).__init__(auto_prefix=False)
+        super(DataWrapper, self).__init__(auto_prefix=False, flags=network.get_flags())
 
         self.get_next = P.GetNext(dataset_types, dataset_shapes, len(dataset_types), queue_name)
         self.network = network

From 6328edf455bd28ca3fd6bf90d19059c6cc4a54e2 Mon Sep 17 00:00:00 2001
From: panfengfeng <panfengfeng@huawei.com>
Date: Wed, 22 Apr 2020 16:27:35 +0800
Subject: [PATCH 002/242] modify dataset & mindrecord log compile

---
 mindspore/ccsrc/dataset/CMakeLists.txt    | 8 ++++++--
 mindspore/ccsrc/mindrecord/CMakeLists.txt | 6 ++++++
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/dataset/CMakeLists.txt b/mindspore/ccsrc/dataset/CMakeLists.txt
index 879a9346bc..dc93d18a28 100644
--- a/mindspore/ccsrc/dataset/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/CMakeLists.txt
@@ -71,7 +71,6 @@ else ()
     add_library(_c_dataengine SHARED ${submodules})
 endif ()
 
-
 set_target_properties(_c_dataengine PROPERTIES
     PREFIX "${PYTHON_MODULE_PREFIX}"
     SUFFIX "${PYTHON_MODULE_EXTENSION}"
@@ -110,5 +109,10 @@ endif()
 
 if (USE_GLOG)
     target_link_libraries(_c_dataengine PRIVATE mindspore::glog)
+else()
+    if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+        target_link_options(_c_dataengine PRIVATE -Wl,-init,mindspore_log_init)
+    elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
+        set_target_properties(_c_dataengine PROPERTIES MACOSX_RPATH ON)
+    endif ()
 endif()
-
diff --git a/mindspore/ccsrc/mindrecord/CMakeLists.txt b/mindspore/ccsrc/mindrecord/CMakeLists.txt
index fdd648a50f..f523eae569 100644
--- a/mindspore/ccsrc/mindrecord/CMakeLists.txt
+++ b/mindspore/ccsrc/mindrecord/CMakeLists.txt
@@ -34,4 +34,10 @@ endif()
 
 if (USE_GLOG)
     target_link_libraries(_c_mindrecord PRIVATE mindspore::glog)
+else()
+    if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+        target_link_options(_c_mindrecord PRIVATE -Wl,-init,mindspore_log_init)
+    elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
+        set_target_properties(_c_mindrecord PROPERTIES MACOSX_RPATH ON)
+    endif ()
 endif()

From d5597e6d1fefaad52a804dc5af4a6cd9ebc5baae Mon Sep 17 00:00:00 2001
From: seatea <t.cai@huawei.com>
Date: Wed, 22 Apr 2020 14:17:45 +0800
Subject: [PATCH 003/242] Fix bug for `ParseAttribute`.

---
 mindspore/ccsrc/pipeline/parse/parse.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/pipeline/parse/parse.cc b/mindspore/ccsrc/pipeline/parse/parse.cc
index 51c4fc17ec..b2f74d44b8 100644
--- a/mindspore/ccsrc/pipeline/parse/parse.cc
+++ b/mindspore/ccsrc/pipeline/parse/parse.cc
@@ -594,8 +594,9 @@ AnfNodePtr Parser::ParseAttribute(const FunctionBlockPtr &block, const py::objec
       std::string var_name = "self.";
       std::string attr_name = node.attr("attr").cast<std::string>();
       (void)var_name.append(attr_name);
+      auto obj = ast()->obj().attr(attr_name.c_str());
       if (py::hasattr(ast()->obj(), attr_name.c_str()) &&
-          py::hasattr(ast()->obj().attr(attr_name.c_str()), PYTHON_PRIMITIVE_FLAG)) {
+          (data_converter::IsCellInstance(obj) || py::hasattr(obj, PYTHON_PRIMITIVE_FLAG))) {
         return block->MakeResolveSymbol(var_name);
       } else {
         return block->ReadVariable(var_name);

From a9e926614922fbf19e0b42249e19a71cb9986751 Mon Sep 17 00:00:00 2001
From: hesham <h.farahat@huawei.com>
Date: Sat, 18 Apr 2020 22:32:53 -0400
Subject: [PATCH 004/242] Deepcopy problem when pyfunc cannot be pickled

---
 mindspore/dataset/engine/datasets.py     | 36 ++++++++++++++++++++++++
 tests/ut/python/dataset/test_iterator.py | 32 ++++++++++++++++++++-
 2 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 1648734704..8625f1e003 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -30,7 +30,9 @@ from enum import Enum
 from importlib import import_module
 import threading
 
+import copy
 import numpy as np
+
 from mindspore._c_dataengine import DataType, TFReaderOp, ImageFolderOp, CifarOp, MnistOp, ManifestOp, \
     MindRecordOp, TextFileOp, CBatchInfo
 from mindspore._c_expression import typing
@@ -1376,6 +1378,23 @@ class MapDataset(DatasetOp):
         """
         return self.input[0].get_dataset_size()
 
+    def __deepcopy__(self, memodict):
+        if id(self) in memodict:
+            return memodict[id(self)]
+        cls = self.__class__
+        new_op = cls.__new__(cls)
+        memodict[id(self)] = new_op
+        new_op.input = copy.deepcopy(self.input, memodict)
+        new_op.input_columns = copy.deepcopy(self.input_columns, memodict)
+        new_op.output_columns = copy.deepcopy(self.output_columns, memodict)
+        new_op.columns_order = copy.deepcopy(self.columns_order, memodict)
+        new_op.num_parallel_workers = copy.deepcopy(self.num_parallel_workers, memodict)
+        new_op.output = copy.deepcopy(self.output, memodict)
+        new_op.input_indexs = copy.deepcopy(self._input_indexs, memodict)
+        new_op.python_multiprocessing = copy.deepcopy(self.python_multiprocessing, memodict)
+        new_op.operations = self.operations
+        return new_op
+
     # Iterator bootstrap will be called on iterator construction.
     # A deep copy of Dataset object is created prior of iterator_bootstrap.
     # This method will create per iterator process pool and bind pyfunc execution to the pool.
@@ -2599,6 +2618,23 @@ class GeneratorDataset(SourceDataset):
         else:
             raise ValueError('set dataset_size with negative value {}'.format(value))
 
+    def __deepcopy__(self, memodict):
+        if id(self) in memodict:
+            return memodict[id(self)]
+        cls = self.__class__
+        new_op = cls.__new__(cls)
+        memodict[id(self)] = new_op
+        new_op.input = copy.deepcopy(self.input, memodict)
+        new_op.output = copy.deepcopy(self.output, memodict)
+        new_op.num_parallel_workers = copy.deepcopy(self.num_parallel_workers, memodict)
+        new_op.column_types = copy.deepcopy(self.column_types, memodict)
+        new_op.column_names = copy.deepcopy(self.column_names, memodict)
+
+        new_op.source = self.source
+        new_op.sampler = self.sampler
+
+        return new_op
+
 
 class TFRecordDataset(SourceDataset):
     """
diff --git a/tests/ut/python/dataset/test_iterator.py b/tests/ut/python/dataset/test_iterator.py
index 7c69adf561..58beecbe16 100644
--- a/tests/ut/python/dataset/test_iterator.py
+++ b/tests/ut/python/dataset/test_iterator.py
@@ -14,7 +14,7 @@
 # ==============================================================================
 import numpy as np
 import pytest
-
+import copy
 import mindspore.dataset as ds
 from mindspore.dataset.engine.iterators import ITERATORS_LIST, _cleanup
 
@@ -81,3 +81,33 @@ def test_iterator_weak_ref():
     assert sum(itr() is not None for itr in ITERATORS_LIST) == 2
 
     _cleanup()
+
+
+class MyDict(dict):
+    def __getattr__(self, key):
+        return self[key]
+
+    def __setattr__(self, key, value):
+        self[key] = value
+
+    def __call__(self, t):
+        return t
+
+
+def test_tree_copy():
+    # Test copying the tree with a pyfunc that cannot be pickled.
+
+    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS)
+    data1 = data.map(operations=[MyDict()])
+
+    itr = data1.create_tuple_iterator()
+
+    assert id(data1) != id(itr.dataset)
+    assert id(data) != id(itr.dataset.input[0])
+    assert id(data1.operations[0]) == id(itr.dataset.operations[0])
+
+    itr.release()
+
+
+if __name__ == '__main__':
+    test_tree_copy()
\ No newline at end of file

From b7076d260e155cb89720ae55796076e5f9af4b27 Mon Sep 17 00:00:00 2001
From: lyfne <linyifan@huawei.com>
Date: Wed, 8 Apr 2020 20:46:01 +0800
Subject: [PATCH 005/242] Do cse graph by graph

---
 mindspore/ccsrc/optimizer/cse.cc | 30 +++++++++++++++---------------
 mindspore/ccsrc/optimizer/cse.h  |  1 +
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/mindspore/ccsrc/optimizer/cse.cc b/mindspore/ccsrc/optimizer/cse.cc
index 82050f6108..42ebf5a658 100644
--- a/mindspore/ccsrc/optimizer/cse.cc
+++ b/mindspore/ccsrc/optimizer/cse.cc
@@ -40,14 +40,14 @@ BasePtr AbsOf(const AnfNodePtr &node) {
   return node_abs;
 }
 
-namespace {
-void BuildOrderGroup(const FuncGraphManagerPtr manager, std::vector<std::size_t> *const order_group,
-                     std::unordered_map<std::size_t, std::vector<AnfNodePtr>> *groups) {
-  MS_EXCEPTION_IF_NULL(order_group);
-
-  std::unordered_map<AnfNodePtr, std::size_t> hashes;
+bool CSE::BuildOrderGroupAndDoReplace(const FuncGraphManagerPtr manager) const {
+  bool changed = false;
   for (FuncGraphPtr fg : manager->func_graphs()) {
     MS_EXCEPTION_IF_NULL(fg);
+    std::vector<std::size_t> order_group;
+    std::unordered_map<std::size_t, std::vector<AnfNodePtr>> groups;
+    std::unordered_map<AnfNodePtr, std::size_t> hashes;
+
     std::vector<AnfNodePtr> toposet = TopoSort(fg->get_return());
     for (auto node : toposet) {
       MS_EXCEPTION_IF_NULL(node);
@@ -75,17 +75,20 @@ void BuildOrderGroup(const FuncGraphManagerPtr manager, std::vector<std::size_t>
       }
 
       hashes[node] = h;
-      if (groups->find(h) == groups->end()) {
+      if (groups.find(h) == groups.end()) {
         std::vector<AnfNodePtr> innervec({node});
-        (*groups)[h] = innervec;
-        order_group->emplace_back(h);
+        groups[h] = innervec;
+        order_group.emplace_back(h);
       } else {
-        (*groups)[h].push_back(node);
+        groups[h].push_back(node);
       }
     }
+
+    changed = DoReplace(manager, order_group, &groups) || changed;
   }
+
+  return changed;
 }
-}  // namespace
 
 bool CSE::CheckReplace(const AnfNodePtr &main, const AnfNodePtr &node) const {
   MS_EXCEPTION_IF_NULL(main);
@@ -177,10 +180,7 @@ bool CSE::Cse(const FuncGraphPtr root, const FuncGraphManagerPtr manager) const
   MS_EXCEPTION_IF_NULL(manager);
   manager->AddFuncGraph(root);
 
-  std::unordered_map<std::size_t, std::vector<AnfNodePtr>> groups;
-  std::vector<std::size_t> order_group;
-  BuildOrderGroup(manager, &order_group, &groups);
-  return DoReplace(manager, order_group, &groups);
+  return BuildOrderGroupAndDoReplace(manager);
 }
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/optimizer/cse.h b/mindspore/ccsrc/optimizer/cse.h
index 823b24edb7..544e6cb6a3 100644
--- a/mindspore/ccsrc/optimizer/cse.h
+++ b/mindspore/ccsrc/optimizer/cse.h
@@ -46,6 +46,7 @@ class CSE {
   bool Cse(const FuncGraphPtr root, const FuncGraphManagerPtr manager) const;
 
  private:
+  bool BuildOrderGroupAndDoReplace(const FuncGraphManagerPtr manager) const;
   bool DoReplace(const FuncGraphManagerPtr manager, const std::vector<std::size_t> &order_group,
                  std::unordered_map<std::size_t, std::vector<AnfNodePtr>> *groups) const;
   bool report_changes_;

From 8a988ce0d170cda7bd0cc89354b5011b623a48e9 Mon Sep 17 00:00:00 2001
From: zhoufeng <zhoufeng54@huawei.com>
Date: Thu, 23 Apr 2020 11:11:54 +0800
Subject: [PATCH 006/242] add vc runtime dll to package

---
 cmake/package.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cmake/package.cmake b/cmake/package.cmake
index d35ce0463b..1961b10025 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -153,10 +153,11 @@ endif ()
 if (CMAKE_SYSTEM_NAME MATCHES "Windows")
     get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH)
     file(GLOB CXX_LIB_LIST ${CXX_DIR}/*.dll)
+    file(GLOB VC_LIB_LIST $ENV{SystemRoot}/System32/msvcp*.dll $ENV{SystemRoot}/System32/vcomp*.dll)
     file(GLOB JPEG_LIB_LIST ${jpeg_turbo_LIBPATH}/*.dll)
     file(GLOB SQLITE_LIB_LIST ${sqlite_LIBPATH}/*.dll)
     install(
-        FILES ${CXX_LIB_LIST} ${JPEG_LIB_LIST} ${SQLITE_LIB_LIST}
+        FILES ${CXX_LIB_LIST} ${JPEG_LIB_LIST} ${SQLITE_LIB_LIST} ${VC_LIB_LIST}
         DESTINATION ${INSTALL_LIB_DIR}
         COMPONENT mindspore
     )

From 1555f1d8093d735d4b6a644d62f4279222f8c289 Mon Sep 17 00:00:00 2001
From: dengwentao <dengwentao1@huawei.com>
Date: Thu, 23 Apr 2020 11:24:10 +0800
Subject: [PATCH 007/242] modify tbe attr log

---
 mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
index 5255cc6450..9ec20b3fbb 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
@@ -385,7 +385,8 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no
       (*attrs_json).push_back(attr_obj);
     } else {
       if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD && op_info->impl_path() != "") {
-        MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name << "is required, but not set.";
+        MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name
+                          << " is required, but not set.";
       }
     }
   }

From 5baee3797837f54f05933b8e840abc440fcc5b2e Mon Sep 17 00:00:00 2001
From: xulei2020 <“xulei83@huawei.com”>
Date: Wed, 22 Apr 2020 11:10:15 +0800
Subject: [PATCH 008/242] add code

---
 mindspore/dataset/engine/datasets.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 1648734704..c0b3583c1c 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -428,11 +428,11 @@ class Dataset:
              If input_columns not provided or empty, all columns will be used.
 
         Args:
-            predicate: python callable which returns a boolean value.
-            input_columns: (list[str]): List of names of the input columns, when
-            default=None, the predicate will be applied on all columns in the dataset.
+            predicate (callable): Python callable which returns a boolean value.
+            input_columns (list[str], optional): List of names of the input columns; if None,
+                the predicate will be applied on all columns in the dataset (default=None).
             num_parallel_workers (int, optional): Number of workers to process the Dataset
-            in parallel (default=None).
+                in parallel (default=None).
 
         Returns:
             FilterDataset, dataset filter.

From 0cef4d8abd302c6a6e75dfb21ab063595f948aa9 Mon Sep 17 00:00:00 2001
From: caifubi <caifubi1@huawei.com>
Date: Wed, 22 Apr 2020 20:08:13 +0800
Subject: [PATCH 009/242] set fp_start profiling point after getnext node

---
 .../ascend/profiling/profiling_utils.cc       | 57 ++++++++++++++++---
 .../device/ascend/profiling/profiling_utils.h |  5 ++
 mindspore/ccsrc/device/kernel_adjust.cc       |  5 +-
 3 files changed, 58 insertions(+), 9 deletions(-)

diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
index 7960a08938..fdfff96fde 100644
--- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
+++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
@@ -73,9 +73,45 @@ void ProfilingUtils::GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order,
 
 std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order) {
   const char *trace_begin = std::getenv(kFpStartNode);
-  auto &first_cnode = cnode_exec_order.front();
-  MS_EXCEPTION_IF_NULL(first_cnode);
-  return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin);
+  if (trace_begin != nullptr) {
+    return std::string(trace_begin);
+  }
+
+  std::string fp_start_str = "";
+  std::set<std::string> getnext_outputs;
+  GetCNodeOutputRealNode(kGetNextOpName, cnode_exec_order, NOT_NULL(&getnext_outputs));
+  if (getnext_outputs.empty()) {
+    auto first_node = cnode_exec_order.front();
+    MS_EXCEPTION_IF_NULL(first_node);
+    fp_start_str = first_node->fullname_with_scope();
+  } else {
+    for (auto &cnode : cnode_exec_order) {
+      if (getnext_outputs.count(cnode->fullname_with_scope()) != 0) {
+        fp_start_str = cnode->fullname_with_scope();
+        break;
+      }
+    }
+  }
+  return fp_start_str;
+}
+
+void ProfilingUtils::GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,
+                                            NotNull<std::set<std::string> *> getnext_outputs) {
+  for (auto cnode : cnode_exec_order) {
+    for (auto input : cnode->inputs()) {
+      auto prev_cnode = AnfAlgo::VisitKernel(input, 0);
+      if (!prev_cnode.first->isa<CNode>()) {
+        continue;
+      }
+      if (AnfAlgo::GetCNodeName(prev_cnode.first) == node_name) {
+        getnext_outputs->insert(cnode->fullname_with_scope());
+        MS_LOG(INFO) << "Find GetNext Output CNode:" << cnode->fullname_with_scope();
+      }
+    }
+  }
+  if (getnext_outputs->empty()) {
+    MS_LOG(WARNING) << "GetNext not found";
+  }
 }
 
 std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order) {
@@ -158,17 +194,22 @@ void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node
                                            NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
   if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) {
     MS_LOG(INFO) << "Profiling Match FpStart:" << profiling_trace_info.trace_begin;
-    auto job_id = ProfilingManager::GetInstance().GetJobId();
-    ProfilingContent job_profiling_context = {false, job_id, 0};
-    auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr);
-    kernel_list->emplace_back(job_profiling_node);
-
+    ProfilingTraceJobId(anf_node, graph_ptr, kernel_list);
     ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0};
     auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr);
     kernel_list->emplace_back(fp_profiling_node);
   }
 }
 
+void ProfilingUtils::ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull<session::KernelGraph *> graph_ptr,
+                                         NotNull<std::vector<CNodePtr> *> kernel_list) {
+  MS_LOG(INFO) << "Profiling Match start";
+  auto job_id = ProfilingManager::GetInstance().GetJobId();
+  ProfilingContent job_profiling_context = {false, job_id, 0};
+  auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr);
+  kernel_list->emplace_back(job_profiling_node);
+}
+
 CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node,
                                                         const ProfilingContent &profiling_content,
                                                         NotNull<session::KernelGraph *> graph_ptr) {
diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
index f9f08c9d3f..1f7815b320 100644
--- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
+++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
@@ -65,6 +65,9 @@ class ProfilingUtils {
                                     NotNull<session::KernelGraph *> graph_ptr,
                                     NotNull<std::vector<CNodePtr> *> kernel_list);
 
+  static void ProfilingTraceJobId(const AnfNodePtr &anf_node, NotNull<session::KernelGraph *> graph_ptr,
+                                  NotNull<std::vector<CNodePtr> *> kernel_list);
+
   // Insert net output profiling node, which tells the device to stop profiling.
   // The notify in struct ProfilingContent should be 'true', which tells the device to send data to host.
   static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
@@ -113,6 +116,8 @@ class ProfilingUtils {
   static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order);
   static void GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order,
                            NotNull<ProfilingTraceInfo *> profiling_trace);
+  static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,
+                                     NotNull<std::set<std::string> *> getnext_outputs);
 
   // graph id --> (kernel name list)
   static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_;
diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc
index b557436db9..e8f38aa339 100644
--- a/mindspore/ccsrc/device/kernel_adjust.cc
+++ b/mindspore/ccsrc/device/kernel_adjust.cc
@@ -464,10 +464,13 @@ void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_tra
   }
   std::vector<CNodePtr> new_cnode_list;
   std::vector<CNodePtr> cnode_ptr_list = kernel_graph_ptr->execution_order();
+  if (cnode_ptr_list.empty()) {
+    MS_LOG(ERROR) << "No CNode in graph";
+    return;
+  }
   for (const auto &cnode_ptr : cnode_ptr_list) {
     ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
     new_cnode_list.emplace_back(cnode_ptr);
-
     ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
     ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
     ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));

From 663d597330039ed6f0ced66cdc7139250fde795e Mon Sep 17 00:00:00 2001
From: candanzg <zhangshucheng@huawei.com>
Date: Thu, 23 Apr 2020 09:00:33 +0800
Subject: [PATCH 010/242] tensor assign with slice index

Signed-off-by: candanzg <zhangshucheng@huawei.com>
---
 mindspore/_extends/parse/__init__.py          |   4 +-
 mindspore/_extends/parse/parser.py            |   5 +
 mindspore/_extends/utils.py                   |  11 +
 mindspore/ccsrc/ir/value.h                    |   3 +
 mindspore/ccsrc/pipeline/parse/parse_base.h   |   2 +
 .../ccsrc/pipeline/static_analysis/prim.cc    |   7 +
 mindspore/ccsrc/utils/convert_utils.cc        |   8 +
 .../multitype_ops/_multitype_ops_util.py      | 117 ++++++++++
 .../composite/multitype_ops/setitem_impl.py   | 200 +++++++++++++++---
 mindspore/ops/functional.py                   |   1 +
 tests/ut/python/ops/test_tensor_slice.py      | 109 +++++++++-
 11 files changed, 432 insertions(+), 35 deletions(-)

diff --git a/mindspore/_extends/parse/__init__.py b/mindspore/_extends/parse/__init__.py
index f8a34057c5..9366b5a2d2 100644
--- a/mindspore/_extends/parse/__init__.py
+++ b/mindspore/_extends/parse/__init__.py
@@ -18,7 +18,7 @@ Interfaces for parser module in c++.
 
 from .parser import (Parser, create_obj_instance, generate_scope,
                      get_bprop_method_of_class, get_class_instance_type,
-                     get_class_member_namespace_symbol,
+                     get_class_member_namespace_symbol, create_slice_obj,
                      get_dataclass_attributes, get_dataclass_methods,
                      get_module_namespace, get_obj_type, get_object_key,
                      get_parse_method_of_class, get_scope_name,
@@ -29,4 +29,4 @@ __all__ = ['parse_cb', 'get_parse_method_of_class', 'get_bprop_method_of_class',
            'get_object_key', 'get_class_instance_type', 'is_class_member', 'get_obj_type',
            'create_obj_instance', 'get_module_namespace', 'get_class_member_namespace_symbol',
            'Parser', 'get_dataclass_attributes', 'get_dataclass_methods', 'dump_obj', 'load_obj',
-           'get_dataclass_methods', 'get_scope_name']
+           'get_dataclass_methods', 'get_scope_name', 'create_slice_obj']
diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py
index e88c9c15e9..d8039cd56a 100644
--- a/mindspore/_extends/parse/parser.py
+++ b/mindspore/_extends/parse/parser.py
@@ -29,6 +29,7 @@ from mindspore.common.dtype import pytype_to_dtype
 from mindspore.common.api import _MindSporeFunction
 from .namespace import CellNamespace, ClosureNamespace, ClassMemberNamespace
 from .resources import parse_object_map, convert_object_map, trope_ns, SYMBOL_UNDEFINE, NO_IMPLEMENT
+from ..utils import Slice
 
 # define return value
 RET_SUCCESS = 0
@@ -69,6 +70,10 @@ parse_expr_statement_white_list = (
     "append",
 )
 
+def create_slice_obj(start, end, step):
+    """Create Slice object"""
+    return Slice(start, end, step)
+
 
 def parse_cb(func, parse_method=None):
     """Implements the function of parse."""
diff --git a/mindspore/_extends/utils.py b/mindspore/_extends/utils.py
index 8469ddda8b..d0457607b5 100644
--- a/mindspore/_extends/utils.py
+++ b/mindspore/_extends/utils.py
@@ -19,6 +19,7 @@ import logging
 import os
 import inspect
 from functools import wraps
+from dataclasses import dataclass
 
 
 def cal_sha256(file_path):
@@ -99,3 +100,13 @@ def cell_attr_register(fn=None, attrs=None):
     if fn is not None:
         return wrap_cell(fn)
     return wrap_cell
+
+
+@dataclass
+class Slice:
+    """
+    Slice index record holding the start, end and step values.
+    """
+    start: int
+    end: int
+    step: int
diff --git a/mindspore/ccsrc/ir/value.h b/mindspore/ccsrc/ir/value.h
index c80e22f735..160eac7b5c 100644
--- a/mindspore/ccsrc/ir/value.h
+++ b/mindspore/ccsrc/ir/value.h
@@ -123,6 +123,9 @@ class ValueSlice : public Value {
 
   abstract::AbstractBasePtr ToAbstract() override;
   std::string DumpText() const override { return ToString(); }
+  ValuePtr start() const { return start_; }
+  ValuePtr stop() const { return stop_; }
+  ValuePtr step() const { return step_; }
 
  private:
   ValuePtr start_;
diff --git a/mindspore/ccsrc/pipeline/parse/parse_base.h b/mindspore/ccsrc/pipeline/parse/parse_base.h
index aad8be0d6e..a3ca67b60a 100644
--- a/mindspore/ccsrc/pipeline/parse/parse_base.h
+++ b/mindspore/ccsrc/pipeline/parse/parse_base.h
@@ -79,6 +79,8 @@ const char PYTHON_PARSE_EXPAND_EXPR_STATEMENT[] = "expand_expr_statement";
 const char PYTHON_PARSE_GENERATE_SCOPE[] = "generate_scope";
 const char PYTHON_PARSE_GET_SCOPE_NAME[] = "get_scope_name";
 
+const char PYTHON_PARSE_CLASS_SLICE[] = "create_slice_obj";
+
 // define the common name
 const char NAMED_PRIMITIVE_ITER[] = "iter";
 const char NAMED_PRIMITIVE_NEXT[] = "next";
diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
index 233d5df305..46e088ab11 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
@@ -289,6 +289,13 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) {
     dic["shape"] = shape;
     dic["dtype"] = abs_base->BuildType();
     dic["value"] = BuildValue(abs_base->BuildValue());
+  } else if (abs_base->isa<AbstractSlice>()) {
+    auto arg_slice = dyn_cast<AbstractSlice>(abs_base);
+    std::vector<int> shape;
+    dic["shape"] = shape;
+    dic["dtype"] = arg_slice->BuildType();
+    dic["value"] = BuildValue(arg_slice->BuildValue());
+
   } else if (abs_base->isa<AbstractTuple>()) {
     auto arg_tuple = dyn_cast<AbstractTuple>(abs_base);
     size_t len = arg_tuple->size();
diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc
index e840ff8734..049c1dcdb8 100644
--- a/mindspore/ccsrc/utils/convert_utils.cc
+++ b/mindspore/ccsrc/utils/convert_utils.cc
@@ -28,6 +28,7 @@
 
 #include "ir/meta_tensor.h"
 #include "pipeline/parse/parse.h"
+#include "pipeline/parse/parse_base.h"
 #include "ir/value.h"
 
 namespace mindspore {
@@ -97,6 +98,13 @@ py::object ValuePtrToPyData(const ValuePtr &value) {
       i++;
     }
     ret = rets;
+  } else if (value->isa<ValueSlice>()) {
+    auto slice = value->cast<ValueSlicePtr>();
+    auto start = ValuePtrToPyData(slice->start());
+    auto end = ValuePtrToPyData(slice->stop());
+    auto step = ValuePtrToPyData(slice->step());
+    ret = parse::python_adapter::CallPyFn(parse::PYTHON_MOD_PARSE_MODULE, parse::PYTHON_PARSE_CLASS_SLICE, start, end,
+                                          step);
   } else if (value->isa<Type>()) {
     py::tuple v(1);
     v[0] = value->cast<TypePtr>();
diff --git a/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py b/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
index b3687c553c..49773ff8ad 100644
--- a/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
+++ b/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
@@ -15,7 +15,43 @@
 
 """constexpr util"""
 
+import numpy as np
 from ...primitive import constexpr
+from ....common.tensor import Tensor
+from ....common import dtype as mstype
+from ...._extends.utils import Slice
+
+@constexpr
+def check_equal(param1, param2, msg="{},{}"):
+    if param1 != param2:
+        raise ValueError(msg.format(param1, param2))
+    return param1
+
+@constexpr
+def check_tensor_setitem_index(index, element_type=None):
+    """Check the index type of a tensor assignment."""
+    if index is None:
+        raise ValueError("Tensor's index cannot be None.")
+    # eg. Tensor[Slice] = u
+    if isinstance(index, Slice):
+        return True
+    # eg. Tensor[Tuple] = u
+    if isinstance(index, tuple):
+        if not index:
+            raise ValueError("Tensor's index cannot be empty.")
+        # eg. Tensor[Tuple(Slice...)] = u
+        if not isinstance(index[0], Slice):
+            raise ValueError("Index of type '{}' is not supported yet.".format(type(index[0])))
+        return True
+    # eg. Tensor[Tensor[dtype=bool]] = u
+    if index == mstype.tensor:
+        if element_type is None or element_type != mstype.bool_:
+            raise ValueError(
+                "The index of tensor should be a bool type tensor. \
+                {} type is not supported yet.".format(element_type))
+        return True
+
+    raise ValueError("Index of type '{}' is not supported yet.".format(type(index)))
 
 
 @constexpr
@@ -43,3 +79,84 @@ def error_msg(msg="", format_values=""):
     """
 
     raise ValueError(msg.format(*format_values))
+
+def slice_expand(input_slices, shape):
+    """
+    Expand a slice or a tuple of slices into per-dimension begin, end and stride lists.
+
+    Inputs:
+        input_slices (Slice or Tuple(Slice, ...)): A slice or a tuple of slices.
+        shape (Tuple): The shape of a tensor, given as a tuple of integers.
+
+    Outputs:
+        (List, List, List), This is expressed as (begins, ends, strides).
+    """
+    begin = []
+    end = []
+    strides = []
+    index = 0
+    slices = None
+    # Slice or Tuple(Slice...)
+    if isinstance(input_slices, Slice):
+        slices = (input_slices,)
+    elif isinstance(input_slices, (tuple, list)) and input_slices and isinstance(input_slices[0], Slice):
+        slices = input_slices
+    else:
+        raise ValueError("Tensor's index type is not supported yet.")
+
+    for s in slices:
+        start = 0 if (s.start is None) else s.start
+        stop = shape[index] if (s.end is None) else s.end
+        step = 1 if (s.step is None) else s.step
+        begin.append(start)
+        end.append(stop)
+        strides.append(step)
+        index += 1
+    while index < len(shape):
+        begin.append(0)
+        end.append(shape[index])
+        strides.append(1)
+        index += 1
+    return begin, end, strides
+
+@constexpr
+def slice2indices(input_slices, shape):
+    """
+    Convert slice to indices.
+
+    Inputs:
+        input_slices (Slice or Tuple(Slice, ...)): A slice or a tuple of slices.
+        shape (Tuple): The shape of a tensor, given as a tuple of integers.
+
+    Outputs:
+        Tensor, the shape is (n, 1).
+    """
+    begin, end, strides = slice_expand(input_slices, shape)
+    np_r = []
+    for i, element in enumerate(shape):
+        s = begin[i] if (begin[i] >= 0) else (element + begin[i])
+        e = end[i] if (end[i] >= 0) else (element + end[i])
+        np_r.append(np.r_[s:e:strides[i]])
+    # Reference: np.ravel_multi_index((np.ix_(np.r_[1:3:1], np.r_[0:4:1], np.r_[4:0:-1])), a.shape)
+    np_ix = np.ix_(*np_r)
+    ravel = np.ravel_multi_index(np_ix, shape)
+    ravel = Tensor(ravel.reshape(-1, 1), dtype=mstype.int32)
+    return ravel
+
+@constexpr
+def check_indices(indices_size, index):
+    if indices_size < 1:
+        raise ValueError("The tensor's index is unreasonable. index:{}".format(index))
+    return indices_size
+
+
+@constexpr
+def check_indices_value_size(indices_size, value_size):
+    if value_size < 1:
+        raise ValueError("The value assigned to tensor cannot be empty.")
+    if value_size > 1:
+        if value_size != indices_size:
+            raise ValueError(
+                "The value given to tensor does not match the index size. \
+                value size:{}, indics size:{}".format(value_size, indices_size))
+    return value_size
diff --git a/mindspore/ops/composite/multitype_ops/setitem_impl.py b/mindspore/ops/composite/multitype_ops/setitem_impl.py
index 31c96932c5..742ee57166 100644
--- a/mindspore/ops/composite/multitype_ops/setitem_impl.py
+++ b/mindspore/ops/composite/multitype_ops/setitem_impl.py
@@ -138,25 +138,23 @@ def _tensor_setitem_by_tensor_v1(data, index, value_tensor):
     Outputs:
         Tensor, element type and shape is same as data.
     """
+    result = None
     index_dtype = F.dtype(index)
     index_shape = F.shape(index)
-    is_bool = mult_util.is_same_type(index_dtype, mstype.bool_)
-    if not is_bool:
-        return mult_util.error_msg(
-            "The tensor index should be a bool type tensor. {} type tensor is not supported yet.", (index_dtype,))
-    data_shape = F.shape(data)
-    if index_shape != data_shape:
-        return mult_util.error_msg(
-            "The tensor(shape={}) and tensor index(shape={}) should be the same shape.", (data_shape, index_shape))
-    size = F.size(value_tensor)
-    if size != 1:
-        return mult_util.error_msg(
-            "When assign value is a tensor, its size should be 1, but current size is {}.", (size,))
-    dtype = F.dtype(data)
-    u_cast = F.cast(value_tensor, dtype)
-    one_data = F.ones_like(data)
-    u = F.tensor_mul(one_data, u_cast)
-    return F.select(index, u, data)
+    check_result = mult_util.check_tensor_setitem_index(mstype.tensor, index_dtype)
+    if check_result:
+        data_shape = F.shape(data)
+        data_shape = mult_util.check_equal(data_shape, index_shape,
+                                           "The tensor(shape={}) and tensor index(shape={}) should be the same shape.")
+        size = F.size(value_tensor)
+        size = mult_util.check_equal(1, size,
+                                     "When assign value is a tensor, its size should be {}, but current size is {}.")
+        dtype = F.dtype(data)
+        u_cast = F.cast(value_tensor, dtype)
+        one_data = F.ones_like(data)
+        u = F.tensor_mul(one_data, u_cast)
+        result = F.select(index, u, data)
+    return result
 
 
 @setitem.register("Tensor", "Tensor", "Number")
@@ -179,16 +177,162 @@ def _tensor_setitem_by_tensor_v2(data, index, value):
     Outputs:
         Tensor, element type and shape is same as data.
     """
+    result = None
     index_dtype = F.dtype(index)
     index_shape = F.shape(index)
-    is_bool = mult_util.is_same_type(index_dtype, mstype.bool_)
-    if not is_bool:
-        return mult_util.error_msg(
-            "The tensor index should be a bool type tensor. {} type tensor is not supported yet.", (index_dtype,))
-    shape = F.shape(data)
-    if index_shape != shape:
-        return mult_util.error_msg(
-            "The tensor(shape={}) and tensor index(shape={}) should be the same shape.", (shape, index_shape))
-    dtype = F.dtype(data)
-    u = F.fill(dtype, shape, value)
-    return F.select(index, u, data)
+    check_result = mult_util.check_tensor_setitem_index(mstype.tensor, index_dtype)
+    if check_result:
+        shape = F.shape(data)
+        shape = mult_util.check_equal(
+            shape, index_shape, "The tensor(shape={}) and tensor index(shape={}) should be the same shape.")
+        dtype = F.dtype(data)
+        u = F.fill(dtype, shape, value)
+        result = F.select(index, u, data)
+    return result
+
+
+@setitem.register("Tensor", "Slice", "Tensor")
+def _tensor_setitem_with_slice_v3(data, input_slice, value):
+    """
+    Tensor assignment.
+
+    Note:
+        Syntax support: A[Slice] = U
+        Restraint condition: A is a Tensor
+                             Slice like "1:3"
+                             U is a Tensor(size=1) or Tensor(size>1)
+
+    Inputs:
+        data (Tensor): Assigned tensor.
+        input_slice (Slice): Slice expression.
+        value (Tensor): Assignment value.
+
+    Outputs:
+        Tensor, element type and shape is same as data.
+    """
+    return _tensor_assgin_tensor(data, input_slice, value)
+
+
+@setitem.register("Tensor", "Tuple", "Tensor")
+def _tensor_setitem_with_slice_v4(data, input_slice, value):
+    """
+    Tensor assignment.
+
+    Note:
+        Syntax support: A[Slice] = U
+        Restraint condition: A is a Tensor
+                             Slice like "1:3, ::, :4:-1"
+                             U is a Tensor(size=1) or Tensor(size>1)
+
+    Inputs:
+        data (Tensor): Assigned tensor.
+        input_slice (Tuple(Slice)): Slice expression.
+        value (Tensor): Assignment value.
+
+    Outputs:
+        Tensor, element type and shape is same as data.
+    """
+    return _tensor_assgin_tensor(data, input_slice, value)
+
+
+def _tensor_assgin_tensor(data, input_slice, value):
+    """Given a tensor value assign to tensor by slice"""
+    # 1. condition
+    result = None
+    check_result = mult_util.check_tensor_setitem_index(input_slice)
+    if check_result:
+        data_shape = F.shape(data)
+        data_size = F.size(data)
+        data_dtype = F.dtype(data)
+        indices = mult_util.slice2indices(input_slice, data_shape)
+        indices_size = F.size(indices)
+        indices_size = mult_util.check_indices(indices_size, input_slice)
+        update = F.fill(data_dtype, (indices_size,), 1)
+        condition_1d = F.scatter_nd(indices, update, (data_size,))
+        condition_1d = F.cast(condition_1d, mstype.bool_)
+        condition = F.reshape(condition_1d, data_shape)
+        # 2. u
+        value_fill = None
+        value_size = F.size(value)
+
+        value_size = mult_util.check_indices_value_size(indices_size, value_size)
+        if value_size == 1:
+            value_fill = F.fill(data_dtype, (indices_size,), 1)
+            value = F.cast(value, data_dtype)
+            value_fill = F.tensor_mul(value_fill, value)
+        elif value_size > 1:
+            value_fill = F.reshape(value, (indices_size,))
+        value_1d = F.scatter_nd(indices, value_fill, (data_size,))
+        u = F.reshape(value_1d, data_shape)
+        # A[slice]= u -> A[B]=U -> select(B, U, A)
+        result = F.select(condition, u, data)
+    return result
+
+
+@setitem.register("Tensor", "Slice", "Number")
+def _tensor_setitem_with_slice_v1(data, input_slice, value):
+    """
+    Tensor assignment.
+
+    Note:
+        Syntax support: A[Slice] = u
+        Restraint condition: A is a Tensor.
+                             Slice like "1:3"
+                             u is a scalar
+
+    Inputs:
+        data (Tensor): Assigned tensor.
+        input_slice (Slice): slice expression.
+        value (Number): Assignment value.
+
+    Outputs:
+        Tensor, element type and shape is same as data.
+    """
+    return _tensor_assgin_number(data, input_slice, value)
+
+
+@setitem.register("Tensor", "Tuple", "Number")
+def _tensor_setitem_with_slice_v2(data, input_slice, value):
+    """
+    Tensor assignment.
+
+    Note:
+        Syntax support: A[Slice] = u
+        Restraint condition: A is a Tensor.
+                             Slice like "1:3, ::, :4:-1"
+                             u is a scalar
+
+    Inputs:
+        data (Tensor): Assigned tensor.
+        input_slice (Tuple(Slice)): slice expression.
+        value (Number): Assignment value.
+
+    Outputs:
+        Tensor, element type and shape is same as data.
+    """
+    return _tensor_assgin_number(data, input_slice, value)
+
+
+def _tensor_assgin_number(data, input_slice, value):
+    """Given a scalar assign to tensor by slice"""
+    # 1. condition
+    check_result = mult_util.check_tensor_setitem_index(input_slice)
+    result = None
+    if check_result:
+        data_shape = F.shape(data)
+        data_size = F.size(data)
+        data_dtype = F.dtype(data)
+        indices = mult_util.slice2indices(input_slice, data_shape)
+        indices_size = F.size(indices)
+        indices_size = mult_util.check_indices(indices_size, input_slice)
+        update = F.fill(data_dtype, (indices_size,), 1)
+        condition_1d = F.scatter_nd(indices, update, (data_size,))
+        condition_1d = F.cast(condition_1d, mstype.bool_)
+        condition = F.reshape(condition_1d, data_shape)
+        # 2. u
+        value_fill = F.fill(data_dtype, (indices_size,), value)
+        value_1d = F.scatter_nd(indices, value_fill, (data_size,))
+        u = F.reshape(value_1d, data_shape)
+        # A[slice]= u -> A[B]=U -> select(B, U, A)
+        result = F.select(condition, u, data)
+    return result
diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py
index c5b8752ae2..4135133e85 100644
--- a/mindspore/ops/functional.py
+++ b/mindspore/ops/functional.py
@@ -68,6 +68,7 @@ tuple_to_array = P.TupleToArray()
 scalar_cast = P.ScalarCast()
 print_ = P.Print()
 expand_dims = P.ExpandDims()
+scatter_nd = P.ScatterNd()
 
 tuple_setitem = Primitive('tuple_setitem')
 tuple_getitem = Primitive('tuple_getitem')
diff --git a/tests/ut/python/ops/test_tensor_slice.py b/tests/ut/python/ops/test_tensor_slice.py
index ddd1fb46a1..08ba143de8 100644
--- a/tests/ut/python/ops/test_tensor_slice.py
+++ b/tests/ut/python/ops/test_tensor_slice.py
@@ -94,10 +94,101 @@ class NetWorkReduceToScalar(Cell):
         return ret
 
 
+class TensorAssignWithSliceError1(Cell):
+    def __init__(self):
+        super(TensorAssignWithSliceError1, self).__init__()
+
+    def construct(self, a, b):
+        a[1:3:-1,::] = b
+        return a
+
+class TensorAssignWithSliceError2(Cell):
+    def __init__(self):
+        super(TensorAssignWithSliceError2, self).__init__()
+
+    def construct(self, a, b):
+        a[1:3:-1] = b
+        return a
+class TensorAssignWithSlice2(Cell):
+    def __init__(self):
+        super(TensorAssignWithSlice2, self).__init__()
+
+    def construct(self, a, b):
+        a[1:5] = b
+        a[3:4] = 5
+        a[-1:1:-1] = b
+        a[-1:3:-1] = 5
+        a[::] = b
+        a[::] = 9
+        return a
+class TensorAssignWithSlice(Cell):
+    def __init__(self):
+        super(TensorAssignWithSlice, self).__init__()
+        self.c = 2
+
+    def construct(self, a, b):
+        a[1:3,::] = b
+        a[2:3:,3:] = b
+        a[::] = b
+        a[::] = self.c
+        a[::,::] = b
+        a[::,::] = self.c
+        a[2:3:,0:, 4:1:-1] = b
+        a[2:3:,0:, 4:1:-1] = self.c
+        z = a
+        return z
+
+def test_tensor_assign_with_slice():
+    context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
+    net = TensorAssignWithSlice()
+    net2= TensorAssignWithSlice2()
+    net_e1 = TensorAssignWithSliceError1()
+    net_e2 = TensorAssignWithSliceError2()
+    a = np.arange(60).reshape(3,4,5)
+    b = Tensor([1])
+    Ta = Tensor(a)
+    Tb= Tensor([1,3])
+    Tc= Tensor([])
+    t = Tensor([1, 2, 3, 4, 5, 6, 7, 8])
+    net(Ta, b)
+    net2(t, b)
+    # Error for A[Slice] = Number
+    # 1. A[Slice] = Number,  Slice error
+    with pytest.raises(ValueError):
+        net_e2(t, 2)
+
+    # Error for A[Slice] = U, U is a Tensor
+    # 1. A[Slice] = U,  u.size is error
+    with pytest.raises(ValueError):
+        net2(t, Tb)
+    # 2. A[Slice] = U, U is empty
+    with pytest.raises(ValueError):
+        net2(t, Tc)
+    # 3. A[Slice] = U, U.size error
+    with pytest.raises(ValueError):
+        net2(t, Tb)
+
+    # Error for A[Tuple(Slice...)] = Tensor
+    # 1. A[Tuple(Slice...)] = U, U is empty
+    with pytest.raises(ValueError):
+        net(Ta, Tc)
+    # 2. A[Tuple(Slice...)] = U, U.size error
+    with pytest.raises(ValueError):
+        net(Ta, Tb)
+    # 3. A[Tuple(Slice...)] = U,  Slice error
+    with pytest.raises(ValueError):
+        net_e1(Ta, b)
+
+    # Error for A[Tuple(Slice...)] = Number
+    # 1. A[Tuple(Slice...)] = Number,  Slice error
+    with pytest.raises(ValueError):
+        net_e1(Ta, 2)
+
+
 class TensorAssignWithBoolTensorIndex(Cell):
     def __init__(self):
         super(TensorAssignWithBoolTensorIndex, self).__init__()
-        self.t = Tensor(np.arange(6).reshape([2, 3]), dtype=mstype.float64)
+        self.t = Tensor(np.arange(60).reshape([3,4,5]), dtype = mstype.float64)
 
     def construct(self, a, b, c, u_tensor, _scalar):
         a[c] = u_scalar
@@ -119,6 +210,7 @@ class TensorAssignWithBoolTensorIndex2(Cell):
     def __init__(self):
         super(TensorAssignWithBoolTensorIndex2, self).__init__()
         self.t = Tensor(np.arange(6).reshape([2, 3]), dtype=mstype.float64)
+        self.t = Tensor(np.arange(60).reshape([3,4,5]), dtype = mstype.float64)
 
     def construct(self, a, u_tensor, _scalar):
         a[a > 8] = u_tensor
@@ -139,7 +231,7 @@ class TensorAssignWithBoolTensorIndex2Error(Cell):
         return a
 
 
-a = np.random.uniform(1, 10, [2, 3])
+a = np.random.uniform(1,10,[3,4,5])
 b = a > 5
 c = a < 3
 Ta = Tensor(a)
@@ -148,13 +240,13 @@ Tc = Tensor(c)
 Td = Tensor([True, True])
 u_tensor = Tensor([1])
 u_tensor_error = Tensor([1, 2])
+t_1d = Tensor([1, 2, 3, 4, 5, 6, 7, 8])
 u_scalar = 5
 
-
 def test_tensor_assign_bool_index():
     net1 = TensorAssignWithBoolTensorIndex()
     net2 = TensorAssignWithBoolTensorIndex2()
-
+    net1(Ta, Tb, Tc, u_tensor, u_scalar)
     net1(Ta, Tb, Tc, u_tensor, u_scalar)
     with pytest.raises(ValueError):
         net1(Ta, Td, Tc, u_tensor, u_scalar)
@@ -180,8 +272,15 @@ def test_tensor_assign_bool_index():
     with pytest.raises(AttributeError):
         net4(Ta, u_scalar)
 
-
 test_cases = [
+    ('TensorAssignWithSlice', {
+        'block': TensorAssignWithSlice(),
+        'desc_inputs': [Ta,  u_tensor],
+    }),
+    ('TensorAssignWithSlice2', {
+        'block': TensorAssignWithSlice2(),
+        'desc_inputs': [t_1d,  u_tensor],
+    }),
     ('TensorAssignWithBoolTensorIndex', {
         'block': TensorAssignWithBoolTensorIndex(),
         'desc_inputs': [Ta, Tb, Tc, u_tensor, u_scalar],

From 6a6a3cf077e890aa88f94cea190c37d82686b3d0 Mon Sep 17 00:00:00 2001
From: yanghaitao <yanghaitao1@huawei.com>
Date: Thu, 23 Apr 2020 14:40:16 +0800
Subject: [PATCH 011/242] GeneratorDataset column_names support string type

---
 mindspore/dataset/engine/datasets.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 1648734704..dff443dc1e 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -2569,6 +2569,8 @@ class GeneratorDataset(SourceDataset):
                 # Random accessible input is also iterable
                 self.source = (lambda: _iter_fn(source, num_samples))
 
+        if column_names is not None and not isinstance(column_names, list):
+            column_names = [column_names]
         self.column_names = column_names
 
         if column_types is not None:

From adeed4fe67bf3f518d52b7c3cdd1cf023a0dcfcd Mon Sep 17 00:00:00 2001
From: Yanjun Peng <pengyanjun1@huawei.com>
Date: Thu, 23 Apr 2020 15:15:12 +0800
Subject: [PATCH 012/242] fix dataset api doc

---
 mindspore/dataset/engine/datasets.py                 | 3 +--
 mindspore/dataset/transforms/vision/c_transforms.py  | 5 +++--
 mindspore/dataset/transforms/vision/py_transforms.py | 5 +++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 1648734704..10728d78bc 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -2494,8 +2494,7 @@ class GeneratorDataset(SourceDataset):
         shuffle (bool, optional): Whether or not to perform shuffle on the dataset. Random accessible input is required.
             (default=None, expected order behavior shown in the table).
         sampler (Sampler/Iterable, optional): Object used to choose samples from the dataset. Random accessible input is
-        required.
-            (default=None, expected order behavior shown in the table).
+            required (default=None, expected order behavior shown in the table).
         num_shards (int, optional): Number of shards that the dataset should be divided into (default=None).
             This argument should be specified only when 'num_samples' is "None". Random accessible input is required.
         shard_id (int, optional): The shard ID within num_shards (default=None). This argument should be specified only
diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py
index 07011b1d53..1b495ffe92 100644
--- a/mindspore/dataset/transforms/vision/c_transforms.py
+++ b/mindspore/dataset/transforms/vision/c_transforms.py
@@ -89,8 +89,8 @@ class Normalize(cde.NormalizeOp):
     Normalize the input image with respect to mean and standard deviation.
 
     Args:
-        mean (list): List of mean values for each channel, w.r.t channel order.
-        std (list): List of standard deviations for each channel, w.r.t. channel order.
+        mean (sequence): List or tuple of mean values for each channel, w.r.t channel order.
+        std (sequence): List or tuple of standard deviations for each channel, w.r.t. channel order.
     """
 
     @check_normalize_c
@@ -109,6 +109,7 @@ class RandomCrop(cde.RandomCropOp):
             If size is an int, a square crop of size (size, size) is returned.
             If size is a sequence of length 2, it should be (height, width).
         padding (int or sequence, optional): The number of pixels to pad the image (default=None).
+            If padding is not None, the image is first padded with the padding values.
             If a single number is provided, it pads all borders with this value.
             If a tuple or list of 2 values are provided, it pads the (left and top)
             with the first value and (right and bottom) with the second value.
diff --git a/mindspore/dataset/transforms/vision/py_transforms.py b/mindspore/dataset/transforms/vision/py_transforms.py
index 51bea80b21..de9ce3f450 100644
--- a/mindspore/dataset/transforms/vision/py_transforms.py
+++ b/mindspore/dataset/transforms/vision/py_transforms.py
@@ -225,8 +225,8 @@ class Normalize:
     The values of the array need to be in range [0.0, 1.0].
 
     Args:
-        mean (list): List of mean values for each channel, w.r.t channel order.
-        std (list): List of standard deviations for each channel, w.r.t. channel order.
+        mean (sequence): List or tuple of mean values for each channel, w.r.t channel order.
+        std (sequence): List or tuple of standard deviations for each channel, w.r.t. channel order.
 
     Examples:
         >>> py_transforms.ComposeOp([py_transforms.Decode(),
@@ -262,6 +262,7 @@ class RandomCrop:
             If size is an int, a square crop of size (size, size) is returned.
             If size is a sequence of length 2, it should be (height, width).
         padding (int or sequence, optional): The number of pixels to pad the image (default=None).
+            If padding is not None, the image is first padded with the padding values.
             If a single number is provided, it pads all borders with this value.
             If a tuple or list of 2 values are provided, it pads the (left and top)
             with the first value and (right and bottom) with the second value.

From 3b8cb07c69ba886e2c7e329fe28f892d364df145 Mon Sep 17 00:00:00 2001
From: liubuyu <liubuyu1@huawei.com>
Date: Thu, 23 Apr 2020 14:59:42 +0800
Subject: [PATCH 013/242] fix default parameter set

---
 mindspore/train/model.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/mindspore/train/model.py b/mindspore/train/model.py
index 698105889a..36e9417095 100755
--- a/mindspore/train/model.py
+++ b/mindspore/train/model.py
@@ -99,12 +99,8 @@ class Model:
         self._loss_scale_manager_set = False
         self._keep_bn_fp32 = True
         self._check_kwargs(kwargs)
-        if 'keep_batchnorm_fp32' in kwargs:
-            self._keep_bn_fp32 = kwargs['keep_batchnorm_fp32']
-        if 'loss_scale_manager' in kwargs:
-            self._loss_scale_manager = kwargs['loss_scale_manager']
-            self._loss_scale_manager_set = True
         self._amp_level = amp_level
+        self._process_amp_args(kwargs)
         self._parallel_mode = _get_parallel_mode()
         self._device_number = _get_device_num()
         self._global_rank = _get_global_rank()
@@ -114,6 +110,15 @@ class Model:
         self._build_eval_network(metrics, eval_network, eval_indexes)
         self._build_predict_network()
 
+    def _process_amp_args(self, kwargs):
+        if self._amp_level == "O0":
+            self._keep_bn_fp32 = False
+        if 'keep_batchnorm_fp32' in kwargs:
+            self._keep_bn_fp32 = kwargs['keep_batchnorm_fp32']
+        if 'loss_scale_manager' in kwargs:
+            self._loss_scale_manager = kwargs['loss_scale_manager']
+            self._loss_scale_manager_set = True
+
     def _check_kwargs(self, kwargs):
         for arg in kwargs:
             if arg not in ['loss_scale_manager', 'keep_batchnorm_fp32']:

From 181fd93e45d1c728206ac54ab8a856b14c8cb71b Mon Sep 17 00:00:00 2001
From: zjun <zhangjun0@huawei.com>
Date: Thu, 23 Apr 2020 15:26:30 +0800
Subject: [PATCH 014/242] fix lstm out size

---
 mindspore/nn/layer/lstm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py
index 84c156a1c2..bdc49739ac 100755
--- a/mindspore/nn/layer/lstm.py
+++ b/mindspore/nn/layer/lstm.py
@@ -149,7 +149,7 @@ class LSTM(Cell):
         if self.batch_first:
             x = self.transpose1(x, (1, 0, 2))
         h0, c0 = hx
-        output, hn, cn, _ = self.lstm(x, h0, c0, self.weight)
+        output, hn, cn, _, _ = self.lstm(x, h0, c0, self.weight)
         if self.batch_first:
             output = self.transpose2(output, (1, 0, 2))
         return (output, (hn, cn))

From 75381bc30c7866c80043197e44b0422f320df5ff Mon Sep 17 00:00:00 2001
From: ZPaC <zhoupeichen@huawei.com>
Date: Wed, 22 Apr 2020 17:56:49 +0800
Subject: [PATCH 015/242] Add gpu test case for dynamic lr.

---
 tests/st/nccl/test_nccl_lenet.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tests/st/nccl/test_nccl_lenet.py b/tests/st/nccl/test_nccl_lenet.py
index 3880f1d473..4ed424b6ee 100644
--- a/tests/st/nccl/test_nccl_lenet.py
+++ b/tests/st/nccl/test_nccl_lenet.py
@@ -21,11 +21,12 @@ from mindspore.nn.optim import Momentum
 from mindspore.nn import TrainOneStepCell, WithLossCell
 from mindspore.ops import operations as P
 from mindspore.communication.management import init, get_rank, get_group_size
+from mindspore.common import dtype as mstype
 
 context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
 init('nccl')
 
-epoch = 2
+epoch = 5
 total = 5000
 batch_size = 32
 mini_batch = total // batch_size
@@ -67,12 +68,20 @@ class LeNet(nn.Cell):
         return output
 
 
+def multisteplr(total_steps, gap, base_lr=0.9, gamma=0.1, dtype=mstype.float32):
+    lr = []
+    for step in range(total_steps):
+        lr_ = base_lr * gamma ** (step//gap)
+        lr.append(lr_)
+    return Tensor(np.array(lr), dtype)
+
+
 def test_lenet_nccl():
     net = LeNet()
     net.set_train()
 
-    learning_rate = 0.01
-    momentum = 0.9
+    learning_rate = multisteplr(epoch, 2)
+    momentum = Tensor(np.array([0.9]).astype(np.float32))
     mom_optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
     criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
     net_with_criterion = WithLossCell(net, criterion)

From 7321bdb51431de5785a39020d8ad44107c63efca Mon Sep 17 00:00:00 2001
From: gengdongjie <gengdongjie@huawei.com>
Date: Wed, 22 Apr 2020 13:02:57 +0800
Subject: [PATCH 016/242] add readme in resnet50_cifar10 example

---
 example/resnet50_cifar10/README.md | 125 +++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 example/resnet50_cifar10/README.md

diff --git a/example/resnet50_cifar10/README.md b/example/resnet50_cifar10/README.md
new file mode 100644
index 0000000000..afa8519bee
--- /dev/null
+++ b/example/resnet50_cifar10/README.md
@@ -0,0 +1,125 @@
+# ResNet-50 Example
+
+## Description
+
+This is an example of training ResNet-50 with CIFAR-10 dataset in MindSpore.
+
+## Requirements
+
+- Install [MindSpore](https://www.mindspore.cn/install/en).
+
+- Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz).
+
+> Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows:
+> ```
+> .  
+> ├── cifar-10-batches-bin  # train dataset
+> └── cifar-10-verify-bin   # infer dataset
+> ```
+
+
+## Example structure
+
+```shell
+.
+├── config.py                       # parameter configuration
+├── dataset.py                      # data preprocessing
+├── eval.py                         # infer script
+├── lr_generator.py                 # generate learning rate for each step
+├── run_distribute_train.sh         # launch distributed training
+├── run_infer.sh                    # launch inference
+├── run_standalone_train.sh         # launch standalone training 
+└── train.py                        # train script
+```
+
+
+## Parameter configuration
+
+Parameters for both training and inference can be set in config.py.
+
+```
+"class_num": 10,                  # dataset class num
+"batch_size": 32,                 # batch size of input tensor
+"loss_scale": 1024,               # loss scale
+"momentum": 0.9,                  # momentum
+"weight_decay": 1e-4,             # weight decay 
+"epoch_size": 90,                 # only valid for training, which is always 1 for inference
+"buffer_size": 100,               # number of queue size in data preprocessing
+"image_height": 224,              # image height
+"image_width": 224,               # image width
+"save_checkpoint": True,          # whether save checkpoint or not
+"save_checkpoint_steps": 195,     # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
+"keep_checkpoint_max": 10,        # only keep the last keep_checkpoint_max checkpoints
+"save_checkpoint_path": "./",     # path to save checkpoint
+"lr_init": 0.01,                  # initial learning rate
+"lr_end": 0.00001,                # final learning rate
+"lr_max": 0.1,                    # maximum learning rate
+"warmup_epochs": 5,               # number of warmup epochs
+"lr_decay_mode": "poly"           # decay mode can be selected in steps, poly and default
+```
+
+## Running the example
+
+### Train
+
+#### Usage
+
+```
+# distribute training
+Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]
+
+# standalone training
+Usage: sh run_standalone_train.sh [DATASET_PATH]
+```
+
+
+#### Launch
+
+```
+# distribute training example
+sh run_distribute_train.sh rank_table.json ~/cifar-10-batches-bin
+
+# standalone training example
+sh run_standalone_train.sh ~/cifar-10-batches-bin
+```
+
+> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
+
+#### Result
+
+Training results are stored in the example path, in folders whose names begin with "train" or "train_parallel". There you can find the checkpoint files together with log output like the following.
+
+```
+# distribute training result(8p)
+epoch: 1 step: 195, loss is 1.9601055
+epoch: 2 step: 195, loss is 1.8555021
+epoch: 3 step: 195, loss is 1.6707983
+epoch: 4 step: 195, loss is 1.8162166
+epoch: 5 step: 195, loss is 1.393667
+```
+
+### Infer
+
+#### Usage
+
+```
+# infer
+Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
+```
+
+#### Launch
+
+```
+# infer example
+sh run_infer.sh ~/cifar-10-verify-bin ~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt
+```
+
+> Checkpoints are produced during the training process.
+
+#### Result
+
+Inference results are stored in the example path, in a folder named "infer". There you can find log output like the following.
+
+```
+result: {'acc': 0.91446314102564111} ckpt=~/resnet50_cifar10/train_parallel0/resnet-90_195.ckpt
+```

From 8bb93411f3845ceac91171a3c511f42bf88ca245 Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Thu, 16 Apr 2020 11:41:51 +0800
Subject: [PATCH 017/242] Add prim name to error message for _grad_ops.py

---
 mindspore/_checkparam.py              |   2 +-
 mindspore/ops/_utils/utils.py         |  20 +-
 mindspore/ops/operations/_grad_ops.py | 271 ++++++++++++--------------
 mindspore/ops/operations/array_ops.py |   2 +-
 4 files changed, 132 insertions(+), 163 deletions(-)

diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py
index 3543f58cf5..bf24b7e522 100644
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@@ -206,8 +206,8 @@ class Validator:
         def _check_tensor_type(arg):
             arg_key, arg_val = arg
             elem_type = arg_val
-            type_names = []
             if not elem_type in valid_values:
+                type_names = []
                 for t in valid_values:
                     type_names.append(str(t))
                 types_info = '[' + ", ".join(type_names) + ']'
diff --git a/mindspore/ops/_utils/utils.py b/mindspore/ops/_utils/utils.py
index fbd81c4f0d..90496afc9b 100644
--- a/mindspore/ops/_utils/utils.py
+++ b/mindspore/ops/_utils/utils.py
@@ -15,7 +15,7 @@
 
 """utils for operator"""
 
-from ..._checkparam import ParamValidator as validator
+from ..._checkparam import Validator as validator
 from ..._checkparam import Rel
 from ...common import dtype as mstype
 
@@ -62,25 +62,25 @@ def _get_broadcast_shape(x_shape, y_shape, prim_name):
     return broadcast_shape
 
 
-def _get_concat_offset(x_shp, x_type, axis):
+def _get_concat_offset(x_shp, x_type, axis, prim_name):
     """for concat and concatoffset check args and compute offset"""
-    validator.check_type("shape", x_shp, [tuple])
-    validator.check_integer("len of input_x shape", len(x_shp), 0, Rel.GT)
-    validator.check_subclass("shape0", x_type[0], mstype.tensor)
-    validator.check_integer("len of input_x0 shape", len(x_shp[0]), 0, Rel.GT)
+    validator.check_value_type("shape", x_shp, [tuple], prim_name)
+    validator.check_integer("input_x rank", len(x_shp), 0, Rel.GT, prim_name)
+    validator.check_subclass("shape0", x_type[0], mstype.tensor, prim_name)
+    validator.check_integer("len of x_shp[0]", len(x_shp[0]), 0, Rel.GT, prim_name)
     rank_base = len(x_shp[0])
-    validator.check_int_range('axis', axis, -rank_base - 1, rank_base, Rel.INC_BOTH)
+    validator.check_int_range('axis', axis, -rank_base - 1, rank_base, Rel.INC_BOTH, prim_name)
     if axis < 0:
         axis = axis + rank_base
     all_shp = x_shp[0][axis]
     offset = [0,]
     for i in range(1, len(x_shp)):
         v = x_shp[i]
-        validator.check('len of x_shp[%d]' % i, len(v), 'len of base', len(x_shp[0]))
-        validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0])
+        validator.check('len of x_shp[%d]' % i, len(v), 'len of x_shp[0]', len(x_shp[0]), Rel.EQ, prim_name)
+        validator.check('x_type[%d]' % i, x_type[i], 'x_type[0]', x_type[0], Rel.EQ, prim_name)
         for j in range(rank_base):
             if j != axis and v[j] != x_shp[0][j]:
-                raise ValueError("Concat evaluator element %d shape in input can not concat with first element" % i)
+                raise ValueError(f"For \'{prim_name}\' element {i} shape in input can not concat with first element")
         offset.append(all_shp)
         all_shp += v[axis]
     return offset, all_shp, axis
diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py
index c29832dcb7..e130dcc382 100644
--- a/mindspore/ops/operations/_grad_ops.py
+++ b/mindspore/ops/operations/_grad_ops.py
@@ -18,8 +18,7 @@
 from ..._c_expression import signature_rw as sig_rw
 from ..._c_expression import signature_kind as sig_kind
 from ..primitive import Primitive, PrimitiveWithInfer, prim_attr_register
-from ..._checkparam import ParamValidator as validator
-from ..._checkparam import Rel, check_int_positive, check_bool
+from ..._checkparam import Validator as validator, Rel
 from .._utils import _get_concat_offset
 from ...common import dtype as mstype
 
@@ -51,12 +50,12 @@ class ACosGrad(PrimitiveWithInfer):
         """init ACosGrad"""
 
     def infer_shape(self, x, dout):
-        validator.check_param_equal("x", x, "dout", dout)
+        validator.check("x shape", x, "dout shape", dout, Rel.EQ, self.name)
         return x
 
     def infer_dtype(self, x, dout):
         args = {"x": x, "dout": dout}
-        validator.check_type_same(args, mstype.number_type)
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)
         return x
 
 
@@ -65,8 +64,8 @@ class BatchNormGrad(PrimitiveWithInfer):
 
     @prim_attr_register
     def __init__(self, is_training=False, epsilon=1e-5):
-        self.is_training = validator.check_type('is_training', is_training, (bool,))
-        self.epsilon = validator.check_number_range('epsilon', epsilon, 0, 1, Rel.INC_RIGHT)
+        self.is_training = validator.check_value_type('is_training', is_training, (bool,), self.name)
+        self.epsilon = validator.check_number_range('epsilon', epsilon, 0, 1, Rel.INC_RIGHT, self.name)
         self.add_prim_attr('data_format', "NCHW")
 
     def infer_shape(self, y_backprop_shape, x_shape, scale_shape, reserve_1_shape, reserve_2_shape, reserve_3_shape):
@@ -93,19 +92,19 @@ class BinaryCrossEntropyGrad(PrimitiveWithInfer):
     """Computes gradients for `BinaryCrossEntropy` operation."""
     @prim_attr_register
     def __init__(self, reduction='mean'):
-        self.reduction = validator.check_string('reduction', reduction, ['none', 'mean', 'sum'])
+        self.reduction = validator.check_string('reduction', reduction, ['none', 'mean', 'sum'], self.name)
 
     def infer_shape(self, x_shape, y_shape, doutput_shape, weight_shape):
-        validator.check_param_equal('x_shape', x_shape, 'y_shape', y_shape)
+        validator.check('x_shape', x_shape, 'y_shape', y_shape, Rel.EQ, self.name)
         if weight_shape:
-            validator.check_param_equal('y_shape', y_shape, 'weight_shape', weight_shape)
+            validator.check('y_shape', y_shape, 'weight_shape', weight_shape, Rel.EQ, self.name)
         return x_shape
 
     def infer_dtype(self, x_type, y_type, doutput_type, weight_type):
         args = {'x_type': x_type, 'y_type': y_type, 'doutput_type': doutput_type}
-        validator.check_type_same(args, (mstype.float16, mstype.float32))
+        validator.check_tensor_type_same(args, (mstype.float16, mstype.float32), self.name)
         if weight_type:
-            validator.check_two_types_same('x_type', x_type, 'weight_type', weight_type)
+            validator.check('x_type', x_type, 'weight_type', weight_type, Rel.EQ, self.name)
         return x_type
 
 
@@ -120,7 +119,7 @@ class ConcatOffset(PrimitiveWithInfer):
         axis = self.axis
         x_shp = input_x['shape']
         x_type = input_x['dtype']
-        offset, _, axis = _get_concat_offset(x_shp, x_type, axis)
+        offset, _, axis = _get_concat_offset(x_shp, x_type, axis, self.name)
         self.add_prim_attr('T', x_type[0].element_type())
         offset_values = []
         for i in range(len(x_shp)):
@@ -184,11 +183,11 @@ class Conv2DBackpropFilter(PrimitiveWithInfer):
 
     def __infer__(self, doutput, x, w_size):
         w_size_v = w_size['value']
-        validator.check_type('w_size', w_size_v, [tuple])
+        validator.check_value_type('w_size', w_size_v, [tuple], self.name)
         for i, dim_len in enumerate(w_size_v):
-            validator.check_type("w_size[%d]" % i, dim_len, [int])
-        validator.check_typename('x_dtype', x['dtype'], [mstype.int8, mstype.int32, mstype.float16, mstype.float32])
-        validator.check_two_types_same('doutput_dtype', doutput['dtype'], 'x_dtype', x['dtype'])
+            validator.check_value_type("w_size[%d]" % i, dim_len, [int], self.name)
+        args = {"x": x['dtype'], "doutput": doutput['dtype']}
+        validator.check_tensor_type_same(args, [mstype.int8, mstype.int32, mstype.float16, mstype.float32], self.name)
         out = {
             'value': None,
             'shape': w_size_v,
@@ -250,8 +249,8 @@ class DepthwiseConv2dNativeBackpropFilter(PrimitiveWithInfer):
 
     def __infer__(self, x, w_size, dout):
         w_size_v = w_size['value']
-        args = {'x_dtype': x['dtype'], 'dout_type': dout['dtype']}
-        validator.check_type_same(args, mstype.number_type)
+        args = {'x': x['dtype'], 'dout': dout['dtype']}
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)
         out = {
             'value': None,
             'shape': w_size_v,
@@ -310,8 +309,8 @@ class DepthwiseConv2dNativeBackpropInput(PrimitiveWithInfer):
         raise NotImplementedError
 
     def __infer__(self, x_size, w, dout):
-        args = {'w_dtype': w['dtype'], 'dout_type': dout['dtype']}
-        validator.check_type_same(args, mstype.number_type)
+        args = {'w': w['dtype'], 'dout': dout['dtype']}
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)
         x_size_v = x_size['value']
         out = {
             'value': None,
@@ -360,9 +359,9 @@ class GeluGrad(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, y_backprop_dtype, x_dtype, y_dtype):
-        validator.check_typename("y_backprop_dtype", y_backprop_dtype, (mstype.float16, mstype.float32))
-        validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
-        validator.check_typename("y_dtype", y_dtype, (mstype.float16, mstype.float32))
+        validator.check_tensor_type_same({"y_backprop": y_backprop_dtype}, (mstype.float16, mstype.float32), self.name)
+        validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32), self.name)
+        validator.check_tensor_type_same({"y": y_dtype}, (mstype.float16, mstype.float32), self.name)
         return x_dtype
 
 
@@ -373,56 +372,36 @@ class _PoolGrad(PrimitiveWithInfer):
     def __init__(self, ksize, strides, padding="VALID"):
         self.init_prim_io_names(inputs=['x_origin', 'out_origin', 'grad'], outputs=['output'])
 
-        validator.check_type('ksize', ksize, [int, tuple])
-        validator.check_type('strides', strides, [int, tuple])
-        self.padding = validator.check_string('padding', padding.upper(), ['VALID', 'SAME'])
+        validator.check_value_type('ksize', ksize, [int, tuple], self.name)
+        validator.check_value_type('strides', strides, [int, tuple], self.name)
+        self.padding = validator.check_string('padding', padding.upper(), ['VALID', 'SAME'], self.name)
         self.add_prim_attr("padding", self.padding)
         self.is_maxpoolgradwithargmax = (self.name == "MaxPoolGradWithArgmax")
         if not self.is_maxpoolgradwithargmax:
             self.add_prim_attr('data_format', "NCHW")
 
-        if isinstance(ksize, int):
-            validator.check_integer("ksize", ksize, 1, Rel.GE)
-            if self.is_maxpoolgradwithargmax:
-                self.ksize = (1, ksize, ksize, 1)
+        def _grad_check_int_or_tuple(arg_name, arg_val, is_argmax):
+            validator.check_value_type(arg_name, arg_val, (int, tuple), self.name)
+            error_msg = ValueError(f"For '{self.name}' the '{arg_name}' should be an positive int number "
+                                   f"or a tuple of two or four positive int numbers, but got {arg_val}")
+            if isinstance(arg_val, int):
+                ret = (1, arg_val, arg_val, 1) if is_argmax else (1, 1, arg_val, arg_val)
+            elif len(arg_val) == 2:
+                ret = (1, arg_val[0], arg_val[1], 1) if is_argmax else (1, 1, arg_val[0], arg_val[1])
+            elif len(arg_val) == 4:
+                ret = arg_val
             else:
-                self.ksize = (1, 1, ksize, ksize)
-        else:
-            ksize_error = ValueError(f"The 'ksize' passed to operator {self.name} should be an positive int number"
-                                     f"or a tuple of two or four positive int numbers, but got {ksize}")
-            if len(ksize) != 2 and len(ksize) != 4:
-                raise ksize_error
-            for ksize_val in ksize:
-                if not isinstance(ksize_val, int) or (ksize_val <= 0):
-                    raise ksize_error
-            if len(ksize) == 2 and self.is_maxpoolgradwithargmax:
-                self.ksize = (1, ksize[0], ksize[1], 1)
-            elif len(ksize) == 2 and not self.is_maxpoolgradwithargmax:
-                self.ksize = (1, 1, ksize[0], ksize[1])
-            else:
-                self.ksize = ksize
+                raise error_msg
+            # whether all elements of tuple are positive integers
+            for item in ret:
+                if not isinstance(item, int) or item <= 0:
+                    raise error_msg
+            return ret
+
+        self.ksize = _grad_check_int_or_tuple("ksize", ksize, self.is_maxpoolgradwithargmax)
         self.add_prim_attr("ksize", self.ksize)
 
-        if isinstance(strides, int):
-            validator.check_integer("strides", strides, 1, Rel.GE)
-            if self.is_maxpoolgradwithargmax:
-                self.strides = (1, strides, strides, 1)
-            else:
-                self.strides = (1, 1, strides, strides)
-        else:
-            strides_error = ValueError(f"The 'strides' passed to operator {self.name} should be an positive int number"
-                                       f"or a tuple of two or four positive int numbers, but got {strides}")
-            if len(strides) != 2 and len(strides) != 4:
-                raise strides_error
-            for strides_val in strides:
-                if not isinstance(strides_val, int) or (strides_val <= 0):
-                    raise strides_error
-            if len(strides) == 2 and self.is_maxpoolgradwithargmax:
-                self.strides = (1, strides[0], strides[1], 1)
-            elif len(strides) == 2 and not self.is_maxpoolgradwithargmax:
-                self.strides = (1, 1, strides[0], strides[1])
-            else:
-                self.strides = strides
+        self.strides = _grad_check_int_or_tuple("strides", strides, self.is_maxpoolgradwithargmax)
         self.add_prim_attr("strides", self.strides)
 
 
@@ -529,17 +508,17 @@ class L2NormalizeGrad(PrimitiveWithInfer):
 
     @prim_attr_register
     def __init__(self, axis=0, epsilon=1e-4):
-        validator.check_type('axis', axis, [int])
-        validator.check_type('epsilon', epsilon, [int, float])
+        validator.check_value_type('axis', axis, [int], self.name)
+        validator.check_value_type('epsilon', epsilon, [int, float], self.name)
 
     def infer_shape(self, input_x, out, dout):
-        validator.check_param_equal('input_x', input_x, 'out', out)
-        validator.check_param_equal('input_x', input_x, 'dout', dout)
+        validator.check('input_x shape', input_x, 'out shape', out, Rel.EQ, self.name)
+        validator.check('input_x shape', input_x, 'dout shape', dout, Rel.EQ, self.name)
         return input_x
 
     def infer_dtype(self, input_x, out, dout):
         args = {'input_x': input_x, 'out': out, 'dout': dout}
-        validator.check_type_same(args, mstype.number_type)
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)
         return input_x
 
 
@@ -560,8 +539,8 @@ class LayerNormGrad(Primitive):
     @prim_attr_register
     def __init__(self, begin_norm_axis=1, begin_params_axis=1):
         """init"""
-        self.begin_norm_axis = validator.check_type('begin_norm_axis', begin_norm_axis, [int])
-        self.begin_params_axis = validator.check_type('begin_params_axis', begin_params_axis, [int])
+        self.begin_norm_axis = validator.check_value_type('begin_norm_axis', begin_norm_axis, [int], self.name)
+        self.begin_params_axis = validator.check_value_type('begin_params_axis', begin_params_axis, [int], self.name)
 
     def __call__(self, x, dy, variance, mean, gamma):
         raise NotImplementedError
@@ -573,15 +552,15 @@ class LogSoftmaxGrad(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, axis=-1):
         """init LogSoftmaxGrad"""
-        validator.check_type("axis", axis, [int])
+        validator.check_value_type("axis", axis, [int], self.name)
 
     def infer_shape(self, dout, logits):
         rank = len(logits)
-        validator.check_int_range('axis', self.axis, -rank - 1, rank, Rel.INC_BOTH)
+        validator.check_int_range('axis', self.axis, -rank - 1, rank, Rel.INC_BOTH, self.name)
         return logits
 
     def infer_dtype(self, dout, logits):
-        validator.check_subclass("logits", logits, mstype.tensor)
+        validator.check_subclass("logits", logits, mstype.tensor, self.name)
         return logits
 
 
@@ -590,13 +569,13 @@ class LSTMGradData(PrimitiveWithInfer):
 
     @prim_attr_register
     def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
-        self.input_size = check_int_positive(input_size)
-        self.hidden_size = check_int_positive(hidden_size)
-        self.num_layers = check_int_positive(num_layers)
-        self.has_bias = check_bool(has_bias)
-        self.bidirectional = check_bool(bidirectional)
-        self.dropout = validator.check_type("dropout", dropout, [float])
-        self.dropout = validator.check_number_range('dropout', dropout, 0, 1, Rel.INC_BOTH)
+        self.input_size = validator.check_integer('input_size', input_size, 0, Rel.GT, self.name)
+        self.hidden_size = validator.check_integer('hidden_size', hidden_size, 0, Rel.GT, self.name)
+        self.num_layers = validator.check_integer('num_layers', num_layers, 0, Rel.GT, self.name)
+        self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
+        self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
+        self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
+        self.dropout = validator.check_number_range('dropout', dropout, 0, 1, Rel.INC_BOTH, self.name)
 
         if bidirectional:
             self.num_directions = 2
@@ -606,19 +585,19 @@ class LSTMGradData(PrimitiveWithInfer):
     def infer_shape(self, y_shape, dy_shape, dhy_shape, dcy_shape, w_shape,
                     hx_shape, cx_shape, reserve_shape, state_shape):
         # dhy and dcy should be same shape
-        validator.check_integer("h_shape", len(dhy_shape), 3, Rel.EQ)
-        validator.check_integer("h_shape", len(dhy_shape), len(dcy_shape), Rel.EQ)
-        validator.check_integer("h_shape[0]", dhy_shape[0], dcy_shape[0], Rel.EQ)
-        validator.check_integer("h_shape[1]", dhy_shape[1], dcy_shape[1], Rel.EQ)
-        validator.check_integer("h_shape[2]", dhy_shape[2], dcy_shape[2], Rel.EQ)
+        validator.check_integer("h_shape", len(dhy_shape), 3, Rel.EQ, self.name)
+        validator.check_integer("h_shape", len(dhy_shape), len(dcy_shape), Rel.EQ, self.name)
+        validator.check_integer("h_shape[0]", dhy_shape[0], dcy_shape[0], Rel.EQ, self.name)
+        validator.check_integer("h_shape[1]", dhy_shape[1], dcy_shape[1], Rel.EQ, self.name)
+        validator.check_integer("h_shape[2]", dhy_shape[2], dcy_shape[2], Rel.EQ, self.name)
 
-        validator.check_integer("h_shape[0]", dhy_shape[0], self.num_layers * self.num_directions, Rel.EQ)
-        validator.check_integer("h_shape[2]", dhy_shape[2], self.hidden_size, Rel.EQ)
+        validator.check_integer("h_shape[0]", dhy_shape[0], self.num_layers * self.num_directions, Rel.EQ, self.name)
+        validator.check_integer("h_shape[2]", dhy_shape[2], self.hidden_size, Rel.EQ, self.name)
 
         # dy: (seq_len, batch_size, hidden_size * num_directions)
-        validator.check_integer("dy_shape", len(dy_shape), 3, Rel.EQ)
-        validator.check_integer("dy[1]", dy_shape[1], dhy_shape[1], Rel.EQ)
-        validator.check_integer("dy[2]", dy_shape[2], self.hidden_size * self.num_directions, Rel.EQ)
+        validator.check_integer("dy_shape", len(dy_shape), 3, Rel.EQ, self.name)
+        validator.check_integer("dy[1]", dy_shape[1], dhy_shape[1], Rel.EQ, self.name)
+        validator.check_integer("dy[2]", dy_shape[2], self.hidden_size * self.num_directions, Rel.EQ, self.name)
 
         # (seq_len, batch_size, input_size)
         dx_shape = (y_shape[0], y_shape[1], self.input_size)
@@ -629,11 +608,8 @@ class LSTMGradData(PrimitiveWithInfer):
 
     def infer_dtype(self, y_dtype, dy_dtype, dhy_dtype, dcy_dtype, w_dtype,
                     hx_dtype, cx_dtype, reserve_dtype, state_dtype):
-        validator.check_typename("dy_dtype", dy_dtype, (mstype.float32, mstype.float16))
-        validator.check_typename("dhy_dtype", dhy_dtype, (mstype.float32, mstype.float16))
-        validator.check_typename("dcy_dtype", dcy_dtype, (mstype.float32, mstype.float16))
-        validator.check_typename("datatype", dy_dtype, (dhy_dtype.element_type(),))
-        validator.check_typename("datatype", dy_dtype, (dcy_dtype.element_type(),))
+        args = {"dy": dy_dtype, "dhy": dhy_dtype, "dcy": dcy_dtype}
+        validator.check_tensor_type_same(args, (mstype.float32, mstype.float16), self.name)
         return (dy_dtype, dy_dtype, dy_dtype)
 
 
@@ -642,13 +618,13 @@ class LSTMGradWeight(PrimitiveWithInfer):
 
     @prim_attr_register
     def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
-        self.input_size = check_int_positive(input_size)
-        self.hidden_size = check_int_positive(hidden_size)
-        self.num_layers = check_int_positive(num_layers)
-        self.has_bias = check_bool(has_bias)
-        self.bidirectional = check_bool(bidirectional)
-        self.dropout = validator.check_type("dropout", dropout, [float])
-        self.dropout = validator.check_number_range('dropout', dropout, 0, 1, Rel.INC_BOTH)
+        self.input_size = validator.check_integer('input_size', input_size, 0, Rel.GT, self.name)
+        self.hidden_size = validator.check_integer('hidden_size', hidden_size, 0, Rel.GT, self.name)
+        self.num_layers = validator.check_integer('num_layers', num_layers, 0, Rel.GT, self.name)
+        self.has_bias = validator.check_value_type('has_bias', has_bias, (bool,), self.name)
+        self.bidirectional = validator.check_value_type('bidirectional', bidirectional, (bool,), self.name)
+        self.dropout = validator.check_value_type("dropout", dropout, [float], self.name)
+        self.dropout = validator.check_number_range('dropout', dropout, 0, 1, Rel.INC_BOTH, self.name)
 
         if bidirectional:
             self.num_directions = 2
@@ -693,9 +669,10 @@ class PReLUGrad(PrimitiveWithInfer):
         return y_backprop_shape, w_shape
 
     def infer_dtype(self, y_backprop_dtype, A_dtype, w_dtype):
-        validator.check_typename("y_backprop_dtype", y_backprop_dtype, (mstype.float16, mstype.float32))
-        validator.check_typename("A_dtype", A_dtype, (mstype.float16, mstype.float32))
-        validator.check_typename("w_dtype", w_dtype, (mstype.float16, mstype.float32))
+        valid_types = (mstype.float16, mstype.float32)
+        validator.check_tensor_type_same({"y_backprop": y_backprop_dtype}, valid_types, self.name)
+        validator.check_tensor_type_same({"A_dtype": A_dtype}, valid_types, self.name)
+        validator.check_tensor_type_same({"w_dtype": w_dtype}, valid_types, self.name)
         return y_backprop_dtype, w_dtype
 
 
@@ -725,8 +702,8 @@ class ReLU6Grad(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, y_grad_dtype, x_dtype):
-        validator.check_typename("y_grad_dtype", y_grad_dtype, (mstype.float16, mstype.float32))
-        validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
+        validator.check_tensor_type_same({"y_grad": y_grad_dtype}, (mstype.float16, mstype.float32), self.name)
+        validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32), self.name)
         return x_dtype
 
 
@@ -744,10 +721,8 @@ class ReluGradV2(PrimitiveWithInfer):
         return gradients_shape
 
     def infer_dtype(self, gradients_dtype, mask_dtype):
-        args_type = {'gradients': gradients_dtype, 'mask': mask_dtype}
-        validator.check_args_tensor(args_type)
-        validator.check_typename("gradients_dtype", gradients_dtype, mstype.number_type)
-        validator.check_typename("mask_dtype", mask_dtype, (mstype.uint8,))
+        validator.check_tensor_type_same({'gradients': gradients_dtype}, mstype.number_type, self.name)
+        validator.check_tensor_type_same({'mask': mask_dtype}, (mstype.uint8,), self.name)
         return gradients_dtype
 
 
@@ -762,10 +737,8 @@ class EluGrad(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, y_grad_dtype, x_dtype):
-        args_type = {'y_grad': y_grad_dtype, 'x': x_dtype}
-        validator.check_args_tensor(args_type)
-        args_dtype = {'y_grad_dtype': y_grad_dtype, 'x_dtype': x_dtype}
-        validator.check_type_same(args_dtype, mstype.float_type)
+        args = {'y_grad': y_grad_dtype, 'x': x_dtype}
+        validator.check_tensor_type_same(args, mstype.float_type, self.name)
         return x_dtype
 
 
@@ -821,11 +794,11 @@ class ROIAlignGrad(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, xdiff_shape, pooled_height, pooled_width, spatial_scale, sample_num=2):
         """init ROIAlignGrad"""
-        validator.check_type("pooled_height", pooled_height, [int])
-        validator.check_type("pooled_width", pooled_width, [int])
-        validator.check_type("spatial_scale", spatial_scale, [float])
-        validator.check_type("sample_num", sample_num, [int])
-        validator.check_type("xdiff_shape", xdiff_shape, [tuple])
+        validator.check_value_type("pooled_height", pooled_height, [int], self.name)
+        validator.check_value_type("pooled_width", pooled_width, [int], self.name)
+        validator.check_value_type("spatial_scale", spatial_scale, [float], self.name)
+        validator.check_value_type("sample_num", sample_num, [int], self.name)
+        validator.check_value_type("xdiff_shape", xdiff_shape, [tuple], self.name)
         self.xdiff_shape = xdiff_shape
         self.pooled_height = pooled_height
         self.pooled_width = pooled_width
@@ -850,10 +823,8 @@ class SigmoidGrad(PrimitiveWithInfer):
         return out
 
     def infer_dtype(self, out, dout):
-        validator.check_typename("dout dtype", dout, (mstype.float16, mstype.float32))
-        validator.check_typename("out dtype", out, (mstype.float16, mstype.float32))
-        args = {"out type": out, "dout type": dout}
-        validator.check_type_same(args, mstype.number_type)
+        args = {'out': out, 'dout': dout}
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)
         return out
 
 
@@ -868,8 +839,8 @@ class HSigmoidGrad(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, y_grad_dtype, x_dtype):
-        validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
-        validator.check_typename("x dtype", x_dtype, (mstype.float16, mstype.float32))
+        validator.check_tensor_type_same({"y_grad": y_grad_dtype}, (mstype.float16, mstype.float32), self.name)
+        validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32), self.name)
         return x_dtype
 
 
@@ -884,8 +855,8 @@ class HSwishGrad(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, y_grad_dtype, x_dtype):
-        validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
-        validator.check_typename("x_ dtype", x_dtype, (mstype.float16, mstype.float32))
+        validator.check_tensor_type_same({"y_grad": y_grad_dtype}, (mstype.float16, mstype.float32), self.name)
+        validator.check_tensor_type_same({"x": x_dtype}, (mstype.float16, mstype.float32), self.name)
         return x_dtype
 
 
@@ -898,13 +869,13 @@ class SigmoidCrossEntropyWithLogitsGrad(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['x', 'y', 'dout'], outputs=['x_grad'])
 
     def infer_shape(self, x_shape, y_shape, dout_shape):
-        validator.check_param_equal("x_shape", x_shape, "y_shape", y_shape)
-        validator.check_param_equal("x_shape", x_shape, "dout_shape", dout_shape)
+        validator.check("x_shape", x_shape, "y_shape", y_shape, Rel.EQ, self.name)
+        validator.check("x_shape", x_shape, "dout_shape", dout_shape, Rel.EQ, self.name)
         return x_shape
 
     def infer_dtype(self, x_dtype, y_dtype, dout_dtype):
         args = {"x_dtype": x_dtype, "y_dtype": y_dtype, 'dout_dtype': dout_dtype}
-        validator.check_type_same(args, mstype.number_type)
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)
         return dout_dtype
 
 
@@ -920,8 +891,8 @@ class SliceGrad(PrimitiveWithInfer):
         dy_shape, x_shape, size_value = dy['shape'], x['shape'], size['value']
         dy_shape_len = len(dy_shape)
         for i in range(dy_shape_len):
-            validator.check(f'dy_shape[{i}]', dy_shape[i], f'x_shape[{i}]', x_shape[i], Rel.LE)
-            validator.check(f'dy_shape[{i}]', dy_shape[i], f'size_shape[{i}]', size_value[i], Rel.EQ)
+            validator.check(f'dy_shape[{i}]', dy_shape[i], f'x_shape[{i}]', x_shape[i], Rel.LE, self.name)
+            validator.check(f'dy_shape[{i}]', dy_shape[i], f'size_shape[{i}]', size_value[i], Rel.EQ, self.name)
         return {'shape': x_shape,
                 'dtype': x['dtype'],
                 'value': None}
@@ -935,13 +906,13 @@ class SmoothL1LossGrad(PrimitiveWithInfer):
         pass
 
     def infer_shape(self, prediction, target, dloss):
-        validator.check_param_equal('prediction', prediction, 'target', target)
-        validator.check_param_equal('prediction', prediction, 'dloss', dloss)
+        validator.check('prediction shape', prediction, 'target shape', target, Rel.EQ, self.name)
+        validator.check('prediction shape', prediction, 'dloss shape', dloss, Rel.EQ, self.name)
         return prediction
 
     def infer_dtype(self, prediction, target, dloss):
         args = {"prediction": prediction, "target": target, 'dloss': dloss}
-        validator.check_type_same(args, mstype.number_type)
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)
         return dloss
 
 
@@ -968,11 +939,11 @@ class StridedSliceGrad(PrimitiveWithInfer):
                  new_axis_mask=0,
                  shrink_axis_mask=0):
         """init StrideSliceGrad"""
-        validator.check_type('begin_mask', begin_mask, [int])
-        validator.check_type('end_mask', end_mask, [int])
-        validator.check_type('ellipsis_mask', ellipsis_mask, [int])
-        validator.check_type('new_axis_mask', new_axis_mask, [int])
-        validator.check_type('shrink_axis_mask', shrink_axis_mask, [int])
+        validator.check_value_type('begin_mask', begin_mask, [int], self.name)
+        validator.check_value_type('end_mask', end_mask, [int], self.name)
+        validator.check_value_type('ellipsis_mask', ellipsis_mask, [int], self.name)
+        validator.check_value_type('new_axis_mask', new_axis_mask, [int], self.name)
+        validator.check_value_type('shrink_axis_mask', shrink_axis_mask, [int], self.name)
         self.init_prim_io_names(inputs=['dy', 'shapex', 'begin', 'end', 'strides'], outputs=['output'])
 
     def __infer__(self, dy, shapex, begin, end, strides):
@@ -992,10 +963,8 @@ class TanhGrad(PrimitiveWithInfer):
         return out
 
     def infer_dtype(self, out, dout):
-        validator.check_subclass("out", out, mstype.tensor)
-        validator.check_subclass("dout", dout, mstype.tensor)
-        args = {"out type": out, "dout type": dout}
-        validator.check_type_same(args, mstype.number_type)
+        args = {"out": out, "dout": dout}
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)
         return out
 
 
@@ -1005,13 +974,13 @@ class MirrorPadGrad(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, mode="REFLECT"):
         """init MirrorPad"""
-        validator.check_string('mode', mode, ['REFLECT', 'SYMMETRIC'])
+        validator.check_string('mode', mode, ['REFLECT', 'SYMMETRIC'], self.name)
         self.mode = mode
 
     def __infer__(self, dout, paddings, x):
-        validator.check_subclass("dout", dout['dtype'], mstype.tensor)
-        validator.check_subclass("paddings", paddings['dtype'], mstype.tensor)
-        validator.check_subclass("input_x", x['dtype'], mstype.tensor)
+        validator.check_subclass("dout", dout['dtype'], mstype.tensor, self.name)
+        validator.check_subclass("paddings", paddings['dtype'], mstype.tensor, self.name)
+        validator.check_subclass("input_x", x['dtype'], mstype.tensor, self.name)
         return {'shape': x['shape'],
                 'dtype': dout['dtype'],
                 'value': None}
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index bb6755bef3..43398a5f29 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -1316,7 +1316,7 @@ class Concat(PrimitiveWithInfer):
         axis = self.axis
         x_shp = input_x['shape']
         x_type = input_x['dtype']
-        _, all_shp, _ = _get_concat_offset(x_shp, x_type, axis)
+        _, all_shp, _ = _get_concat_offset(x_shp, x_type, axis, self.name)
         self.add_prim_attr('T', x_type[0].element_type())
         self.add_prim_attr('inputNums', len(x_shp))
         ret_shp = x_shp[0].copy()

From 15ccc5c56e80095d2b43441882c9e158ead867bc Mon Sep 17 00:00:00 2001
From: wukesong <wukesong1@huawei.com>
Date: Thu, 23 Apr 2020 16:10:32 +0800
Subject: [PATCH 018/242] modify lenet&alexnet

---
 example/alexnet_cifar10/README.md |  9 ++++-----
 example/lenet_mnist/README.md     | 13 ++++++-------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/example/alexnet_cifar10/README.md b/example/alexnet_cifar10/README.md
index 0efd3ca1bf..99245dfe1e 100644
--- a/example/alexnet_cifar10/README.md
+++ b/example/alexnet_cifar10/README.md
@@ -25,7 +25,7 @@ This is the simple tutorial for training AlexNet in MindSpore.
 python train.py --data_path cifar-10-batches-bin
 ```
 
-You can get loss with each step similar to this:
+You will get the loss value of each step as follows:
 
 ```bash
 epoch: 1 step: 1, loss is 2.2791853
@@ -36,17 +36,16 @@ epoch: 1 step: 1538, loss is 1.0221305
 ...
 ```
 
-Then, test AlexNet according to network model
+Then, evaluate AlexNet according to network model
 ```python
-# test AlexNet, 1 epoch training accuracy is up to 51.1%; 10 epoch training accuracy is up to 81.2%
+# evaluate AlexNet, 1 epoch training accuracy is up to 51.1%; 10 epoch training accuracy is up to 81.2%
 python eval.py --data_path cifar-10-verify-bin --mode test --ckpt_path checkpoint_alexnet-1_1562.ckpt
 ```
 
 ## Note
-There are some optional arguments:
+Here are some optional parameters:
 
 ```bash
--h, --help           show this help message and exit
 --device_target {Ascend,GPU}
                      device where the code will be implemented (default: Ascend)
 --data_path DATA_PATH
diff --git a/example/lenet_mnist/README.md b/example/lenet_mnist/README.md
index fea92883c6..72f3681e30 100644
--- a/example/lenet_mnist/README.md
+++ b/example/lenet_mnist/README.md
@@ -19,8 +19,8 @@ This is the simple and basic tutorial for constructing a network in MindSpore.
     │      t10k-labels.idx1-ubyte
     │
     └─train
-            train-images.idx3-ubyte
-            train-labels.idx1-ubyte
+           train-images.idx3-ubyte
+           train-labels.idx1-ubyte
 ```
 
 ## Running the example
@@ -30,7 +30,7 @@ This is the simple and basic tutorial for constructing a network in MindSpore.
 python train.py --data_path MNIST_Data
 ```
 
-You can get loss with each step similar to this:
+You will get the loss value of each step as follows:
 
 ```bash
 epoch: 1 step: 1, loss is 2.3040335
@@ -41,17 +41,16 @@ epoch: 1 step: 1741, loss is 0.05018193
 ...
 ```
 
-Then, test LeNet according to network model
+Then, evaluate LeNet according to network model
 ```python
-# test LeNet, after 1 epoch training, the accuracy is up to 96.5%
+# evaluate LeNet, after 1 epoch training, the accuracy is up to 96.5%
 python eval.py --data_path MNIST_Data --mode test --ckpt_path checkpoint_lenet-1_1875.ckpt
 ```
 
 ## Note
-There are some optional arguments:
+Here are some optional parameters:
 
 ```bash
--h, --help           show this help message and exit
 --device_target {Ascend,GPU,CPU}
                      device where the code will be implemented (default: Ascend)
 --data_path DATA_PATH

From 8f1d140de1a00ce2ffcb924abfdcaa50aee906df Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Thu, 23 Apr 2020 04:14:47 -0400
Subject: [PATCH 019/242] change error type

---
 mindspore/_checkparam.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py
index 3543f58cf5..dba1c13b3b 100644
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@@ -304,9 +304,9 @@ class Validator:
         type_names = [get_typename(t) for t in valid_types]
         msg_prefix = f'For \'{prim_name}\' the' if prim_name else 'The'
         if len(valid_types) == 1:
-            raise ValueError(f'{msg_prefix} type of `{arg_name}` should be {type_names[0]},'
+            raise TypeError(f'{msg_prefix} type of `{arg_name}` should be {type_names[0]},'
                              f' but got {get_typename(arg_type)}.')
-        raise ValueError(f'{msg_prefix} type of `{arg_name}` should be one of {type_names},'
+        raise TypeError(f'{msg_prefix} type of `{arg_name}` should be one of {type_names},'
                          f' but got {get_typename(arg_type)}.')
 
     @staticmethod
@@ -417,7 +417,7 @@ class ParamValidator:
             """func for raising error message when check failed"""
             type_names = [t.__name__ for t in valid_types]
             num_types = len(valid_types)
-            raise ValueError(f'The type of `{arg_name}` should be {"one of " if num_types > 1 else ""}'
+            raise TypeError(f'The type of `{arg_name}` should be {"one of " if num_types > 1 else ""}'
                              f'{type_names if num_types > 1 else type_names[0]}, but got {type(arg_value).__name__}.')
 
         if isinstance(arg_value, type(mstype.tensor)):

From f90629a01ddcfc1f3130d5c10bccfa3747fa6c25 Mon Sep 17 00:00:00 2001
From: wsc <wangshaocong1@huawei.com>
Date: Tue, 21 Apr 2020 20:01:18 +0800
Subject: [PATCH 020/242] Add readme file of BERT model

---
 example/Bert_NEZHA_cnwiki/README.md | 131 ++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 example/Bert_NEZHA_cnwiki/README.md

diff --git a/example/Bert_NEZHA_cnwiki/README.md b/example/Bert_NEZHA_cnwiki/README.md
new file mode 100644
index 0000000000..cd86b3bdd6
--- /dev/null
+++ b/example/Bert_NEZHA_cnwiki/README.md
@@ -0,0 +1,131 @@
+# BERT Example
+## Description
+This example implements pre-training, fine-tuning and evaluation of [BERT-base](https://github.com/google-research/bert) (the base version of the BERT model) and [BERT-NEZHA](https://github.com/huawei-noah/Pretrained-Language-Model) (a Chinese pretrained language model developed by Huawei, which introduced an improvement of Functional Relative Positional Encoding as an effective positional encoding scheme).
+
+## Requirements
+- Install [MindSpore](https://www.mindspore.cn/install/en).
+- Download the zhwiki dataset from <https://dumps.wikimedia.org/zhwiki> for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). Convert the dataset to TFRecord format and move the files to a specified path.
+- Download the CLUE dataset from <https://www.cluebenchmarks.com> for fine-tuning and evaluation.
+>  Notes:
+   If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file.
+
+## Running the Example
+### Pre-Training
+- Set options in `config.py`. Make sure the 'DATA_DIR'(path to the dataset) and 'SCHEMA_DIR'(path to the json schema file) are set to your own path. Click [here](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/loading_the_datasets.html#tfrecord) for more information about dataset and the json schema file.
+
+- Run `run_pretrain.py` for pre-training of BERT-base and BERT-NEZHA model.
+
+    ``` bash
+    python run_pretrain.py --backend=ms
+    ```
+
+### Fine-Tuning
+- Set options in `finetune_config.py`. Make sure the 'data_file', 'schema_file' and 'ckpt_file' are set to your own path, set the 'pre_training_ckpt' to save the checkpoint files generated.
+
+- Run `finetune.py` for fine-tuning of BERT-base and BERT-NEZHA model.
+
+    ```bash
+    python finetune.py --backend=ms
+    ```
+
+### Evaluation
+- Set options in `evaluation_config.py`. Make sure the 'data_file', 'schema_file' and 'finetune_ckpt' are set to your own path.
+
+- Run `evaluation.py` for evaluation of BERT-base and BERT-NEZHA model.
+
+    ```bash
+    python evaluation.py --backend=ms
+    ```
+
+## Usage
+### Pre-Training
+``` 
+usage: run_pretrain.py [--backend BACKEND]
+
+optional parameters:
+    --backend, BACKEND            MindSpore backend: ms
+```
+
+## Options and Parameters
+This section lists the parameters of the BERT model and the options for training, which are set in the files `config.py`, `finetune_config.py` and `evaluation_config.py` respectively.
+### Options:
+```
+Pre-Training:
+    bert_network                    version of BERT model: base | large, default is base
+    epoch_size                      repeat counts of training: N, default is 40
+    dataset_sink_mode               use dataset sink mode or not: True | False, default is True
+    do_shuffle                      shuffle the dataset or not: True | False, default is True
+    do_train_with_lossscale         use lossscale or not: True | False, default is True
+    loss_scale_value                initial value of loss scale: N, default is 2^32
+    scale_factor                    factor used to update loss scale: N, default is 2
+    scale_window                    steps between two updates of the loss scale: N, default is 1000
+    save_checkpoint_steps           steps to save a checkpoint: N, default is 2000
+    keep_checkpoint_max             maximum number of checkpoint files to keep: N, default is 1
+    init_ckpt                       checkpoint file to load: PATH, default is ""
+    data_dir                        dataset file to load: PATH, default is "/your/path/cn-wiki-128"
+    schema_dir                      dataset schema file to load: PATH, default is "your/path/datasetSchema.json"
+    optimizer                       optimizer used in the network: AdamWeightDecayDynamicLR | Lamb | Momentum, default is "Lamb"
+
+Fine-Tuning:
+    task                            task type: NER | XNLI | LCQMC | SENTI
+    data_file                       dataset file to load: PATH, default is "/your/path/cn-wiki-128"
+    schema_file                     dataset schema file to load: PATH, default is "/your/path/datasetSchema.json"
+    epoch_num                       repeat counts of training: N, default is 40
+    ckpt_prefix                     prefix used to save checkpoint files: PREFIX, default is "bert"
+    ckpt_dir                        path to save checkpoint files: PATH, default is None
+    pre_training_ckpt               checkpoint file to load: PATH, default is "/your/path/pre_training.ckpt"
+    optimizer                       optimizer used in the network: AdamWeightDecayDynamicLR | Lamb | Momentum, default is "Lamb"
+
+Evaluation:
+    task                            task type: NER | XNLI | LCQMC | SENTI
+    data_file                       dataset file to load: PATH, default is "/your/path/evaluation.tfrecord"
+    schema_file                     dataset schema file to load: PATH, default is "/your/path/schema.json"
+    finetune_ckpt                   checkpoint file to load: PATH, default is "/your/path/your.ckpt"
+```
+
+### Parameters:
+```
+Parameters for dataset and network (Pre-Training/Fine-Tuning/Evaluation):
+    batch_size                      batch size of input dataset: N, default is 16
+    seq_length                      length of input sequence: N, default is 128
+    vocab_size                      size of each embedding vector: N, default is 21136
+    hidden_size                     size of bert encoder layers: N, default is 768
+    num_hidden_layers               number of hidden layers: N, default is 12
+    num_attention_heads             number of attention heads: N, default is 12
+    intermediate_size               size of intermediate layer: N, default is 3072
+    hidden_act                      activation function used: ACTIVATION, default is "gelu"
+    hidden_dropout_prob             dropout probability for BertOutput: Q, default is 0.1
+    attention_probs_dropout_prob    dropout probability for BertAttention: Q, default is 0.1
+    max_position_embeddings         maximum length of sequences: N, default is 512
+    type_vocab_size                 size of token type vocab: N, default is 16
+    initializer_range               initialization value of TruncatedNormal: Q, default is 0.02
+    use_relative_positions          use relative positions or not: True | False, default is False
+    input_mask_from_dataset         use the input mask loaded from dataset or not: True | False, default is True
+    token_type_ids_from_dataset     use the token type ids loaded from dataset or not: True | False, default is True
+    dtype                           data type of input: mstype.float16 | mstype.float32, default is mstype.float32
+    compute_type                    compute type in BertTransformer: mstype.float16 | mstype.float32, default is mstype.float16
+
+Parameters for optimizer:
+    AdamWeightDecayDynamicLR:
+    decay_steps                     steps of the learning rate decay: N, default is 12276*3
+    learning_rate                   value of learning rate: Q, default is 1e-5
+    end_learning_rate               value of end learning rate: Q, default is 0.0
+    power                           power: Q, default is 10.0
+    warmup_steps                    steps of the learning rate warm up: N, default is 2100
+    weight_decay                    weight decay: Q, default is 1e-5
+    eps                             term added to the denominator to improve numerical stability: Q, default is 1e-6
+
+    Lamb:
+    decay_steps                     steps of the learning rate decay: N, default is 12276*3
+    learning_rate                   value of learning rate: Q, default is 1e-5
+    end_learning_rate               value of end learning rate: Q, default is 0.0
+    power                           power: Q, default is 5.0
+    warmup_steps                    steps of the learning rate warm up: N, default is 2100
+    weight_decay                    weight decay: Q, default is 1e-5
+    decay_filter                    function to determine whether to apply weight decay on parameters: FUNCTION, default is lambda x: False
+
+    Momentum:
+    learning_rate                   value of learning rate: Q, default is 2e-5
+    momentum                        momentum for the moving average: Q, default is 0.9
+```
+

From ff57caceb98c1261642c7f48a60a3116435f3c8d Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Thu, 23 Apr 2020 04:31:14 -0400
Subject: [PATCH 021/242] change error type

---
 mindspore/_checkparam.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py
index dba1c13b3b..9ecf0c9e24 100644
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@@ -305,9 +305,9 @@ class Validator:
         msg_prefix = f'For \'{prim_name}\' the' if prim_name else 'The'
         if len(valid_types) == 1:
             raise TypeError(f'{msg_prefix} type of `{arg_name}` should be {type_names[0]},'
-                             f' but got {get_typename(arg_type)}.')
+                            f' but got {get_typename(arg_type)}.')
         raise TypeError(f'{msg_prefix} type of `{arg_name}` should be one of {type_names},'
-                         f' but got {get_typename(arg_type)}.')
+                        f' but got {get_typename(arg_type)}.')
 
     @staticmethod
     def check_float_legal_value(arg_name, arg_value, prim_name):
@@ -418,7 +418,7 @@ class ParamValidator:
             type_names = [t.__name__ for t in valid_types]
             num_types = len(valid_types)
             raise TypeError(f'The type of `{arg_name}` should be {"one of " if num_types > 1 else ""}'
-                             f'{type_names if num_types > 1 else type_names[0]}, but got {type(arg_value).__name__}.')
+                            f'{type_names if num_types > 1 else type_names[0]}, but got {type(arg_value).__name__}.')
 
         if isinstance(arg_value, type(mstype.tensor)):
             arg_value = arg_value.element_type()

From 14df77117539ce3fa13655733545314439cc48bb Mon Sep 17 00:00:00 2001
From: huanghui <huanghui44@huawei.com>
Date: Thu, 23 Apr 2020 16:45:06 +0800
Subject: [PATCH 022/242] fix confusion_softmax_grad_rule pass

---
 .../ascend/ir_fusion/confusion_softmax_grad_rule.cc             | 2 +-
 .../gtest_input/pre_activate/confusion_softmax_grad_rule.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc
index 8078247c2a..a524d694e6 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_softmax_grad_rule.cc
@@ -47,7 +47,7 @@ void SetAttrsForFusionNode(const AnfNodePtr &sub_anf, const AnfNodePtr &fusion_n
 
 const BaseRef ConfusionSoftmaxGradRule::DefinePattern() const {
   return VectorRef(
-    {prim::kPrimSub, input0_, VectorRef({prim::kPrimReduceSum, VectorRef({prim::kPrimMul, input0_, input1_})})});
+    {prim::kPrimSub, input0_, VectorRef({prim::kPrimReduceSum, VectorRef({prim::kPrimMul, input1_, input0_})})});
 }
 
 const AnfNodePtr ConfusionSoftmaxGradRule::Process(const FuncGraphPtr &graph, const AnfNodePtr &node,
diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/confusion_softmax_grad_rule.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/confusion_softmax_grad_rule.py
index 2727ef641d..cd71eb5d0b 100644
--- a/tests/ut/cpp/python_input/gtest_input/pre_activate/confusion_softmax_grad_rule.py
+++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/confusion_softmax_grad_rule.py
@@ -41,7 +41,7 @@ def test_confusion_softmax_grad_rule(tag):
 
     @fns
     def before(input0, input1):
-        res = mul(input0, input1)
+        res = mul(input1, input0)
         # input axis will be convert to attr in ConstructKernelGraph step
         res = reduce_sum(res, axis)
         res = sub(input0, res)

From 17adba5eb56260e8bd9ba7dad87f748aca58cb5d Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Thu, 23 Apr 2020 15:23:59 +0800
Subject: [PATCH 023/242] Optimize flow of exproting onnx

---
 mindspore/ccsrc/pipeline/pipeline.cc | 40 ++++++++++++++++++----------
 mindspore/ccsrc/pipeline/pipeline.h  |  1 +
 mindspore/ccsrc/utils/profile.cc     |  4 +--
 mindspore/common/api.py              |  6 +++--
 mindspore/train/serialization.py     |  2 +-
 5 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index fca105d13c..7524fb9d53 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -294,6 +294,30 @@ void ExecutorPy::SaveCompiledGraph(const std::string &phase_s) {
   MS_LOG(INFO) << "End save compiled func graph!";
 }
 
+void ExecutorPy::SaveCompiledGraphToPb(const std::string &phase_s) {
+#ifdef ENABLE_DUMP_IR  // the whole body is compiled out otherwise, making this function a no-op
+  // save the compiled func graph of phase `phase_s` to the file "ms_output_<prefix>.pb" in protobuf format
+  FuncGraphPtr func_graph = info_[phase_s]->resource->func_graph();
+  MS_EXCEPTION_IF_NULL(func_graph);
+  std::string name_prefix = phase_s.substr(0, phase_s.find("."));  // phase name up to (not including) the first '.'
+  std::string pb_filename = std::string("ms_output_") + name_prefix + ".pb";
+  std::string filename = GetFilePathName(pb_filename);
+
+  MS_LOG(INFO) << "Begin saving graph to file <<'" << filename << "' in protobuf formart.";
+  ChangeFileMode(filename, S_IRWXU);  // presumably re-enables writing if an earlier dump left the file read-only
+  std::ofstream ofs(filename);
+  if (!ofs.is_open()) {
+    MS_LOG(ERROR) << "Open file '" << filename << "' failed!";
+    return;
+  }
+  ofs << GetFuncGraphProtoString(func_graph);
+  ofs.close();
+  // restrict the dumped file to read-only for the owning user
+  ChangeFileMode(filename, S_IRUSR);
+  MS_LOG(INFO) << "End saving graph to file in protobuf format";
+#endif
+}
+
 bool ExecutorPy::ChangeExportGeirUseVmFlag(bool use_vm, const std::string &phase_s) const {
   std::string phase_prefix = GetPhasePrefix(phase_s);
 
@@ -365,6 +389,8 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
   info_[phase_s] = executor_info;
   pip->Run();
 
+  // save compile graph to file in protobuf format
+  SaveCompiledGraphToPb(phase_s);
   // save the run graph func to MsPipeLine
   SaveCompiledGraph(phase_s);
 
@@ -557,20 +583,6 @@ void Pipeline::Run() {
     std::string user_graph_file = GetFilePathName("ModelDigraph.dot");
     MS_LOG(DEBUG) << "Save user graph to: " << user_graph_file;
     draw::DrawUserFuncGraph(user_graph_file, user_graph);
-
-#ifdef ENABLE_DUMP_IR
-    std::string filename = GetFilePathName("ms_output.pb");
-    ChangeFileMode(filename, S_IRWXU);
-    std::ofstream ofs(filename);
-    if (!ofs.is_open()) {
-      MS_LOG(ERROR) << "Open file '" << filename << "' failed!";
-      return;
-    }
-    ofs << GetFuncGraphProtoString(user_graph);
-    ofs.close();
-    // set file mode to read only by user
-    ChangeFileMode(filename, S_IRUSR);
-#endif
   }
   MS_LOG(INFO) << "End";
 }
diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/pipeline.h
index 865c961ac1..38d4f1937f 100644
--- a/mindspore/ccsrc/pipeline/pipeline.h
+++ b/mindspore/ccsrc/pipeline/pipeline.h
@@ -70,6 +70,7 @@ class ExecutorPy : public std::enable_shared_from_this<ExecutorPy> {
   ~ExecutorPy();
 
   void SaveCompiledGraph(const std::string &phase_s);
+  void SaveCompiledGraphToPb(const std::string &phase_s);
   bool CompileInner(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm);
   bool Compile(const py::object &obj, const py::tuple &args, const py::object &phase, bool use_vm);
 
diff --git a/mindspore/ccsrc/utils/profile.cc b/mindspore/ccsrc/utils/profile.cc
index e9e7920e0c..9fb9dc9f1a 100644
--- a/mindspore/ccsrc/utils/profile.cc
+++ b/mindspore/ccsrc/utils/profile.cc
@@ -158,7 +158,7 @@ void Profile::Print(void) {
   std::ostringstream oss;
   PrintProfile(oss, *ctx_ptr_->time_info_);
   std::string text = oss.str();
-  // the length of text is too long to use MS_LOGINFO, use printf to print it
+  // use printf rather than MS_LOG(INFO) to output profile info, since enabling the log affects performance
   (void)printf("%s", text.c_str());
   (void)fflush(stdout);
 }
@@ -358,7 +358,7 @@ void MsProfile::Print() {
     PrintTimeStat(oss, groups[i], prefix);
   }
   std::string text = oss.str();
-  // the length of text is too long to use MS_LOGINFO, use printf to print it
+  // use printf rather than MS_LOG(INFO) to output profile info, since enabling the log affects performance
   (void)printf("\nTime group info:\n%s", text.c_str());
   (void)fflush(stdout);
 }
diff --git a/mindspore/common/api.py b/mindspore/common/api.py
index b5450bc5a3..5016dd58bf 100644
--- a/mindspore/common/api.py
+++ b/mindspore/common/api.py
@@ -328,7 +328,7 @@ class _Executor:
             raise TypeError('Parameters need OrderedDict type, but got {}'.
                             format(type(params)))
 
-    def compile(self, obj, *args, phase='predict', params=None):
+    def compile(self, obj, *args, phase='predict', params=None, do_convert=True):
         """
         Compiles graph.
 
@@ -337,6 +337,7 @@ class _Executor:
             args (tuple): Function or cell input arguments.
             phase (str): The name of compile phase. Default: 'predict'.
             params (OrderedDict): The parameters dictionary used for init data graph. Default: None.
+            do_convert (bool): When set to True, convert ME graph to GE graph after compiling graph.
 
         Return:
             Str, the full phase of the cell.
@@ -368,7 +369,8 @@ class _Executor:
 
         if graph is None:
             logger.error("%r graph compile failed.", phase)
-
+        if not do_convert:
+            return phase, True
         if not enable_debug_runtime or enable_ge:
             if _get_parallel_mode() in ["auto_parallel", "semi_auto_parallel"]:
                 obj.parameter_layout_dict = self._executor.get_parameter_layout(phase)
diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py
index 49cc5318fa..5c1cc5eba0 100644
--- a/mindspore/train/serialization.py
+++ b/mindspore/train/serialization.py
@@ -450,7 +450,7 @@ def export(net, *inputs, file_name, file_format='GEIR'):
         _executor.export(net, file_name, file_format)
     elif file_format == 'ONNX':  # file_format is 'ONNX'
         phase_name = 'export_onnx'
-        graph_id, _ = _executor.compile(net, *inputs, phase=phase_name)
+        graph_id, _ = _executor.compile(net, *inputs, phase=phase_name, do_convert=False)
         onnx_stream = _executor._get_func_graph_proto(graph_id)
         with open(file_name, 'wb') as f:
             os.chmod(file_name, stat.S_IWUSR | stat.S_IRUSR)

From 60971f26d8095ba47ffad0083cd235b02362f2eb Mon Sep 17 00:00:00 2001
From: dinghao <dinghao7@huawei.com>
Date: Thu, 23 Apr 2020 10:48:40 +0800
Subject: [PATCH 024/242] fix clipbynorm in pynative mode

---
 mindspore/nn/layer/basic.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py
index 2449eea9b4..2f8b38e818 100644
--- a/mindspore/nn/layer/basic.py
+++ b/mindspore/nn/layer/basic.py
@@ -23,6 +23,7 @@ from mindspore.ops import functional as F
 from mindspore.ops.functional import identity
 from mindspore.common.parameter import Parameter
 from mindspore._extends import cell_attr_register
+from mindspore.common.api import ms_function
 from ..cell import Cell
 from .activation import get_activation
 from ..._checkparam import Validator as validator
@@ -261,7 +262,9 @@ class ClipByNorm(Cell):
         self.expand_dims = P.ExpandDims()
         self.dtype = P.DType()
 
+    @ms_function
     def construct(self, x, clip_norm):
+        """add ms_function decorator for pynative mode"""
         mul_x = F.square(x)
         l2sum = self.cast(self.reduce_sum(mul_x, self.axis), mstype.float32)
         cond = self.greater_(l2sum, self.zero)

From f6a4f3d155d026c1a84d1d7778f4cfdef80f912c Mon Sep 17 00:00:00 2001
From: zhousiyi <zhousiyi@huawei.com>
Date: Wed, 8 Apr 2020 02:45:50 +0000
Subject: [PATCH 025/242] [static_analysis]: remove the TrivialPrimEvaluator
 cache.  add cache for PythonPrimEvaluator. Be careful that  the infer
 function of PythonPrimitive in python code should  be idempotent.

---
 mindspore/ccsrc/optimizer/optimizer.h         | 21 ++++++++++++----
 mindspore/ccsrc/parallel/step_parallel.cc     | 16 ++++++++++---
 mindspore/ccsrc/pipeline/parse/resolve.cc     |  3 +--
 mindspore/ccsrc/pipeline/pass.cc              |  8 +++----
 .../pipeline/static_analysis/evaluator.cc     |  1 -
 .../ccsrc/pipeline/static_analysis/prim.cc    |  5 ++++
 .../static_analysis/static_analysis.cc        | 24 ++++++++++++++++++-
 .../static_analysis/static_analysis.h         |  1 +
 tests/ut/cpp/optimizer/optimizer_test.cc      |  3 +--
 9 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/mindspore/ccsrc/optimizer/optimizer.h b/mindspore/ccsrc/optimizer/optimizer.h
index f67466efba..c4455484c4 100644
--- a/mindspore/ccsrc/optimizer/optimizer.h
+++ b/mindspore/ccsrc/optimizer/optimizer.h
@@ -17,7 +17,9 @@
 #ifndef MINDSPORE_CCSRC_OPTIMIZER_OPTIMIZER_H_
 #define MINDSPORE_CCSRC_OPTIMIZER_OPTIMIZER_H_
 
+#include <algorithm>
 #include <functional>
+#include <iterator>
 #include <memory>
 #include <string>
 #include <vector>
@@ -129,29 +131,38 @@ class Optimizer : public std::enable_shared_from_this<Optimizer> {
     return optimizer;
   }
 
-  FuncGraphPtr step(FuncGraphPtr func_graph, const abstract::AbstractBasePtrList &args_spec, bool use_profile = true) {
+  FuncGraphPtr step(FuncGraphPtr func_graph, bool use_profile = true) {
     // Optimizer step counter;
     int counter = 1;
     bool changes = true;
 
     while (changes) {
       changes = false;
-      auto run_runc = [&counter, &func_graph, &args_spec, &changes, use_profile, this]() {
+      auto run_runc = [&counter, &func_graph, &changes, use_profile, this]() {
         for (size_t i = 0; i < passes_.size(); ++i) {
           const OptPass &opt = passes_[i];
-          auto opt_func = [&func_graph, &args_spec, &changes, &opt, this]() {
+          auto opt_func = [&func_graph, &changes, &opt, this]() {
             if (opt.is_renormalize()) {
               auto resource_ptr = std::dynamic_pointer_cast<pipeline::Resource>(resource_);
               if (resource_ptr != nullptr) {
+                // StepParallel may replace the AbstractValue of the parameters of func_graph,
+                // So generate the args_spec from parameters.
+                abstract::AbstractBasePtrList maybe_new_args_spec;
                 if (is_watch_renormalize_) {
                   if (untyped_nodes_.size() > 0) {
-                    func_graph = pipeline::Renormalize(resource_ptr, func_graph, args_spec);
+                    std::transform(func_graph->parameters().begin(), func_graph->parameters().end(),
+                                   std::back_inserter(maybe_new_args_spec),
+                                   [](AnfNodePtr param) -> AbstractBasePtr { return param->abstract(); });
+                    func_graph = pipeline::Renormalize(resource_ptr, func_graph, maybe_new_args_spec);
                     clear_untyped_nodes();
                   } else {
                     MS_LOG(INFO) << "Optimizer::step: Skipping Renormalize because untyped_nodes_ is empty.";
                   }
                 } else {
-                  func_graph = pipeline::Renormalize(resource_ptr, func_graph, args_spec);
+                  std::transform(func_graph->parameters().begin(), func_graph->parameters().end(),
+                                 std::back_inserter(maybe_new_args_spec),
+                                 [](AnfNodePtr param) -> AbstractBasePtr { return param->abstract(); });
+                  func_graph = pipeline::Renormalize(resource_ptr, func_graph, maybe_new_args_spec);
                 }
               }
             } else if (opt(func_graph, shared_from_this())) {
diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index c24c14abf6..39b1325f76 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -1230,7 +1230,11 @@ void SetParallelShape(const AnfNodePtr &parameter, const std::pair<AnfNodePtr, i
                 << MakeValue(slice_shape)->ToString();
   std::shared_ptr<abstract::BaseShape> parallel_shape = std::make_shared<abstract::Shape>(slice_shape);
   MS_EXCEPTION_IF_NULL(parallel_shape);
-  abstract->set_shape(parallel_shape);
+  // Don't modify it in-place as the pointer of this AbstractValue may be used as a cache key in StaticAnalysis.
+  auto cloned_abstract = abstract->Clone();
+  MS_EXCEPTION_IF_NULL(cloned_abstract);
+  cloned_abstract->set_shape(parallel_shape);
+  parameter->set_abstract(cloned_abstract);
   TensorLayout tensor_layout = tensorinfo_in.tensor_layout();
   ParameterPtr parameter_ptr = parameter->cast<ParameterPtr>();
   MS_EXCEPTION_IF_NULL(parameter_ptr);
@@ -1330,7 +1334,10 @@ void SetClonedTensorShapeForOptimizer(const FuncGraphPtr &root) {
       cloned_parameter->set_tensor_layout(cloned_from_parameter->tensor_layout());
       MS_EXCEPTION_IF_NULL(cloned_parameter_node->abstract());
       MS_EXCEPTION_IF_NULL(cloned_from_node->abstract());
-      cloned_parameter_node->abstract()->set_shape(cloned_from_node->abstract()->GetShapeTrack());
+      auto cloned_abstract = cloned_parameter_node->abstract()->Clone();
+      MS_EXCEPTION_IF_NULL(cloned_abstract);
+      cloned_abstract->set_shape(cloned_from_node->abstract()->GetShapeTrack());
+      cloned_parameter_node->set_abstract(cloned_abstract);
       MS_LOG(INFO) << "The parameter: " << cloned_parameter->name()
                    << " is cloned, the be cloned parameter is: " << cloned_from_parameter->name()
                    << ", clone index is:  " << cloned_index;
@@ -1743,7 +1750,10 @@ void SplitSens(const AnfNodePtr &grad_sens_node, const TensorLayout &loss_grad_l
       auto slice_shape = loss_grad_layout.slice_shape().array();
       std::shared_ptr<abstract::BaseShape> parallel_shape = std::make_shared<abstract::Shape>(slice_shape);
       MS_EXCEPTION_IF_NULL(parallel_shape);
-      abstract->set_shape(parallel_shape);
+      auto cloned_abstract = abstract->Clone();
+      MS_EXCEPTION_IF_NULL(cloned_abstract);
+      cloned_abstract->set_shape(parallel_shape);
+      sens_tensor_node->set_abstract(cloned_abstract);
       auto sens_tensor_param = sens_tensor_node->cast<ParameterPtr>();
       sens_tensor_param->set_tensor_layout(std::make_shared<TensorLayout>(loss_grad_layout));
       return;
diff --git a/mindspore/ccsrc/pipeline/parse/resolve.cc b/mindspore/ccsrc/pipeline/parse/resolve.cc
index 284512c943..18f186dbb1 100644
--- a/mindspore/ccsrc/pipeline/parse/resolve.cc
+++ b/mindspore/ccsrc/pipeline/parse/resolve.cc
@@ -276,9 +276,8 @@ bool ResolveFuncGraph(const FuncGraphPtr &func_graph, const pipeline::ResourceBa
 
   (void)parse::python_adapter::set_python_scoped();
 
-  abstract::AbstractBasePtrList args_spec;
   MS_EXCEPTION_IF_NULL(opt_resolve);
-  (void)opt_resolve->step(func_graph, args_spec, use_profile);
+  (void)opt_resolve->step(func_graph, use_profile);
   return true;
 }
 
diff --git a/mindspore/ccsrc/pipeline/pass.cc b/mindspore/ccsrc/pipeline/pass.cc
index 6cdf641443..6ce6c4603d 100644
--- a/mindspore/ccsrc/pipeline/pass.cc
+++ b/mindspore/ccsrc/pipeline/pass.cc
@@ -205,14 +205,15 @@ bool OptPassGroup(const ResourcePtr &res, const std::string &name) {
     return false;
   }
 
-  abstract::AbstractBasePtrList args = res->args_spec();
   FuncGraphPtr func_graph = res->func_graph();
   MS_LOG(DEBUG) << "Start " << name << " func graph:" << func_graph->ToString() << ", "
                 << func_graph->get_return()->DebugString(true);
   InitOpt(res);
   if (g_pass_opts.find(name) != g_pass_opts.end()) {
-    res->set_func_graph(g_pass_opts[name]->step(func_graph, args));
+    res->set_func_graph(g_pass_opts[name]->step(func_graph));
   }
+  // Note: StepParallel may modify the AbstractValue of the parameters of func_graph, but they are not updated to
+  // res->args_spec_ yet. So if any later pass or action wants to use that variable, it should be set here.
   return true;
 }
 
@@ -255,10 +256,9 @@ bool ValidatePass(const ResourcePtr &res) {
 bool InferenceOptPreparePass(const ResourcePtr &res) {
   FuncGraphPtr func_graph = res->func_graph();
   MS_EXCEPTION_IF_NULL(func_graph);
-  abstract::AbstractBasePtrList args_spec = res->args_spec();
   auto prepare_map = GetInferenceOptPreparePhases();
   auto infer_opt_prepare = opt::Optimizer::MakeOptimizer("inference_prepare", res, prepare_map);
-  (void)infer_opt_prepare->step(func_graph, args_spec, false);
+  (void)infer_opt_prepare->step(func_graph, false);
   return true;
 }
 
diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
index 5bad1634d5..402ef98001 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
@@ -260,7 +260,6 @@ AbstractBasePtr TrivialPrimEvaluator::Run(AnalysisEnginePtr engine, const Config
                          return conf->GetEvaluatedValue();
                        });
   AbstractBasePtr ret = EvalPrim(engine, args_spec_list);
-  (*cache_)[args_spec_list] = ret;
   return ret;
 }
 
diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
index 46e088ab11..1115cd9978 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
@@ -405,6 +405,10 @@ AbstractBasePtr PyInferRes2Abstract(const PrimitivePyPtr &prim_py, const py::dic
 AbstractBasePtr PythonPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const AbstractBasePtrList &args) {
   MS_LOG(DEBUG) << "Eval for:" << prim_py_->ToString();
 
+  const auto &iter = cache_->find(args);
+  if (iter != cache_->end()) {
+    return iter->second;
+  }
   auto py_args = PreparePyInputs(prim_py_, args);
 
   auto pyobj = prim_py_->GetPyObj();
@@ -418,6 +422,7 @@ AbstractBasePtr PythonPrimEvaluator::EvalPrim(const AnalysisEnginePtr &, const A
   auto res_spec = PyInferRes2Abstract(prim_py_, output);
 
   MS_LOG(DEBUG) << "Python InferTensor result spec: " << res_spec->ToString() << ".";
+  (*cache_)[args] = res_spec;
   return res_spec;
 }
 
diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
index 6230df44a5..4afc3509ba 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
@@ -271,6 +271,18 @@ void AnalysisEngine::ClearEvaluatorCache() {
     MS_EXCEPTION_IF_NULL(evaluator->cache());
     evaluator->cache()->clear();
   }
+  for (auto &element : prim_constructors_) {
+    EvaluatorPtr evaluator = element.second;
+    MS_EXCEPTION_IF_NULL(evaluator);
+    MS_EXCEPTION_IF_NULL(evaluator->cache());
+    evaluator->cache()->clear();
+  }
+  for (auto &element : prim_py_evaluators_) {
+    EvaluatorPtr evaluator = element.second;
+    MS_EXCEPTION_IF_NULL(evaluator);
+    MS_EXCEPTION_IF_NULL(evaluator->cache());
+    evaluator->cache()->clear();
+  }
 }
 
 void AnalysisEngine::Clear() {
@@ -296,7 +308,17 @@ EvaluatorPtr GetPrimEvaluator(const PrimitivePtr &prim, const AnalysisEnginePtr
   if (prim->HasPyEvaluator()) {
     auto prim_py = dyn_cast<PrimitivePy>(prim);
     if (prim_py != nullptr) {
-      return std::make_shared<PythonPrimEvaluator>(prim_py);
+      if (engine == nullptr) {
+        return std::make_shared<PythonPrimEvaluator>(prim_py);
+      }
+
+      const auto &iter = engine->prim_py_evaluators_.find(prim_py);
+      if (iter != engine->prim_py_evaluators_.end()) {
+        return iter->second;
+      }
+      evaluator = std::make_shared<PythonPrimEvaluator>(prim_py);
+      engine->prim_py_evaluators_[prim_py] = evaluator;
+      return evaluator;
     }
     MS_LOG(EXCEPTION) << "The primitive with python evaluator should be a python primitive.";
   }
diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h
index ef4f78e619..80c6320493 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h
+++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h
@@ -194,6 +194,7 @@ class AnalysisEngine : public std::enable_shared_from_this<AnalysisEngine> {
   const PrimEvaluatorMap &PrimConstructors() const { return prim_constructors_; }
 
   AnalysisCache cache_;
+  std::unordered_map<PrimitivePyPtr, EvaluatorPtr> prim_py_evaluators_;
 
  private:
   const PrimEvaluatorMap &prim_constructors_;
diff --git a/tests/ut/cpp/optimizer/optimizer_test.cc b/tests/ut/cpp/optimizer/optimizer_test.cc
index d700225894..ca7c589d47 100644
--- a/tests/ut/cpp/optimizer/optimizer_test.cc
+++ b/tests/ut/cpp/optimizer/optimizer_test.cc
@@ -57,8 +57,7 @@ TEST_F(TestOptOptimizer, test_step_opt) {
                                                                   true);
   EXPECT_TRUE(optimizer.get() != nullptr);
 
-  abstract::AbstractBasePtrList args;
-  auto after = optimizer->step(before, args);
+  auto after = optimizer->step(before);
 
   draw::Draw("optimizer_test_expendJ_before.dot", before);
   draw::Draw("optimizer_test_expendJ_after.dot", after);

From 8963e395163fabecf8637a19729bbc14ff52c2fd Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Thu, 23 Apr 2020 05:29:26 -0400
Subject: [PATCH 026/242] change error type

---
 tests/ut/python/ops/test_array_ops.py | 4 ++--
 tests/ut/python/ops/test_math_ops.py  | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/ut/python/ops/test_array_ops.py b/tests/ut/python/ops/test_array_ops.py
index faaa9d5402..01e7e32d50 100644
--- a/tests/ut/python/ops/test_array_ops.py
+++ b/tests/ut/python/ops/test_array_ops.py
@@ -228,10 +228,10 @@ def test_exec():
 
 raise_set = [
     ('Squeeze_1_Error', {
-        'block': (lambda x: P.Squeeze(axis=1.2), {'exception': ValueError}),
+        'block': (lambda x: P.Squeeze(axis=1.2), {'exception': TypeError}),
         'desc_inputs': [Tensor(np.ones(shape=[3, 1, 5]))]}),
     ('Squeeze_2_Error', {
-        'block': (lambda x: P.Squeeze(axis=((1.2, 1.3))), {'exception': ValueError}),
+        'block': (lambda x: P.Squeeze(axis=((1.2, 1.3))), {'exception': TypeError}),
         'desc_inputs': [Tensor(np.ones(shape=[3, 1, 5]))]}),
     ('ReduceSum_Error', {
         'block': (lambda x: P.ReduceSum(keep_dims=1), {'exception': TypeError}),
diff --git a/tests/ut/python/ops/test_math_ops.py b/tests/ut/python/ops/test_math_ops.py
index 7f8717d4e6..b866c7c556 100755
--- a/tests/ut/python/ops/test_math_ops.py
+++ b/tests/ut/python/ops/test_math_ops.py
@@ -401,16 +401,16 @@ def test_exec():
 
 raise_set = [
     ('StridedSlice_1_Error', {
-        'block': (lambda x: P.StridedSlice(begin_mask="1"), {'exception': ValueError}),
+        'block': (lambda x: P.StridedSlice(begin_mask="1"), {'exception': TypeError}),
         'desc_inputs': [0]}),
     ('StridedSlice_2_Error', {
-        'block': (lambda x: P.StridedSlice(end_mask="1"), {'exception': ValueError}),
+        'block': (lambda x: P.StridedSlice(end_mask="1"), {'exception': TypeError}),
         'desc_inputs': [0]}),
     ('StridedSlice_3_Error', {
-        'block': (lambda x: P.StridedSlice(ellipsis_mask=1.1), {'exception': ValueError}),
+        'block': (lambda x: P.StridedSlice(ellipsis_mask=1.1), {'exception': TypeError}),
         'desc_inputs': [0]}),
     ('StridedSlice_4_Error', {
-        'block': (lambda x: P.StridedSlice(new_axis_mask="1.1"), {'exception': ValueError}),
+        'block': (lambda x: P.StridedSlice(new_axis_mask="1.1"), {'exception': TypeError}),
         'desc_inputs': [0]}),
 ]
 

From f806b724472c238b89583f474bbba2a2c19c7a2f Mon Sep 17 00:00:00 2001
From: ch-l <ch.l@huawei.com>
Date: Tue, 21 Apr 2020 12:30:36 +0200
Subject: [PATCH 027/242] use DeviceMemory for memory control

---
 .../auto_parallel/rec_core/rec_partition.cc   | 41 +++++++++++--------
 .../auto_parallel/rec_core/rec_partition.h    |  4 +-
 .../ccsrc/parallel/ops_info/operator_info.h   |  3 --
 .../ccsrc/parallel/step_auto_parallel.cc      |  3 +-
 mindspore/ccsrc/parallel/step_parallel.cc     |  9 ++--
 .../auto_parallel/rec_partition_test.cc       |  9 ++--
 6 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
index 24ad8ac203..81e0eaa2dd 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
@@ -29,52 +29,55 @@
 
 namespace mindspore {
 namespace parallel {
-#define DEVICE_MEMORY 1024.0 * 1024.0 * 1024.0  // 1GB
 
 // Get the target node's weight for sorting.
 double GetWeights(const Graph::NodeType &node) {
   const OperatorRec &op = node.apply;
 
-  if (op.op_type == 0) {
+  if (op.op_type == OperatorType::kRecMatMul) {
     // For MatMul
     auto cost_ptr = std::make_shared<CostMatMul>();
 
     return cost_ptr->GetMinCostIn(op);
-  } else if (op.op_type == 1) {
+  } else if (op.op_type == OperatorType::kRecConvolution) {
     // For Convolution
     auto cost_ptr = std::make_shared<CostConvolution>();
 
     return cost_ptr->GetMinCostIn(node);
-  } else if (op.op_type == 2) {
+  } else if (op.op_type == OperatorType::kRecPooling) {
     // For Pooling
     auto cost_ptr = std::make_shared<CostPooling>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == 3) {
+  } else if (op.op_type == OperatorType::kRecAdd) {
     // For Add
     auto cost_ptr = std::make_shared<CostAdd>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == 4 || op.op_type == 7 || op.op_type == 9) {
+  } else if (op.op_type == OperatorType::kRecSoftmax || op.op_type == OperatorType::kRecReLU ||
+             op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
     // For Softmax & || Activation
     auto cost_ptr = std::make_shared<CostCommon>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == 5) {
+  } else if (op.op_type == OperatorType::kRecReshape) {
     // For Reshape
     auto cost_ptr = std::make_shared<CostReshape>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == 6) {
+  } else if (op.op_type == OperatorType::kRecBiasAdd) {
     // For BiasAdd
     auto cost_ptr = std::make_shared<CostBiasAdd>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == 8) {
+  } else if (op.op_type == OperatorType::kRecBatchNorm) {
     // For BatchNorm
     auto cost_ptr = std::make_shared<CostBatchNorm>();
 
     return cost_ptr->GetMinCostIn();
+  } else if (op.op_type == OperatorType::kRecUnkownType) {
+    // For unknown type
+    return 0.0;
   } else {
     MS_LOG(EXCEPTION) << "Failure: GetOperatorWeight failed.";
   }
@@ -155,13 +158,17 @@ StrategyRec PartitionNode(const Graph::NodeType &node,
     auto cost_ptr = std::make_shared<CostBatchNorm>();
 
     return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
+  } else if (node.apply.op_type == 10) {
+    // For unknown type
+    StrategyRec default_strategy;
+    return default_strategy;
   } else {
     MS_LOG(EXCEPTION) << "Failure: Partition Operator failed.";
   }
 }
 
 // Parttion graph into all devices.
-Status PartitionForAllDevices(const size_t num_device, std::shared_ptr<Graph> graph) {
+Status PartitionForAllDevices(const size_t num_device, const double device_memory, std::shared_ptr<Graph> graph) {
   if (num_device < 1) {
     MS_LOG(EXCEPTION) << "ERROR: Number of devices can't be " << num_device << ".";
   }
@@ -207,7 +214,7 @@ Status PartitionForAllDevices(const size_t num_device, std::shared_ptr<Graph> gr
   }
 
   InferUndecideStrategy(graph);
-  if (DevicesMemoryControl(graph) != SUCCESS) {
+  if (DevicesMemoryControl(device_memory, graph) != SUCCESS) {
     return FAILED;
   } else {
     return SUCCESS;
@@ -306,15 +313,15 @@ void ApplyNextStrategy(const uint64_t node_index, std::shared_ptr<Graph> graph)
   }
 }
 
-Status DevicesMemoryControl(std::shared_ptr<Graph> graph) {
+Status DevicesMemoryControl(const double device_memory, std::shared_ptr<Graph> graph) {
   MS_EXCEPTION_IF_NULL(graph);
 
   uint64_t iter_nodes = graph->nodes.size();
+  double used_memory = 0.0;
 
   for (uint64_t i_node = 0; i_node < iter_nodes; i_node++) {
     if (graph->nodes[i_node].info == 0) {
       Graph::NodeType &Node = graph->nodes[i_node];
-      double used_memory = 0.0;
 
       for (int index = 0; index < 2; index++) {
         used_memory += Node.apply.arguments[index].tensor_str.str_n * Node.apply.arguments[index].tensor_shape.shape_n *
@@ -329,12 +336,12 @@ Status DevicesMemoryControl(std::shared_ptr<Graph> graph) {
                      Node.tensor_parm.tensor_str.str_h * Node.tensor_parm.tensor_shape.shape_h *
                      Node.tensor_parm.tensor_str.str_w * Node.tensor_parm.tensor_shape.shape_w *
                      GetDataTypeSize(Node.tensor_parm.tensor_type);
-      if (DEVICE_MEMORY < used_memory) {
-        MS_LOG(EXCEPTION) << "Failure: Out of memory!";
-        return FAILED;
-      }
     }
   }
+  if (device_memory < used_memory) {
+    MS_LOG(EXCEPTION) << "Failure: Out of memory!";
+    return FAILED;
+  }
 
   return SUCCESS;
 }
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h
index 4f831f4f9a..e22b11542a 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h
@@ -40,7 +40,7 @@ StrategyRec PartitionNode(const Graph::NodeType &node,
                           const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
                           std::shared_ptr<Graph> graph);
 
-Status PartitionForAllDevices(const size_t num_device, std::shared_ptr<Graph> graph);
+Status PartitionForAllDevices(const size_t num_device, const double device_memory, std::shared_ptr<Graph> graph);
 
 Graph::NodeType ApplyStrToTensor(Graph::NodeType Node);
 
@@ -50,7 +50,7 @@ void ApplyLastStrategy(const uint64_t node_index, std::shared_ptr<Graph> graph);
 
 void ApplyNextStrategy(const uint64_t node_index, std::shared_ptr<Graph> graph);
 
-Status DevicesMemoryControl(std::shared_ptr<Graph> graph);
+Status DevicesMemoryControl(const double device_memory, std::shared_ptr<Graph> graph);
 
 size_t GetDataTypeSize(const TensorType &type);
 }  // namespace parallel
diff --git a/mindspore/ccsrc/parallel/ops_info/operator_info.h b/mindspore/ccsrc/parallel/ops_info/operator_info.h
index 347da7e573..de95bd84ad 100644
--- a/mindspore/ccsrc/parallel/ops_info/operator_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/operator_info.h
@@ -150,14 +150,11 @@ class OperatorInfo {
   // needed by rec_parser
   void set_type(const std::string &type) { type_ = type; }
   const std::string &type() const { return type_; }
-  void set_cnode_name(const std::string &cnode_name) { cnode_name_ = cnode_name; }
-  const std::string &cnode_name() const { return cnode_name_; }
   const std::unordered_map<std::string, ValuePtr> &attrs() const { return attrs_; }
 
  protected:
   // needed by rec_parser
   std::string type_;
-  std::string cnode_name_;
   virtual Status CheckStrategy(const StrategyPtr &strategy) = 0;
   virtual Status InferTensorMap() = 0;
   virtual Status InferForwardCommunication() = 0;
diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc
index 8a95232aa4..7d37bafe98 100644
--- a/mindspore/ccsrc/parallel/step_auto_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc
@@ -935,7 +935,8 @@ Status ParallelStrategyRecSearch(const std::vector<AnfNodePtr> &all_nodes, const
   std::shared_ptr<Graph> graph = ParseGraph(ops, input_tensor_names);
 
   size_t num_device = g_device_manager->DeviceNum();
-  if (PartitionForAllDevices(num_device, graph) == SUCCESS) {
+  double device_memory = entire_costgraph->GetDeviceMemory();
+  if (PartitionForAllDevices(num_device, device_memory, graph) == SUCCESS) {
     MS_LOG(INFO) << "Partition Success With " << num_device << " devices.";
   } else {
     MS_LOG(ERROR) << "PartitionForAllDevices failed.";
diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index d1390db899..08f4c56d9f 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -2263,13 +2263,10 @@ std::vector<std::string> ExtractInputsTensorName(const CNodePtr &node) {
   std::vector<AnfNodePtr> all_inputs = node->inputs();
   std::vector<AnfNodePtr> node_inputs{all_inputs.begin() + 1, all_inputs.end()};
 
+  std::string node_id = node->UniqueId();
+  name_inputs.push_back(node_id);
   for (auto &input : node_inputs) {
-    std::string name;
-    if (IsValueNode<Tensor>(input) || input->isa<CNode>() || input->isa<Parameter>()) {
-      name = input->ToString();
-    } else {
-      continue;
-    }
+    std::string name = input->UniqueId();
     name_inputs.push_back(name);
   }
 
diff --git a/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc b/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc
index 509b00f428..1eb65b468f 100644
--- a/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc
+++ b/tests/ut/cpp/parallel/auto_parallel/rec_partition_test.cc
@@ -227,19 +227,22 @@ TEST_F(TestPartition, test_PartitionNode) {
 
 TEST_F(TestPartition, test_PartitionForAllDevices) {
   std::shared_ptr<Graph> graph = MakeMatMulData(9);
-  ASSERT_EQ(PartitionForAllDevices(1024, graph), SUCCESS);
+  double device_memory = 1024.0 * 1024.0 * 1024.0 * 16.0;
+  ASSERT_EQ(PartitionForAllDevices(1024, device_memory, graph), SUCCESS);
 }
 
 TEST_F(TestPartition, test_PartitionForAllDevices2) {
   std::shared_ptr<Graph> graph = MakeMatMulData(9);
-  ASSERT_EQ(PartitionForAllDevices(2, graph), SUCCESS);
+  double device_memory = 1024.0 * 1024.0 * 1024.0 * 16.0;
+  ASSERT_EQ(PartitionForAllDevices(2, device_memory, graph), SUCCESS);
 }
 
 // Negative case: parition on 0 device
 TEST_F(TestPartition, test_PartitionForAllDevices0) {
   std::shared_ptr<Graph> graph = MakeMatMulData(9);
+  double device_memory = 1024.0 * 1024.0 * 1024.0 * 16.0;
   // Throw Exception "Number of devices can't be 0"
-  EXPECT_ANY_THROW(PartitionForAllDevices(0, graph));
+  EXPECT_ANY_THROW(PartitionForAllDevices(0, device_memory, graph));
 }
 
 TEST_F(TestPartition, test_ApplyStrToTensor) {

From 0b6b5e5123f9c9f73284c25fe1027bb6e434056a Mon Sep 17 00:00:00 2001
From: liubuyu <liubuyu1@huawei.com>
Date: Thu, 23 Apr 2020 17:03:42 +0800
Subject: [PATCH 028/242] fix codedex warning

---
 mindspore/ccsrc/common/trans.cc               | 44 ++++++++-----------
 mindspore/ccsrc/common/trans.h                | 10 ++---
 .../device/ascend/ascend_device_address.cc    | 18 ++++----
 3 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc
index 1174be1f48..3e8d922971 100644
--- a/mindspore/ccsrc/common/trans.cc
+++ b/mindspore/ccsrc/common/trans.cc
@@ -101,13 +101,20 @@ const std::map<std::pair<TypeId, TypeId>, DataTypeTransMode> mode_map{
   {std::pair<TypeId, TypeId>(kNumberTypeInt64, kNumberTypeInt32), FROM_INT64_TO_INT32},
   {std::pair<TypeId, TypeId>(kNumberTypeUInt16, kNumberTypeInt32), FROM_UINT16_TO_INT32}};
 
-template <typename SrcT, typename DstT>
-void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) {
-  auto src_id = TypeIdSize(args.src_type);
-  auto dst_id = TypeIdSize(args.dst_type);
-  if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) {
+void CheckMemSize(const TypeIdArgs &args) {
+  auto src_type_size = TypeIdSize(args.host_data_type);
+  auto dst_type_size = TypeIdSize(args.device_data_type);
+  if (src_type_size < 1 || dst_type_size < 1) {
+    MS_LOG(EXCEPTION) << "Invalid src or dst data type.";
+  }
+  if (args.data_size / src_type_size != args.host_shape_size) {
     MS_LOG(EXCEPTION) << "Invalid src or dst data size.";
   }
+}
+
+template <typename SrcT, typename DstT>
+void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size) {
+  CheckMemSize(args);
   for (size_t idx = 0; idx != data_size; idx++) {
     SrcT src_data = static_cast<const SrcT *>(args.data)[idx];
     static_cast<DstT *>(dst)[idx] = static_cast<DstT>(src_data);
@@ -116,11 +123,7 @@ void TransDataSrc2Dst(const TypeIdArgs &args, void *dst, const size_t data_size)
 
 template <typename SrcT>
 void TransDataSrc2Fp16(const TypeIdArgs &args, void *dst, const size_t data_size) {
-  auto src_id = TypeIdSize(args.src_type);
-  auto dst_id = TypeIdSize(args.dst_type);
-  if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) {
-    MS_LOG(EXCEPTION) << "Invalid src or dst data size.";
-  }
+  CheckMemSize(args);
   auto src_data = static_cast<const SrcT *>(args.data);
   auto half_data = static_cast<Eigen::half *>(dst);
   for (size_t i = 0; i < data_size; i++) {
@@ -394,27 +397,18 @@ bool CheckArgs(const FormatArgs &args, size_t *size, size_t *total_size) {
 }
 
 bool TransDataType(const TypeIdArgs &args, void *result) {
-  MS_LOG(DEBUG) << "Begin trans datatype from " << TypeIdLabel(args.src_type) << " to " << TypeIdLabel(args.dst_type);
+  MS_LOG(DEBUG) << "Begin trans datatype from " << TypeIdLabel(args.host_data_type) << " to "
+                << TypeIdLabel(args.device_data_type);
   MS_EXCEPTION_IF_NULL(result);
-  std::pair<TypeId, TypeId> type_info(args.src_type, args.dst_type);
+  std::pair<TypeId, TypeId> type_info(args.host_data_type, args.device_data_type);
   auto iter = mode_map.find(type_info);
   if (iter == mode_map.end()) {
-    MS_LOG(ERROR) << "Unsupported datatype trans. src_type :" << TypeIdLabel(args.src_type)
-                  << ", dst_type:" << TypeIdLabel(args.dst_type);
+    MS_LOG(ERROR) << "Unsupported datatype trans. src_type :" << TypeIdLabel(args.host_data_type)
+                  << ", dst_type:" << TypeIdLabel(args.device_data_type);
     return false;
   }
   auto trans_mode = iter->second;
-  auto src_id = TypeIdSize(args.src_type);
-  auto dst_id = TypeIdSize(args.dst_type);
-  if (src_id < 1 || dst_id < 1) {
-    MS_LOG(ERROR) << "Invalid src or dst data type.";
-    return false;
-  }
-  if (args.src_size / src_id != args.src_shape_size || args.dst_size / dst_id != args.dst_shape_size) {
-    MS_LOG(ERROR) << "Invalid src or dst data size.";
-    return false;
-  }
-  if (!CastKernel(args, result, args.dst_shape_size, trans_mode)) {
+  if (!CastKernel(args, result, args.host_shape_size, trans_mode)) {
     MS_LOG(ERROR) << "Failed to trans datatype..";
     return false;
   }
diff --git a/mindspore/ccsrc/common/trans.h b/mindspore/ccsrc/common/trans.h
index e6e81ed359..0593466c38 100644
--- a/mindspore/ccsrc/common/trans.h
+++ b/mindspore/ccsrc/common/trans.h
@@ -31,12 +31,10 @@ namespace mindspore {
 namespace trans {
 struct TypeIdArgs {
   const void *data;
-  size_t src_size;
-  size_t dst_size;
-  TypeId src_type;
-  TypeId dst_type;
-  size_t src_shape_size;
-  size_t dst_shape_size;
+  size_t host_shape_size;  // Product of all dimension sizes: [a, b, c, d] => a*b*c*d
+  TypeId host_data_type;
+  TypeId device_data_type;
+  size_t data_size;
 };
 
 struct FormatArgs {
diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/device/ascend/ascend_device_address.cc
index df49400341..1f452ce9e2 100644
--- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc
+++ b/mindspore/ccsrc/device/ascend/ascend_device_address.cc
@@ -104,10 +104,10 @@ bool AscendDeviceAddress::SyncDeviceToHost(const std::vector<int> &shape, size_t
     } else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) {
       sync_ok = SyncDeviceToHostAndFloatToFloat64(host_ptr, size, ptr_, size_);
     } else {
-      auto host_size = trans::ShapeSize(host_shape);
+      auto shape_size = trans::ShapeSize(host_shape);
       auto host = std::vector<uint8_t>(size_);
       SyncMemory(host.data(), ptr_, size_, RT_MEMCPY_DEVICE_TO_HOST);
-      const trans::TypeIdArgs type_args{host.data(), size_, size, type_id_, type, host_size, host_size};
+      const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size};
       sync_ok = trans::TransDataType(type_args, host_ptr);
       if (!sync_ok) {
         MS_LOG(ERROR) << "trans data type failed.";
@@ -156,9 +156,8 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const std::vector<int
       MS_LOG(ERROR) << "Trans format failed.";
       return false;
     }
-    auto host_size = trans::ShapeSize(host_shape);
-    auto device_size = trans::ShapeSize(device_shape);
-    const trans::TypeIdArgs type_args{host.data(), size_, size, type_id_, type, device_size, host_size};
+    auto shape_size = trans::ShapeSize(host_shape);
+    const trans::TypeIdArgs type_args{host.data(), shape_size, type_id_, type, size};
     sync_ok = trans::TransDataType(type_args, host_ptr);
     if (!sync_ok) {
       MS_LOG(ERROR) << "Trans format failed.";
@@ -193,8 +192,8 @@ bool AscendDeviceAddress::SyncHostToDevice(const std::vector<int> &shape, size_t
     } else if (type_id_ == kNumberTypeFloat32 && type == kNumberTypeFloat64) {
       sync_ok = Float64ToFloatAndSyncHostToDevice(ptr_, size_, host_ptr, size);
     } else {
-      auto host_size = trans::ShapeSize(host_shape);
-      const trans::TypeIdArgs type_args{host_ptr, size, size_, type, type_id_, host_size, host_size};
+      auto shape_size = trans::ShapeSize(host_shape);
+      const trans::TypeIdArgs type_args{host_ptr, shape_size, type, type_id_, size};
       auto host_tmp = std::vector<uint8_t>(size_);
       sync_ok = trans::TransDataType(type_args, host_tmp.data());
       if (!sync_ok) {
@@ -235,9 +234,8 @@ bool AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const std::vector<int
     device_shape = trans::TransShapeToDevice(host_shape, format_);
   }
   if (type_id_ != type) {
-    auto host_size = trans::ShapeSize(host_shape);
-    auto device_size = trans::ShapeSize(device_shape);
-    const trans::TypeIdArgs type_args{host_ptr, size, size_, type, type_id_, host_size, device_size};
+    auto shape_size = trans::ShapeSize(host_shape);
+    const trans::TypeIdArgs type_args{host_ptr, shape_size, type, type_id_, size};
     auto host_tmp = std::vector<uint8_t>(size_);
     sync_ok = trans::TransDataType(type_args, host_tmp.data());
     if (!sync_ok) {

From 8492f3dd7f6665c66ba05662052e1c3bcf8ad1b7 Mon Sep 17 00:00:00 2001
From: simson <526422051@qq.com>
Date: Thu, 23 Apr 2020 18:49:19 +0800
Subject: [PATCH 029/242] modify log level of context

---
 mindspore/ccsrc/utils/context/ms_context.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc
index bee5875f60..569285f5df 100644
--- a/mindspore/ccsrc/utils/context/ms_context.cc
+++ b/mindspore/ccsrc/utils/context/ms_context.cc
@@ -78,7 +78,7 @@ MsContext::MsContext(const std::string& policy, const std::string& target) {
   enable_dynamic_mem_pool_ = true;
   graph_memory_max_size_ = "0";
   variable_memory_max_size_ = "0";
-  MS_LOG(INFO) << "Create context with backend policy:" << policy << ", device target:" << target << ".";
+  MS_LOG(DEBUG) << "Create context with backend policy:" << policy << ", device target:" << target << ".";
 }
 
 std::shared_ptr<MsContext> MsContext::GetInstance() {

From b36094e3270fed6aaa7d1205f4992fee543cad4a Mon Sep 17 00:00:00 2001
From: caojian05 <caojian5@huawei.com>
Date: Thu, 23 Apr 2020 18:52:58 +0800
Subject: [PATCH 030/242] remove the parameter batch_size of VGG16, for we can
 use flatten instead of reshape.

---
 example/vgg16_cifar10/eval.py  |  2 +-
 example/vgg16_cifar10/train.py |  2 +-
 mindspore/model_zoo/vgg.py     | 13 +++++--------
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/example/vgg16_cifar10/eval.py b/example/vgg16_cifar10/eval.py
index b034183373..ca2bbd12eb 100644
--- a/example/vgg16_cifar10/eval.py
+++ b/example/vgg16_cifar10/eval.py
@@ -39,7 +39,7 @@ if __name__ == '__main__':
     context.set_context(device_id=args_opt.device_id)
     context.set_context(enable_mem_reuse=True, enable_hccl=False)
 
-    net = vgg16(batch_size=cfg.batch_size, num_classes=cfg.num_classes)
+    net = vgg16(num_classes=cfg.num_classes)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum,
                    weight_decay=cfg.weight_decay)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
diff --git a/example/vgg16_cifar10/train.py b/example/vgg16_cifar10/train.py
index 32cd344d50..a4aa587c3d 100644
--- a/example/vgg16_cifar10/train.py
+++ b/example/vgg16_cifar10/train.py
@@ -64,7 +64,7 @@ if __name__ == '__main__':
     context.set_context(device_id=args_opt.device_id)
     context.set_context(enable_mem_reuse=True, enable_hccl=False)
 
-    net = vgg16(batch_size=cfg.batch_size, num_classes=cfg.num_classes)
+    net = vgg16(num_classes=cfg.num_classes)
     lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=50000 // cfg.batch_size)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
diff --git a/mindspore/model_zoo/vgg.py b/mindspore/model_zoo/vgg.py
index 6fcd075cc8..f3532fab13 100644
--- a/mindspore/model_zoo/vgg.py
+++ b/mindspore/model_zoo/vgg.py
@@ -14,7 +14,6 @@
 # ============================================================================
 """VGG."""
 import mindspore.nn as nn
-from mindspore.ops import operations as P
 from mindspore.common.initializer import initializer
 import mindspore.common.dtype as mstype
 
@@ -63,8 +62,7 @@ class Vgg(nn.Cell):
     def __init__(self, base, num_classes=1000, batch_norm=False, batch_size=1):
         super(Vgg, self).__init__()
         self.layers = _make_layer(base, batch_norm=batch_norm)
-        self.reshape = P.Reshape()
-        self.shp = (batch_size, -1)
+        self.flatten = nn.Flatten()
         self.classifier = nn.SequentialCell([
             nn.Dense(512 * 7 * 7, 4096),
             nn.ReLU(),
@@ -74,7 +72,7 @@ class Vgg(nn.Cell):
 
     def construct(self, x):
         x = self.layers(x)
-        x = self.reshape(x, self.shp)
+        x = self.flatten(x)
         x = self.classifier(x)
         return x
 
@@ -87,20 +85,19 @@ cfg = {
 }
 
 
-def vgg16(batch_size=1, num_classes=1000):
+def vgg16(num_classes=1000):
     """
     Get Vgg16 neural network with batch normalization.
 
     Args:
-        batch_size (int): Batch size. Default: 1.
         num_classes (int): Class numbers. Default: 1000.
 
     Returns:
         Cell, cell instance of Vgg16 neural network with batch normalization.
 
     Examples:
-        >>> vgg16(batch_size=1, num_classes=1000)
+        >>> vgg16(num_classes=1000)
     """
 
-    net = Vgg(cfg['16'], num_classes=num_classes, batch_norm=True, batch_size=batch_size)
+    net = Vgg(cfg['16'], num_classes=num_classes, batch_norm=True)
     return net

From 39945d0f79f330377ead94e787844aa8008466ef Mon Sep 17 00:00:00 2001
From: YuJianfeng <yujianfeng5@huawei.com>
Date: Mon, 20 Apr 2020 17:32:58 +0800
Subject: [PATCH 031/242] Add AllGather fusion pass

---
 .../ascend/ascend_backend_optimization.cc     |   3 +-
 .../pre_activate/pass/allreduce_fusion.h      |  49 -------
 ...e_fusion.cc => communication_op_fusion.cc} | 122 +++++++++++-------
 .../pass/communication_op_fusion.h            |  67 ++++++++++
 mindspore/ccsrc/session/gpu_session.cc        |   2 +-
 mindspore/ccsrc/utils/utils.h                 |   3 +
 .../common/ir_fusion/allreduce_fusion_test.cc |   2 +-
 7 files changed, 146 insertions(+), 102 deletions(-)
 delete mode 100644 mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h
 rename mindspore/ccsrc/pre_activate/pass/{allreduce_fusion.cc => communication_op_fusion.cc} (62%)
 create mode 100644 mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h

diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index 496a9b276f..73863587fc 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -21,7 +21,7 @@
 #include "pre_activate/ascend/ir_fission/bn_grad_split.h"
 #include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h"
 #include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h"
-#include "pre_activate/pass/allreduce_fusion.h"
+#include "pre_activate/pass/communication_op_fusion.h"
 #include "pre_activate/ascend/ir_fusion/square_sum_fusion.h"
 #include "pre_activate/ascend/ir_fusion/clip_by_norm_no_div_square_sum_fusion.h"
 #include "pre_activate/ascend/ir_fusion/lamb_update_with_lr_rule_fusion.h"
@@ -254,6 +254,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
   auto optimizer = std::make_shared<GraphOptimizer>();
   auto other_pm = std::make_shared<PassManager>("other_pm");
   other_pm->AddPass(std::make_shared<AllReduceFusion>());
+  other_pm->AddPass(std::make_shared<AllGatherFusion>());
   other_pm->AddPass(std::make_shared<ParameterTransOpFusion>());
   other_pm->AddPass(std::make_shared<BufferFusion>());
   other_pm->AddPass(std::make_shared<GetitemTuple>());
diff --git a/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h b/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h
deleted file mode 100644
index e443767e43..0000000000
--- a/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ALLREDUCE_FUSION_H_
-#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ALLREDUCE_FUSION_H_
-#include <vector>
-
-#include "pre_activate/common/pass.h"
-#include "ir/func_graph.h"
-#include "ir/anf.h"
-
-namespace mindspore {
-namespace opt {
-struct AllReduceInfo_t {
-  std::vector<CNodePtr> allreduce_node;
-  std::vector<float> input_grad_size;
-  std::vector<float> input_grad_time;
-};
-
-class AllReduceFusion : public Pass {
- public:
-  explicit AllReduceFusion(size_t groups = 1) : Pass("all_reduce_fusion"), groups_(groups) {}
-  ~AllReduceFusion() override = default;
-  bool Run(const FuncGraphPtr &graph) override;
-
- private:
-  bool DoFusion(const FuncGraphPtr &func_graph, const AllReduceInfo_t &allreduce_node_info, size_t segment_num,
-                const std::vector<size_t> &segment_index) const;
-  AnfNodePtr CreateFusedAllReduce(const FuncGraphPtr &func_graph, const AllReduceInfo_t &allreduce_node_info,
-                                  size_t start_index, size_t end_index) const;
-  bool GetSplitSegments(const AllReduceInfo_t &allreduce_node_info, size_t *segment_num,
-                        std::vector<size_t> *segment_index) const;
-  size_t groups_ = 1;
-};
-}  // namespace opt
-}  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_ALLREDUCE_FUSION_H_
diff --git a/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.cc b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc
similarity index 62%
rename from mindspore/ccsrc/pre_activate/pass/allreduce_fusion.cc
rename to mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc
index 70a8974eca..4bcd488f69 100644
--- a/mindspore/ccsrc/pre_activate/pass/allreduce_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.cc
@@ -13,14 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "pre_activate/pass/allreduce_fusion.h"
+#include "pre_activate/pass/communication_op_fusion.h"
 
 #include <vector>
-#include <string>
 #include <memory>
 #include <unordered_map>
 
-#include "utils/utils.h"
 #include "utils/graph_utils.h"
 #include "operator/ops.h"
 #include "device/kernel_info.h"
@@ -31,9 +29,12 @@
 namespace mindspore {
 namespace opt {
 namespace {
-kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const AllReduceInfo_t &allreduce_node_info, size_t start_index,
+constexpr auto kAttrDefaultGroup = "default_group";
+constexpr auto kAttrDefaultOp = "default_op";
+
+kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const CommunicationOpInfo &communication_op_info, size_t start_index,
                                                    size_t end_index) {
-  if (end_index >= allreduce_node_info.allreduce_node.size()) {
+  if (end_index >= communication_op_info.communication_op_nodes.size()) {
     MS_LOG(EXCEPTION) << "end index out of vector size";
   }
   std::vector<std::string> inputs_device_format;
@@ -43,7 +44,7 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const AllReduceInfo_t &allred
   std::vector<std::vector<size_t>> outputs_shape;
   kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
   for (size_t idx = start_index; idx <= end_index; ++idx) {
-    auto cnode = allreduce_node_info.allreduce_node[idx];
+    auto cnode = communication_op_info.communication_op_nodes[idx];
     MS_EXCEPTION_IF_NULL(cnode);
     for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); ++input_index) {
       inputs_device_format.push_back(AnfAlgo::GetInputFormat(cnode, input_index));
@@ -64,14 +65,38 @@ kernel::KernelBuildInfoPtr GenerateKernelBuildInfo(const AllReduceInfo_t &allred
   builder.SetOutputsDeviceType(outputs_device_type);
   return builder.Build();
 }
+
+std::string GetFusionGroupKey(const AnfNodePtr &node) {
+  auto primitive = AnfAlgo::GetCNodePrimitive(node);
+  MS_EXCEPTION_IF_NULL(primitive);
+  ValuePtr attr_fusion = primitive->GetAttr(kAttrFusion);
+  if (attr_fusion == nullptr) {
+    return "";
+  }
+  int fusion = GetValue<int>(attr_fusion);
+  if (fusion == 0) {
+    return "";
+  }
+  std::string group = kAttrDefaultGroup;
+  ValuePtr attr_group = primitive->GetAttr(kAttrGroup);
+  if (attr_group != nullptr) {
+    group = GetValue<std::string>(attr_group);
+  }
+  std::string op = kAttrDefaultOp;
+  ValuePtr attr_op = primitive->GetAttr(kAttrOp);
+  if (attr_op != nullptr) {
+    op = GetValue<std::string>(attr_op);
+  }
+  return group + op + std::to_string(fusion);
+}
 }  // namespace
 
-bool AllReduceFusion::GetSplitSegments(const AllReduceInfo_t &allreduce_node_info, size_t *segment_num,
-                                       std::vector<size_t> *segment_index) const {
+bool CommunicationOpFusion::GetSplitSegments(const CommunicationOpInfo &communication_op_info, size_t *segment_num,
+                                             std::vector<size_t> *segment_index) const {
   MS_EXCEPTION_IF_NULL(segment_num);
   MS_EXCEPTION_IF_NULL(segment_index);
-  size_t allreduce_node_size = allreduce_node_info.allreduce_node.size();
-  MS_LOG(INFO) << "graph all reduce node size " << allreduce_node_size;
+  size_t communication_op_node_size = communication_op_info.communication_op_nodes.size();
+  MS_LOG(INFO) << "graph " << op_name_ << " node size " << communication_op_node_size;
 
   auto parallel_context = parallel::ParallelContext::GetInstance();
   MS_EXCEPTION_IF_NULL(parallel_context);
@@ -82,30 +107,31 @@ bool AllReduceFusion::GetSplitSegments(const AllReduceInfo_t &allreduce_node_inf
     uint32_t last_index = 0;
     for (size_t i = 0; i < split_indices.size(); ++i) {
       uint32_t index = split_indices[i];
-      if (index <= last_index || index >= allreduce_node_size) {
-        MS_LOG(EXCEPTION) << "invalid allreduce split index " << i << " " << index;
+      if (index <= last_index || index >= communication_op_node_size) {
+        MS_LOG(EXCEPTION) << "invalid " << op_name_ << " split index " << i << " " << index;
       }
       segment_index->push_back(index);
       last_index = index;
       segments++;
     }
-    if (last_index != allreduce_node_size - 1) {
-      segment_index->push_back(allreduce_node_size - 1);
+    if (last_index != communication_op_node_size - 1) {
+      segment_index->push_back(communication_op_node_size - 1);
       segments++;
     }
   } else {
     segments = groups_;
     for (size_t i = 0; i < segments - 1; ++i) {
-      segment_index->push_back((i + 1) * (allreduce_node_size / segments) - 1);
+      segment_index->push_back((i + 1) * (communication_op_node_size / segments) - 1);
     }
-    segment_index->push_back(allreduce_node_size - 1);
+    segment_index->push_back(communication_op_node_size - 1);
   }
 
-  if (segments >= allreduce_node_size) {
-    MS_LOG(INFO) << "fusion not changed: segment_num=" << segments << ", allreduce_node_size=" << allreduce_node_size;
+  if (segments >= communication_op_node_size) {
+    MS_LOG(INFO) << "fusion not changed: segment_num=" << segments
+                 << ", communication_op_node_size=" << communication_op_node_size;
     return false;
   }
-  if (segment_index->at(segments - 1) != allreduce_node_size - 1) {
+  if (segment_index->at(segments - 1) != communication_op_node_size - 1) {
     MS_LOG(EXCEPTION) << "the last segment index is invalid.";
   }
   for (size_t i = 0; i < segments - 1; ++i) {
@@ -118,19 +144,19 @@ bool AllReduceFusion::GetSplitSegments(const AllReduceInfo_t &allreduce_node_inf
   return true;
 }
 
-AnfNodePtr AllReduceFusion::CreateFusedAllReduce(const FuncGraphPtr &func_graph,
-                                                 const AllReduceInfo_t &allreduce_node_info, size_t start_index,
-                                                 size_t end_index) const {
+AnfNodePtr CommunicationOpFusion::CreateFusedCommunicationOp(const FuncGraphPtr &func_graph,
+                                                             const CommunicationOpInfo &communication_op_info,
+                                                             size_t start_index, size_t end_index) const {
   MS_EXCEPTION_IF_NULL(func_graph);
-  auto prim = std::make_shared<Primitive>(kAllReduceOpName);
+  auto prim = std::make_shared<Primitive>(op_name_);
   MS_EXCEPTION_IF_NULL(prim);
   std::vector<AnfNodePtr> fusion_inputs = {NewValueNode(prim)};
   // get all inputs of current segment
-  if (end_index >= allreduce_node_info.allreduce_node.size()) {
+  if (end_index >= communication_op_info.communication_op_nodes.size()) {
     MS_LOG(EXCEPTION) << "end index out of vector size";
   }
   for (size_t idx = start_index; idx <= end_index; ++idx) {
-    auto cnode = allreduce_node_info.allreduce_node[idx];
+    auto cnode = communication_op_info.communication_op_nodes[idx];
     MS_EXCEPTION_IF_NULL(cnode);
     fusion_inputs.insert(fusion_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end());
   }
@@ -141,14 +167,14 @@ AnfNodePtr AllReduceFusion::CreateFusedAllReduce(const FuncGraphPtr &func_graph,
   fused_node->set_kernel_info(kernel_info);
   AbstractBasePtrList abstract_list;
   for (size_t idx = start_index; idx <= end_index; ++idx) {
-    auto cnode = allreduce_node_info.allreduce_node[idx];
+    auto cnode = communication_op_info.communication_op_nodes[idx];
     MS_EXCEPTION_IF_NULL(cnode);
     AnfAlgo::CopyNodeAttr("fusion", cnode, fused_node);
     AnfAlgo::CopyNodeAttr("op", cnode, fused_node);
     AnfAlgo::CopyNodeAttr("group", cnode, fused_node);
     abstract_list.push_back(cnode->abstract());
   }
-  auto kernel_build_info = GenerateKernelBuildInfo(allreduce_node_info, start_index, end_index);
+  auto kernel_build_info = GenerateKernelBuildInfo(communication_op_info, start_index, end_index);
   AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info, fused_node.get());
   auto abstract_tuple = std::make_shared<abstract::AbstractTuple>(abstract_list);
   MS_EXCEPTION_IF_NULL(abstract_tuple);
@@ -156,8 +182,8 @@ AnfNodePtr AllReduceFusion::CreateFusedAllReduce(const FuncGraphPtr &func_graph,
   return fused_node;
 }
 
-bool AllReduceFusion::DoFusion(const FuncGraphPtr &func_graph, const AllReduceInfo_t &allreduce_node_info,
-                               size_t segment_num, const std::vector<size_t> &segment_index) const {
+bool CommunicationOpFusion::DoFusion(const FuncGraphPtr &func_graph, const CommunicationOpInfo &communication_op_info,
+                                     size_t segment_num, const std::vector<size_t> &segment_index) const {
   MS_EXCEPTION_IF_NULL(func_graph);
   auto manager = func_graph->manager();
   MS_EXCEPTION_IF_NULL(manager);
@@ -169,12 +195,13 @@ bool AllReduceFusion::DoFusion(const FuncGraphPtr &func_graph, const AllReduceIn
       start_index = end_index + 1;
       continue;
     }
-    AnfNodePtr new_allreduce = CreateFusedAllReduce(func_graph, allreduce_node_info, start_index, end_index);
-    // replace old allreduce with new allreduce
+    AnfNodePtr new_communication_op =
+      CreateFusedCommunicationOp(func_graph, communication_op_info, start_index, end_index);
+    // replace old communication op with new communication op
     for (auto idx = start_index; idx <= end_index; ++idx) {
       std::vector<AnfNodePtr> tuple_getitem_input;
       tuple_getitem_input.push_back(NewValueNode(prim::kPrimTupleGetItem));
-      tuple_getitem_input.push_back(new_allreduce);
+      tuple_getitem_input.push_back(new_communication_op);
       auto index = NewValueNode(SizeToInt(idx - start_index));
       MS_EXCEPTION_IF_NULL(index);
       auto imm = std::make_shared<Int32Imm>(idx - start_index);
@@ -185,10 +212,10 @@ bool AllReduceFusion::DoFusion(const FuncGraphPtr &func_graph, const AllReduceIn
       tuple_getitem_input.push_back(index);
       AnfNodePtr tuple_getitem = func_graph->NewCNode(tuple_getitem_input);
       MS_EXCEPTION_IF_NULL(tuple_getitem);
-      auto allreduce_node_item = allreduce_node_info.allreduce_node.at(idx);
-      MS_EXCEPTION_IF_NULL(allreduce_node_item);
-      tuple_getitem->set_abstract(allreduce_node_item->abstract());
-      if (!manager->Replace(allreduce_node_item, tuple_getitem)) {
+      auto communication_op_node_item = communication_op_info.communication_op_nodes.at(idx);
+      MS_EXCEPTION_IF_NULL(communication_op_node_item);
+      tuple_getitem->set_abstract(communication_op_node_item->abstract());
+      if (!manager->Replace(communication_op_node_item, tuple_getitem)) {
         MS_LOG(EXCEPTION) << "manager replace node failed";
       }
     }
@@ -198,29 +225,24 @@ bool AllReduceFusion::DoFusion(const FuncGraphPtr &func_graph, const AllReduceIn
   return changed;
 }
 
-bool AllReduceFusion::Run(const FuncGraphPtr &func_graph) {
+bool CommunicationOpFusion::Run(const FuncGraphPtr &func_graph) {
   MS_EXCEPTION_IF_NULL(func_graph);
   const float input_grad_size_num = 0.0;
   const float input_grad_time_num = 0.0;
   // divide candidate fusion groups with same (group,op,fusion) attrs, fusion==0 means not fusion
-  std::unordered_map<std::string, AllReduceInfo_t> candidate_groups;
+  std::unordered_map<std::string, CommunicationOpInfo> candidate_groups;
   std::vector<AnfNodePtr> node_list = TopoSort(func_graph->get_return());
   for (auto &node : node_list) {
-    if (node != nullptr && node->isa<CNode>() && AnfAlgo::GetCNodeName(node) == kAllReduceOpName) {
-      auto primitive = AnfAlgo::GetCNodePrimitive(node);
-      MS_EXCEPTION_IF_NULL(primitive);
-      int fusion = GetValue<int>(primitive->GetAttr("fusion"));
-      if (fusion == 0) {
+    if (node != nullptr && node->isa<CNode>() && AnfAlgo::GetCNodeName(node) == op_name_) {
+      std::string key = GetFusionGroupKey(node);
+      if (key.empty()) {
         continue;
       }
-      std::string group = GetValue<std::string>(primitive->GetAttr("group"));
-      std::string op = GetValue<std::string>(primitive->GetAttr("op"));
-      std::string key = group + op + std::to_string(fusion);
       if (candidate_groups.find(key) == candidate_groups.end()) {
-        AllReduceInfo_t allreduce_node_info;
-        candidate_groups[key] = allreduce_node_info;
+        CommunicationOpInfo communication_op_info;
+        candidate_groups[key] = communication_op_info;
       }
-      candidate_groups[key].allreduce_node.push_back(node->cast<CNodePtr>());
+      candidate_groups[key].communication_op_nodes.push_back(node->cast<CNodePtr>());
       candidate_groups[key].input_grad_size.push_back(input_grad_size_num);
       candidate_groups[key].input_grad_time.push_back(input_grad_time_num);
     }
@@ -228,7 +250,7 @@ bool AllReduceFusion::Run(const FuncGraphPtr &func_graph) {
   // split candidate group to segments according to _group class member
   bool changed = false;
   for (auto &it : candidate_groups) {
-    if (it.second.allreduce_node.size() <= 1) {
+    if (it.second.communication_op_nodes.size() <= 1) {
       continue;
     }
     size_t segment_num = 0;
diff --git a/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h
new file mode 100644
index 0000000000..af8b557d5f
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/pass/communication_op_fusion.h
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMUNICATION_OP_FUSION_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMUNICATION_OP_FUSION_H_
+#include <utility>
+#include <vector>
+#include <string>
+
+#include "pre_activate/common/pass.h"
+#include "ir/func_graph.h"
+#include "ir/anf.h"
+#include "utils/utils.h"
+
+namespace mindspore {
+namespace opt {
+struct CommunicationOpInfo {
+  std::vector<CNodePtr> communication_op_nodes;
+  std::vector<float> input_grad_size;
+  std::vector<float> input_grad_time;
+};
+
+class CommunicationOpFusion : public Pass {
+ public:
+  explicit CommunicationOpFusion(const std::string &name, std::string op_name, size_t groups = 1)
+      : Pass(name), op_name_(std::move(op_name)), groups_(groups) {}
+  ~CommunicationOpFusion() override = default;
+  bool Run(const FuncGraphPtr &graph) override;
+
+ private:
+  bool DoFusion(const FuncGraphPtr &func_graph, const CommunicationOpInfo &communication_op_info, size_t segment_num,
+                const std::vector<size_t> &segment_index) const;
+  AnfNodePtr CreateFusedCommunicationOp(const FuncGraphPtr &func_graph,
+                                        const CommunicationOpInfo &communication_op_info, size_t start_index,
+                                        size_t end_index) const;
+  bool GetSplitSegments(const CommunicationOpInfo &communication_op_info, size_t *segment_num,
+                        std::vector<size_t> *segment_index) const;
+  std::string op_name_;
+  size_t groups_ = 1;
+};
+
+class AllReduceFusion : public CommunicationOpFusion {
+ public:
+  explicit AllReduceFusion(size_t groups = 1) : CommunicationOpFusion("all_reduce_fusion", kAllReduceOpName, groups) {}
+  ~AllReduceFusion() override = default;
+};
+
+class AllGatherFusion : public CommunicationOpFusion {
+ public:
+  explicit AllGatherFusion(size_t groups = 1) : CommunicationOpFusion("all_gather_fusion", kAllGatherOpName, groups) {}
+  ~AllGatherFusion() override = default;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_PASS_COMMUNICATION_OP_FUSION_H_
diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc
index f5e8c44231..4a9506913c 100644
--- a/mindspore/ccsrc/session/gpu_session.cc
+++ b/mindspore/ccsrc/session/gpu_session.cc
@@ -20,7 +20,7 @@
 #include "device/gpu/gpu_stream_assign.h"
 #include "pre_activate/common/optimizer.h"
 #include "pre_activate/common/pass_manager.h"
-#include "pre_activate/pass/allreduce_fusion.h"
+#include "pre_activate/pass/communication_op_fusion.h"
 #include "device/kernel_runtime_manager.h"
 #include "predict/predict.h"
 #include "common/utils.h"
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index f05eda69bf..1d63cfbab1 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -154,6 +154,9 @@ constexpr auto kAttrOutputUsedNum = "output_used_num";
 constexpr auto kAttrHasBias = "has_bias";
 constexpr auto kAttrN = "n";
 constexpr auto kAttrLabelForInsertStreamActive = "label_for_insert_stream_active";
+constexpr auto kAttrFusion = "fusion";
+constexpr auto kAttrGroup = "group";
+constexpr auto kAttrOp = "op";
 
 // attr value
 constexpr auto kValueTargetSwitch = "target_switch";
diff --git a/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc b/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc
index d5f2fa636d..7f3b9d4c9d 100644
--- a/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc
+++ b/tests/ut/cpp/pre_activate/common/ir_fusion/allreduce_fusion_test.cc
@@ -20,7 +20,7 @@
 #include "ir/manager.h"
 #include "debug/anf_ir_dump.h"
 #include "session/anf_runtime_algorithm.h"
-#include "pre_activate/pass/allreduce_fusion.h"
+#include "pre_activate/pass/communication_op_fusion.h"
 #include "pre_activate/common/optimizer.h"
 #include "device/kernel_info.h"
 #include "pre_activate/common/pass_manager.h"

From aec761c143ca3ba3c31d8bca476bbecc97deb984 Mon Sep 17 00:00:00 2001
From: lvliang <lvliang18@huawei.com>
Date: Thu, 23 Apr 2020 19:09:54 +0800
Subject: [PATCH 032/242] pynative-clean-reviewbot-warning

---
 mindspore/ccsrc/pynative/base.h                 |  2 --
 mindspore/ccsrc/pynative/pynative_execute_ge.cc |  2 --
 mindspore/ccsrc/session/session_basic.cc        | 11 ++++-------
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/mindspore/ccsrc/pynative/base.h b/mindspore/ccsrc/pynative/base.h
index 37ff000b04..fc143da3c1 100644
--- a/mindspore/ccsrc/pynative/base.h
+++ b/mindspore/ccsrc/pynative/base.h
@@ -31,7 +31,6 @@
 
 namespace mindspore {
 namespace pynative {
-
 namespace py = pybind11;
 
 enum PynativeStatusCode {
@@ -61,7 +60,6 @@ using OpExecInfoPtr = std::shared_ptr<OpExecInfo>;
 OpExecInfoPtr GenerateOpExecInfo(const py::args &args);
 
 const std::set<std::string> ignore_infer_prim = {"partial", "make_ref"};
-
 }  // namespace pynative
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/pynative/pynative_execute_ge.cc b/mindspore/ccsrc/pynative/pynative_execute_ge.cc
index 0bf2a391f9..7357bdd710 100644
--- a/mindspore/ccsrc/pynative/pynative_execute_ge.cc
+++ b/mindspore/ccsrc/pynative/pynative_execute_ge.cc
@@ -33,7 +33,6 @@ const char SINGLE_OP_GRAPH[] = "single_op_graph";
 
 namespace mindspore {
 namespace pynative {
-
 using MeTensor = mindspore::tensor::Tensor;
 using MeTensorPtr = mindspore::tensor::TensorPtr;
 using GeOperator = ge::Operator;
@@ -307,5 +306,4 @@ py::object RunOpInGE(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat
   return std::move(result);
 }
 }  // namespace pynative
-
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc
index 0ef0ad97ea..b2daa07d71 100755
--- a/mindspore/ccsrc/session/session_basic.cc
+++ b/mindspore/ccsrc/session/session_basic.cc
@@ -226,8 +226,7 @@ void ConvertValueTupleToTensor(const py::object &input_object, std::vector<tenso
   }
   auto value_tuple = input_value->cast<ValueTuplePtr>();
   MS_EXCEPTION_IF_NULL(value_tuple);
-  tensor::TensorPtr tensor_ptr = nullptr;
-  tensor_ptr = opt::CreateTupleTensor(value_tuple);
+  tensor::TensorPtr tensor_ptr = opt::CreateTupleTensor(value_tuple);
   MS_EXCEPTION_IF_NULL(tensor_ptr);
   input_tensor->push_back(tensor_ptr);
 }
@@ -583,12 +582,9 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto input_nodes = kernel_graph->inputs();
   if ((inputs.size() + input_ctrl_size) - 1 != input_nodes.size()) {
-    MS_LOG(EXCEPTION) << "tensor input size:" << inputs.size()
-                      << " is not equal graph inputs size:" << input_nodes.size()
+    MS_LOG(EXCEPTION) << "tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size()
                       << ", input_ctrl_size:" << input_ctrl_size;
   }
-  auto ms_context = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(ms_context);
   for (size_t i = 0; i < inputs.size(); ++i) {
     auto tensor = inputs[i];
     MS_EXCEPTION_IF_NULL(tensor);
@@ -598,7 +594,8 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
       auto pk_node = input_node->cast<ParameterPtr>();
       auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0);
       bool need_sync = false;
-      if (ms_context->enable_pynative_infer()) {
+      MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
+      if (MsContext::GetInstance()->enable_pynative_infer()) {
         if (tensor->device_address().get() == nullptr || tensor->device_address() != device_address) {
           need_sync = true;
         }

From ec043fcd5643396fed4d61e6290e1be0336aebf6 Mon Sep 17 00:00:00 2001
From: Xiaoda Zhang <zhangxiaoda@huawei.com>
Date: Thu, 23 Apr 2020 19:12:47 +0800
Subject: [PATCH 033/242] fix the codex and bot warnings

---
 mindspore/ccsrc/parallel/device_matrix.h                       | 1 -
 mindspore/ccsrc/parallel/step_parallel.cc                      | 1 -
 .../parallel/tensor_layout/redistribution_operator_infer.h     | 3 ++-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/mindspore/ccsrc/parallel/device_matrix.h b/mindspore/ccsrc/parallel/device_matrix.h
index 236a7fad08..295bf33836 100644
--- a/mindspore/ccsrc/parallel/device_matrix.h
+++ b/mindspore/ccsrc/parallel/device_matrix.h
@@ -26,7 +26,6 @@
 
 namespace mindspore {
 namespace parallel {
-
 using RankList = std::vector<int32_t>;
 using Shape = std::vector<int32_t>;
 
diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index c24c14abf6..7731c68ca5 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -1620,7 +1620,6 @@ CNodePtr FindLossCNode(const FuncGraphPtr &func_graph) {
   auto pre_cnode = pre_node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(pre_cnode);
   auto current_prim = GetValueNode<PrimitivePtr>(pre_cnode->input(0));
-
   // return -> cast
   if (current_prim->name() == CAST && pre_cnode->operator_info() == nullptr) {
     pre_cnode = pre_cnode->input(1)->cast<CNodePtr>();
diff --git a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h b/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h
index a96097a1d3..37a8ac3d9e 100644
--- a/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h
+++ b/mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h
@@ -39,7 +39,8 @@ using OperatorList = std::vector<OperatorC>;
 class RedistributionOperatorInfer {
  public:
   const int NONE = -1;
-  explicit RedistributionOperatorInfer(bool construct_op_flag = true) : construct_op_flag_(construct_op_flag) {}
+  explicit RedistributionOperatorInfer(bool construct_op_flag = true)
+      : construct_op_flag_(construct_op_flag), is_cost_model_(false) {}
   Status Init(const TensorLayout &tensor_layout, const Map &out_tensor_map, RankList dev_list,
               bool is_cost_model = false);
   ~RedistributionOperatorInfer() = default;

From 10994a5e7df8a298f20fdb9dc06b91a99805af38 Mon Sep 17 00:00:00 2001
From: caojian05 <caojian5@huawei.com>
Date: Tue, 21 Apr 2020 23:07:24 +0800
Subject: [PATCH 034/242] add README file for vgg16

---
 example/vgg16_cifar10/README.md | 78 +++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 example/vgg16_cifar10/README.md

diff --git a/example/vgg16_cifar10/README.md b/example/vgg16_cifar10/README.md
new file mode 100644
index 0000000000..c324673dcc
--- /dev/null
+++ b/example/vgg16_cifar10/README.md
@@ -0,0 +1,78 @@
+# VGG16 Example
+
+## Description
+
+This example is for VGG16 model training and evaluation.
+
+## Requirements
+
+- Install [MindSpore](https://www.mindspore.cn/install/en).
+
+- Download the dataset [CIFAR-10](http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz).
+
+> Unzip the CIFAR-10 dataset to any path you want and the folder structure should be as follows:
+> ```
+> .
+> ├── cifar-10-batches-bin  # train dataset
+> └── cifar-10-verify-bin   # infer dataset
+> ```
+
+## Running the Example
+
+### Training
+
+```
+python train.py --data_path=your_data_path --device_id=6 > out.train.log 2>&1 & 
+```
+The python command above will run in the background; you can view the results in the file `out.train.log`.
+
+After training, you'll get some checkpoint files under the script folder by default.
+
+You will get the loss value as follows:
+```
+# grep "loss is " out.train.log
+epoch: 1 step: 781, loss is 2.093086
+epoch: 2 step: 781, loss is 1.827582
+...
+```
+
+### Evaluation
+
+```
+python eval.py --data_path=your_data_path --device_id=6 --checkpoint_path=./train_vgg_cifar10-70-781.ckpt > out.eval.log 2>&1 & 
+```
+The above python command will run in the background; you can view the results in the file `out.eval.log`.
+
+You will get the accuracy as follows:
+```
+# grep "result: " out.eval.log
+result: {'acc': 0.92}
+```
+
+
+## Usage:
+
+### Training
+```
+usage: train.py [--device_target TARGET][--data_path DATA_PATH]
+                [--device_id DEVICE_ID]
+
+parameters/options:
+  --device_target       the training backend type, default is Ascend.
+  --data_path           the storage path of dataset
+  --device_id           the device which is used to train the model.
+
+```
+
+### Evaluation
+
+```
+usage: eval.py [--device_target TARGET][--data_path DATA_PATH]
+                [--device_id DEVICE_ID][--checkpoint_path CKPT_PATH]
+
+parameters/options:
+  --device_target       the evaluation backend type, default is Ascend.
+  --data_path           the storage path of dataset
+  --device_id           the device which is used to evaluate the model.
+  --checkpoint_path     the checkpoint file path used to evaluate model.
+```
\ No newline at end of file

From 5b39a3ea6e83e669531355d0074331fc06c77f37 Mon Sep 17 00:00:00 2001
From: lvliang <lvliang18@huawei.com>
Date: Thu, 23 Apr 2020 20:40:00 +0800
Subject: [PATCH 035/242] fix-check-nullptr-by-calling-function

---
 mindspore/ccsrc/session/session_basic.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc
index b2daa07d71..40b69b75b3 100755
--- a/mindspore/ccsrc/session/session_basic.cc
+++ b/mindspore/ccsrc/session/session_basic.cc
@@ -572,7 +572,6 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con
 // run graph steps
 void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
                                  const std::vector<tensor::TensorPtr> &inputs_const) const {
-  MS_EXCEPTION_IF_NULL(kernel_graph);
   std::vector<tensor::TensorPtr> inputs(inputs_const);
   size_t input_ctrl_size = 1;
   MS_EXCEPTION_IF_NULL(context_);
@@ -585,6 +584,8 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
     MS_LOG(EXCEPTION) << "tensor input:" << inputs.size() << " is not equal graph inputs:" << input_nodes.size()
                       << ", input_ctrl_size:" << input_ctrl_size;
   }
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
   for (size_t i = 0; i < inputs.size(); ++i) {
     auto tensor = inputs[i];
     MS_EXCEPTION_IF_NULL(tensor);
@@ -594,8 +595,7 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
       auto pk_node = input_node->cast<ParameterPtr>();
       auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0);
       bool need_sync = false;
-      MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
-      if (MsContext::GetInstance()->enable_pynative_infer()) {
+      if (ms_context->enable_pynative_infer()) {
         if (tensor->device_address().get() == nullptr || tensor->device_address() != device_address) {
           need_sync = true;
         }

From 0f0e8fe8744c0858050c596afca36588b196ad55 Mon Sep 17 00:00:00 2001
From: limingqi107 <limingqi@huawei.com>
Date: Thu, 23 Apr 2020 20:40:35 +0800
Subject: [PATCH 036/242] gpu dynamic memory pool can not reuse allReduce in
 multi-stream

---
 mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
index 5dd4facb25..b3b364b00c 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
@@ -322,6 +322,9 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
   MS_EXCEPTION_IF_NULL(mem_reuse_util_ptr);
   auto cnode = kernel->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
+  if (AnfAlgo::GetCNodeName(kernel) == kAllReduceOpName) {
+    return;
+  }
   // Free the input of kernel by reference count.
   for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) {
     auto kernel_ref_count_ptr = mem_reuse_util_ptr->GetKernelInputRef(cnode, i);

From 065e9e6a4ee39701758027e7b6e1f63e1529a503 Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Thu, 23 Apr 2020 21:13:12 +0800
Subject: [PATCH 037/242] Fix when not set GLOG_log_dir, create log file at
 /tmp

---
 mindspore/ccsrc/utils/log_adapter.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/utils/log_adapter.cc b/mindspore/ccsrc/utils/log_adapter.cc
index 0cd9b64a9b..deb4f3838e 100644
--- a/mindspore/ccsrc/utils/log_adapter.cc
+++ b/mindspore/ccsrc/utils/log_adapter.cc
@@ -252,9 +252,13 @@ void mindspore_log_init(void) {
   if (mindspore::GetEnv("GLOG_logfile_mode").empty()) {
     FLAGS_logfile_mode = 0640;
   }
+  std::string logtostderr = mindspore::GetEnv("GLOG_logtostderr");
   // default print log to screen
-  if (mindspore::GetEnv("GLOG_logtostderr").empty()) {
+  if (logtostderr.empty()) {
     FLAGS_logtostderr = true;
+  } else if (logtostderr == "0" && mindspore::GetEnv("GLOG_log_dir").empty()) {
+    FLAGS_logtostderr = true;
+    MS_LOG(WARNING) << "`GLOG_log_dir` is not set, output log to screen.";
   }
 #else
   mindspore::InitMsLogLevel();

From cd6e8d65427d452810f7c0424d11ef48379abce3 Mon Sep 17 00:00:00 2001
From: huanghui <huanghui44@huawei.com>
Date: Wed, 22 Apr 2020 14:43:25 +0800
Subject: [PATCH 038/242] fix ReluV2's mask shape in derelu fusion pass

---
 .../ascend/ascend_backend_optimization.cc     |  6 +++++-
 .../ir_fusion/confusion_mul_grad_fusion.cc    |  4 ++++
 .../ascend/ir_fusion/derelu_fusion.cc         | 20 +++++++++++++++++--
 mindspore/ccsrc/utils/utils.h                 |  2 +-
 .../confusion_mul_grad_fusion_test.cc         |  5 +++++
 .../gtest_input/pre_activate/derelu_fusion.py |  2 +-
 6 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index a2d82525e9..4a2b5def1a 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -46,6 +46,8 @@
 #include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h"
 #include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h"
 #include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h"
+#include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h"
+#include "pre_activate/ascend/ir_fusion/derelu_fusion.h"
 #include "pre_activate/ascend/format_type/insert_trans_op.h"
 #include "pre_activate/pass/getitem_tuple.h"
 #include "pre_activate/pass/optimize_dependence.h"
@@ -94,8 +96,10 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) {
   ir_fusion_pm->AddPass(std::make_shared<MulAddNFusion>());
   ir_fusion_pm->AddPass(std::make_shared<MatmulBiasaddFusion>());
   ir_fusion_pm->AddPass(std::make_shared<AddnFission>());
-  ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>());
+  ir_fusion_pm->AddPass(std::make_shared<DereluFusion>());
+  ir_fusion_pm->AddPass(std::make_shared<ConfusionMulGradFusion>());
   ir_fusion_pm->AddPass(std::make_shared<TransposeTransDataFusion>());
+  ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>());
 }
 }  // namespace
 
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc
index 6b7f732a6a..47098379bf 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc
@@ -18,6 +18,7 @@
 #include <memory>
 #include <vector>
 #include <algorithm>
+#include <string>
 #include "session/anf_runtime_algorithm.h"
 #include "ir/primitive.h"
 #include "utils/utils.h"
@@ -89,6 +90,9 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons
   auto reduce_sum = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(reduce_sum);
   auto mul1 = reduce_sum->input(1);
+  if (mul1->fullname_with_scope().find("bert/encoder") == std::string::npos) {
+    return nullptr;
+  }
   if (IsUsedByOthers(graph, mul1)) {
     MS_LOG(INFO) << "Mul1 is used by others, quit fusion!";
     return nullptr;
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc
index d5ea315de1..74b63a5b52 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/derelu_fusion.cc
@@ -50,9 +50,22 @@ CNodePtr CreateReluV2(const FuncGraphPtr &graph, const CNodePtr &relu) {
   MS_EXCEPTION_IF_NULL(new_node);
   new_node->set_scope(relu->scope());
 
-  // ReluV2's 2rd output is mask whose data type is uint8 and value is 0 or 1, so shape is an empty vector
+  // ReluV2's 2nd output is a mask whose data type is uint8
   TypeId mask_dtype = kNumberTypeUInt8;
-  std::vector<size_t> mask_shape;
+  std::vector<size_t> mask_shape = AnfAlgo::GetOutputInferShape(relu, 0);
+  if (mask_shape.size() != 4) {
+    MS_LOG(WARNING) << "relu's infer shape size not equal 4";
+    return nullptr;
+  }
+  auto input_dtype = AnfAlgo::GetPrevNodeOutputInferDataType(relu, 0);
+  if (input_dtype == kNumberTypeUInt8 || input_dtype == kNumberTypeInt8) {
+    mask_shape[1] = (mask_shape[1] + 31) / 32;
+    mask_shape.push_back(4);
+  } else {
+    mask_shape[1] = (mask_shape[1] + 15) / 16;
+    mask_shape.push_back(2);
+  }
+
   auto types = {AnfAlgo::GetOutputInferDataType(relu, 0), mask_dtype};
   auto shapes = {AnfAlgo::GetOutputInferShape(relu, 0), mask_shape};
   AnfAlgo::SetOutputInferTypeAndShape(types, shapes, new_node.get());
@@ -91,6 +104,9 @@ const AnfNodePtr DereluFusion::Process(const FuncGraphPtr &graph, const AnfNodeP
   MS_EXCEPTION_IF_NULL(relu);
 
   auto relu_v2 = CreateReluV2(graph, relu);
+  if (relu_v2 == nullptr) {
+    return nullptr;
+  }
   std::vector<AnfNodePtr> relu_v2_node_outputs;
   CreateMultipleOutputsOfAnfNode(graph, relu_v2, kReluV2OutputNum, &relu_v2_node_outputs);
 
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 6829a7e888..b967f0c355 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -120,7 +120,7 @@ constexpr auto kStreamActiveOpName = "StreamActive";
 constexpr auto kAssignAddOpName = "AssignAdd";
 constexpr auto kSendOpName = "Send";
 constexpr auto kRecvOpName = "Recv";
-constexpr auto kReluV2OpName = "ReluV2";
+constexpr auto kReluV2OpName = "ReLUV2";
 constexpr auto kReluGradV2OpName = "ReluGradV2";
 
 // attr key name
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc
index e3bf09d2cb..4b5d38d375 100644
--- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc
@@ -32,6 +32,11 @@ class TestHWOptimizeConfusionMulGradFusion : public BackendCommon {
 TEST_F(TestHWOptimizeConfusionMulGradFusion, test_fusion) {
   FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_confusion_mul_grad_fusion", "before");
   EXPECT_NE(g, nullptr);
+  auto bert_scope = std::make_shared<Scope>("bert/encoder");
+  for (auto node : TopoSort(g->get_return())) {
+    node->set_scope(bert_scope);
+  }
+
   std::vector<int> shp{1, 1, 1, 1};
   auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
   AbstractBasePtrList args_spec_list;
diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/derelu_fusion.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/derelu_fusion.py
index 497975542b..767f85332f 100644
--- a/tests/ut/cpp/python_input/gtest_input/pre_activate/derelu_fusion.py
+++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/derelu_fusion.py
@@ -17,7 +17,7 @@ from mindspore.ops import Primitive
 
 relu = P.ReLU()
 relu_grad = Primitive('ReluGrad')
-relu_v2 = Primitive('ReluV2')
+relu_v2 = Primitive('ReLUV2')
 relu_grad_v2 = Primitive('ReluGradV2')
 make_tuple = Primitive('make_tuple')
 tuple_getitem = Primitive('tuple_getitem')

From 1f222ddb9eb8b897feb5896efae079fde2aeb7af Mon Sep 17 00:00:00 2001
From: liyong <liyong126@huawei.com>
Date: Thu, 23 Apr 2020 17:45:49 +0800
Subject: [PATCH 039/242] fix mindrecord c ut

---
 mindspore/ccsrc/mindrecord/io/shard_reader.cc |   3 +-
 tests/ut/cpp/mindrecord/ut_common.cc          | 369 +++++++++++++++++-
 tests/ut/cpp/mindrecord/ut_common.h           |  25 +-
 tests/ut/cpp/mindrecord/ut_shard.cc           |  10 +-
 .../ut_shard_index_generator_test.cc          |  32 --
 .../cpp/mindrecord/ut_shard_operator_test.cc  |  15 +-
 .../ut/cpp/mindrecord/ut_shard_reader_test.cc |  38 +-
 .../cpp/mindrecord/ut_shard_segment_test.cc   |  14 +-
 .../ut/cpp/mindrecord/ut_shard_writer_test.cc | 360 +----------------
 .../ut/cpp/mindrecord/ut_shard_writer_test.h  |  26 --
 10 files changed, 447 insertions(+), 445 deletions(-)
 delete mode 100644 tests/ut/cpp/mindrecord/ut_shard_writer_test.h

diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc
index 9cd02d9120..dd34615f7e 100644
--- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc
+++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc
@@ -346,7 +346,8 @@ void ShardReader::GetClassesInShard(sqlite3 *db, int shard_id, const std::string
     MS_LOG(ERROR) << "Error in select sql statement, sql:" << common::SafeCStr(sql) << ", error: " << errmsg;
     return;
   }
-  MS_LOG(INFO) << "Get" << static_cast<int>(columns.size()) << " records from shard " << shard_id << " index.";
+  MS_LOG(INFO) << "Get " << static_cast<int>(columns.size()) << " records from shard " << shard_id << " index.";
+  std::lock_guard<std::mutex> lck(shard_locker_);
   for (int i = 0; i < static_cast<int>(columns.size()); ++i) {
     categories.emplace(columns[i][0]);
   }
diff --git a/tests/ut/cpp/mindrecord/ut_common.cc b/tests/ut/cpp/mindrecord/ut_common.cc
index 76aa5fc503..2d2d69bd54 100644
--- a/tests/ut/cpp/mindrecord/ut_common.cc
+++ b/tests/ut/cpp/mindrecord/ut_common.cc
@@ -16,9 +16,9 @@
 
 #include "ut_common.h"
 
-using mindspore::MsLogLevel::ERROR;
-using mindspore::ExceptionType::NoExceptionType;
 using mindspore::LogStream;
+using mindspore::ExceptionType::NoExceptionType;
+using mindspore::MsLogLevel::ERROR;
 
 namespace mindspore {
 namespace mindrecord {
@@ -33,23 +33,6 @@ void Common::SetUp() {}
 
 void Common::TearDown() {}
 
-void Common::LoadData(const std::string &directory, std::vector<json> &json_buffer, const int max_num) {
-  int count = 0;
-  string input_path = directory;
-  ifstream infile(input_path);
-  if (!infile.is_open()) {
-    MS_LOG(ERROR) << "can not open the file ";
-    return;
-  }
-  string temp;
-  while (getline(infile, temp) && count != max_num) {
-    count++;
-    json j = json::parse(temp);
-    json_buffer.push_back(j);
-  }
-  infile.close();
-}
-
 #ifdef __cplusplus
 #if __cplusplus
 }
@@ -70,5 +53,353 @@ const std::string FormatInfo(const std::string &message, uint32_t message_total_
   std::string right_padding(static_cast<uint64_t>(floor(padding_length / 2.0)), '=');
   return left_padding + part_message + right_padding;
 }
+
+void LoadData(const std::string &directory, std::vector<json> &json_buffer, const int max_num) {
+  int count = 0;
+  string input_path = directory;
+  ifstream infile(input_path);
+  if (!infile.is_open()) {
+    MS_LOG(ERROR) << "can not open the file ";
+    return;
+  }
+  string temp;
+  while (getline(infile, temp) && count != max_num) {
+    count++;
+    json j = json::parse(temp);
+    json_buffer.push_back(j);
+  }
+  infile.close();
+}
+
+void LoadDataFromImageNet(const std::string &directory, std::vector<json> &json_buffer, const int max_num) {
+  int count = 0;
+  string input_path = directory;
+  ifstream infile(input_path);
+  if (!infile.is_open()) {
+    MS_LOG(ERROR) << "can not open the file ";
+    return;
+  }
+  string temp;
+  string filename;
+  string label;
+  json j;
+  while (getline(infile, temp) && count != max_num) {
+    count++;
+    std::size_t pos = temp.find(",", 0);
+    if (pos != std::string::npos) {
+      j["file_name"] = temp.substr(0, pos);
+      j["label"] = atoi(common::SafeCStr(temp.substr(pos + 1, temp.length())));
+      json_buffer.push_back(j);
+    }
+  }
+  infile.close();
+}
+
+int Img2DataUint8(const std::vector<std::string> &img_absolute_path, std::vector<std::vector<uint8_t>> &bin_data) {
+  for (auto &file : img_absolute_path) {
+    // read image file
+    std::ifstream in(common::SafeCStr(file), std::ios::in | std::ios::binary | std::ios::ate);
+    if (!in) {
+      MS_LOG(ERROR) << common::SafeCStr(file) << " is not a directory or not exist!";
+      return -1;
+    }
+
+    // get the file size
+    uint64_t size = in.tellg();
+    in.seekg(0, std::ios::beg);
+    std::vector<uint8_t> file_data(size);
+    in.read(reinterpret_cast<char *>(&file_data[0]), size);
+    in.close();
+    bin_data.push_back(file_data);
+  }
+  return 0;
+}
+
+int GetAbsoluteFiles(std::string directory, std::vector<std::string> &files_absolute_path) {
+  DIR *dir = opendir(common::SafeCStr(directory));
+  if (dir == nullptr) {
+    MS_LOG(ERROR) << common::SafeCStr(directory) << " is not a directory or not exist!";
+    return -1;
+  }
+  struct dirent *d_ent = nullptr;
+  char dot[3] = ".";
+  char dotdot[6] = "..";
+  while ((d_ent = readdir(dir)) != nullptr) {
+    if ((strcmp(d_ent->d_name, dot) != 0) && (strcmp(d_ent->d_name, dotdot) != 0)) {
+      if (d_ent->d_type == DT_DIR) {
+        std::string new_directory = directory + std::string("/") + std::string(d_ent->d_name);
+        if (directory[directory.length() - 1] == '/') {
+          new_directory = directory + string(d_ent->d_name);
+        }
+        if (-1 == GetAbsoluteFiles(new_directory, files_absolute_path)) {
+          closedir(dir);
+          return -1;
+        }
+      } else {
+        std::string absolute_path = directory + std::string("/") + std::string(d_ent->d_name);
+        if (directory[directory.length() - 1] == '/') {
+          absolute_path = directory + std::string(d_ent->d_name);
+        }
+        files_absolute_path.push_back(absolute_path);
+      }
+    }
+  }
+  closedir(dir);
+  return 0;
+}
+
+void ShardWriterImageNet() {
+  MS_LOG(INFO) << common::SafeCStr(FormatInfo("Write imageNet"));
+
+  // load binary data
+  std::vector<std::vector<uint8_t>> bin_data;
+  std::vector<std::string> filenames;
+  if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) {
+    MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------";
+    return;
+  }
+  mindrecord::Img2DataUint8(filenames, bin_data);
+
+  // init shardHeader
+  ShardHeader header_data;
+  MS_LOG(INFO) << "Init ShardHeader Already.";
+
+  // create schema
+  json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json;
+  std::shared_ptr<mindrecord::Schema> anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json);
+  if (anno_schema == nullptr) {
+    MS_LOG(ERROR) << "Build annotation schema failed";
+    return;
+  }
+
+  // add schema to shardHeader
+  int anno_schema_id = header_data.AddSchema(anno_schema);
+  MS_LOG(INFO) << "Init Schema Already.";
+
+  // create index
+  std::pair<uint64_t, std::string> index_field1(anno_schema_id, "file_name");
+  std::pair<uint64_t, std::string> index_field2(anno_schema_id, "label");
+  std::vector<std::pair<uint64_t, std::string>> fields;
+  fields.push_back(index_field1);
+  fields.push_back(index_field2);
+
+  // add index to shardHeader
+  header_data.AddIndexFields(fields);
+  MS_LOG(INFO) << "Init Index Fields Already.";
+  // load  meta data
+  std::vector<json> annotations;
+  LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 10);
+
+  // add data
+  std::map<std::uint64_t, std::vector<json>> rawdatas;
+  rawdatas.insert(pair<uint64_t, vector<json>>(anno_schema_id, annotations));
+  MS_LOG(INFO) << "Init Images Already.";
+
+  // init file_writer
+  std::vector<std::string> file_names;
+  int file_count = 4;
+  for (int i = 1; i <= file_count; i++) {
+    file_names.emplace_back(std::string("./imagenet.shard0") + std::to_string(i));
+    MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]);
+  }
+
+  MS_LOG(INFO) << "Init Output Files Already.";
+  {
+    ShardWriter fw_init;
+    fw_init.Open(file_names);
+
+    // set shardHeader
+    fw_init.SetShardHeader(std::make_shared<mindrecord::ShardHeader>(header_data));
+
+    // close file_writer
+    fw_init.Commit();
+  }
+  std::string filename = "./imagenet.shard01";
+  {
+    MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================";
+    mindrecord::ShardWriter fw;
+    fw.OpenForAppend(filename);
+    fw.WriteRawData(rawdatas, bin_data);
+    fw.Commit();
+  }
+  mindrecord::ShardIndexGenerator sg{filename};
+  sg.Build();
+  sg.WriteToDatabase();
+
+  MS_LOG(INFO) << "Done create index";
+}
+
+void ShardWriterImageNetOneSample() {
+  // load binary data
+  std::vector<std::vector<uint8_t>> bin_data;
+  std::vector<std::string> filenames;
+  if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) {
+    MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------";
+    return;
+  }
+  mindrecord::Img2DataUint8(filenames, bin_data);
+
+  // init shardHeader
+  mindrecord::ShardHeader header_data;
+  MS_LOG(INFO) << "Init ShardHeader Already.";
+
+  // create schema
+  json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json;
+  std::shared_ptr<mindrecord::Schema> anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json);
+  if (anno_schema == nullptr) {
+    MS_LOG(ERROR) << "Build annotation schema failed";
+    return;
+  }
+
+  // add schema to shardHeader
+  int anno_schema_id = header_data.AddSchema(anno_schema);
+  MS_LOG(INFO) << "Init Schema Already.";
+
+  // create index
+  std::pair<uint64_t, std::string> index_field1(anno_schema_id, "file_name");
+  std::pair<uint64_t, std::string> index_field2(anno_schema_id, "label");
+  std::vector<std::pair<uint64_t, std::string>> fields;
+  fields.push_back(index_field1);
+  fields.push_back(index_field2);
+
+  // add index to shardHeader
+  header_data.AddIndexFields(fields);
+  MS_LOG(INFO) << "Init Index Fields Already.";
+
+  // load  meta data
+  std::vector<json> annotations;
+  LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 1);
+
+  // add data
+  std::map<std::uint64_t, std::vector<json>> rawdatas;
+  rawdatas.insert(pair<uint64_t, vector<json>>(anno_schema_id, annotations));
+  MS_LOG(INFO) << "Init Images Already.";
+
+  // init file_writer
+  std::vector<std::string> file_names;
+  for (int i = 1; i <= 4; i++) {
+    file_names.emplace_back(std::string("./OneSample.shard0") + std::to_string(i));
+    MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]);
+  }
+
+  MS_LOG(INFO) << "Init Output Files Already.";
+  {
+    mindrecord::ShardWriter fw_init;
+    fw_init.Open(file_names);
+
+    // set shardHeader
+    fw_init.SetShardHeader(std::make_shared<mindrecord::ShardHeader>(header_data));
+
+    // close file_writer
+    fw_init.Commit();
+  }
+
+  std::string filename = "./OneSample.shard01";
+  {
+    MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================";
+    mindrecord::ShardWriter fw;
+    fw.OpenForAppend(filename);
+    bin_data = std::vector<std::vector<uint8_t>>(bin_data.begin(), bin_data.begin() + 1);
+    fw.WriteRawData(rawdatas, bin_data);
+    fw.Commit();
+  }
+
+  mindrecord::ShardIndexGenerator sg{filename};
+  sg.Build();
+  sg.WriteToDatabase();
+  MS_LOG(INFO) << "Done create index";
+}
+
+void ShardWriterImageNetOpenForAppend(string filename) {
+  for (int i = 1; i <= 4; i++) {
+    string filename = std::string("./OpenForAppendSample.shard0") + std::to_string(i);
+    string db_name = std::string("./OpenForAppendSample.shard0") + std::to_string(i) + ".db";
+    remove(common::SafeCStr(filename));
+    remove(common::SafeCStr(db_name));
+  }
+
+  // load binary data
+  std::vector<std::vector<uint8_t>> bin_data;
+  std::vector<std::string> filenames;
+  if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) {
+    MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------";
+    return;
+  }
+  mindrecord::Img2DataUint8(filenames, bin_data);
+
+  // init shardHeader
+  mindrecord::ShardHeader header_data;
+  MS_LOG(INFO) << "Init ShardHeader Already.";
+
+  // create schema
+  json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json;
+  std::shared_ptr<mindrecord::Schema> anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json);
+  if (anno_schema == nullptr) {
+    MS_LOG(ERROR) << "Build annotation schema failed";
+    return;
+  }
+
+  // add schema to shardHeader
+  int anno_schema_id = header_data.AddSchema(anno_schema);
+  MS_LOG(INFO) << "Init Schema Already.";
+
+  // create index
+  std::pair<uint64_t, std::string> index_field1(anno_schema_id, "file_name");
+  std::pair<uint64_t, std::string> index_field2(anno_schema_id, "label");
+  std::vector<std::pair<uint64_t, std::string>> fields;
+  fields.push_back(index_field1);
+  fields.push_back(index_field2);
+
+  // add index to shardHeader
+  header_data.AddIndexFields(fields);
+  MS_LOG(INFO) << "Init Index Fields Already.";
+
+  // load  meta data
+  std::vector<json> annotations;
+  LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 1);
+
+  // add data
+  std::map<std::uint64_t, std::vector<json>> rawdatas;
+  rawdatas.insert(pair<uint64_t, vector<json>>(anno_schema_id, annotations));
+  MS_LOG(INFO) << "Init Images Already.";
+
+  // init file_writer
+  std::vector<std::string> file_names;
+  for (int i = 1; i <= 4; i++) {
+    file_names.emplace_back(std::string("./OpenForAppendSample.shard0") + std::to_string(i));
+    MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]);
+  }
+
+  MS_LOG(INFO) << "Init Output Files Already.";
+  {
+    mindrecord::ShardWriter fw_init;
+    fw_init.Open(file_names);
+
+    // set shardHeader
+    fw_init.SetShardHeader(std::make_shared<mindrecord::ShardHeader>(header_data));
+
+    // close file_writer
+    fw_init.Commit();
+  }
+  {
+    MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================";
+    mindrecord::ShardWriter fw;
+    auto ret = fw.OpenForAppend(filename);
+    if (ret == FAILED) {
+      return;
+    }
+
+    bin_data = std::vector<std::vector<uint8_t>>(bin_data.begin(), bin_data.begin() + 1);
+    fw.WriteRawData(rawdatas, bin_data);
+    fw.Commit();
+  }
+
+  ShardIndexGenerator sg{filename};
+  sg.Build();
+  sg.WriteToDatabase();
+  MS_LOG(INFO) << "Done create index";
+}
+
+
 }  // namespace mindrecord
 }  // namespace mindspore
diff --git a/tests/ut/cpp/mindrecord/ut_common.h b/tests/ut/cpp/mindrecord/ut_common.h
index 398c59779b..8b244bf87a 100644
--- a/tests/ut/cpp/mindrecord/ut_common.h
+++ b/tests/ut/cpp/mindrecord/ut_common.h
@@ -17,6 +17,7 @@
 #ifndef TESTS_MINDRECORD_UT_UT_COMMON_H_
 #define TESTS_MINDRECORD_UT_UT_COMMON_H_
 
+#include <dirent.h>
 #include <fstream>
 #include <string>
 #include <vector>
@@ -25,7 +26,9 @@
 #include "gtest/gtest.h"
 #include "utils/log_adapter.h"
 #include "mindrecord/include/shard_index.h"
-
+#include "mindrecord/include/shard_header.h" 
+#include "mindrecord/include/shard_index_generator.h"
+#include "mindrecord/include/shard_writer.h"
 using json = nlohmann::json;
 using std::ifstream;
 using std::pair;
@@ -40,11 +43,10 @@ class Common : public testing::Test {
   std::string install_root;
 
   // every TEST_F macro will enter one
-  void SetUp();
+  virtual void SetUp();
 
-  void TearDown();
+  virtual void TearDown();
 
-  static void LoadData(const std::string &directory, std::vector<json> &json_buffer, const int max_num);
 };
 }  // namespace UT
 
@@ -55,6 +57,21 @@ class Common : public testing::Test {
 ///
 /// return the formatted string
 const std::string FormatInfo(const std::string &message, uint32_t message_total_length = 128);
+
+
+void LoadData(const std::string &directory, std::vector<json> &json_buffer, const int max_num);
+
+void LoadDataFromImageNet(const std::string &directory, std::vector<json> &json_buffer, const int max_num);
+
+int Img2DataUint8(const std::vector<std::string> &img_absolute_path, std::vector<std::vector<uint8_t>> &bin_data);
+
+int GetAbsoluteFiles(std::string directory, std::vector<std::string> &files_absolute_path);
+
+void ShardWriterImageNet();
+
+void ShardWriterImageNetOneSample();
+
+void ShardWriterImageNetOpenForAppend(string filename);
 }  // namespace mindrecord
 }  // namespace mindspore
 #endif  // TESTS_MINDRECORD_UT_UT_COMMON_H_
diff --git a/tests/ut/cpp/mindrecord/ut_shard.cc b/tests/ut/cpp/mindrecord/ut_shard.cc
index 88fdb7e167..994ff1b859 100644
--- a/tests/ut/cpp/mindrecord/ut_shard.cc
+++ b/tests/ut/cpp/mindrecord/ut_shard.cc
@@ -29,7 +29,6 @@
 #include "mindrecord/include/shard_statistics.h"
 #include "securec.h"
 #include "ut_common.h"
-#include "ut_shard_writer_test.h"
 
 using mindspore::MsLogLevel::INFO;
 using mindspore::ExceptionType::NoExceptionType;
@@ -43,7 +42,7 @@ class TestShard : public UT::Common {
 };
 
 TEST_F(TestShard, TestShardSchemaPart) {
-  TestShardWriterImageNet();
+  ShardWriterImageNet();
 
   MS_LOG(INFO) << FormatInfo("Test schema");
 
@@ -55,6 +54,12 @@ TEST_F(TestShard, TestShardSchemaPart) {
   ASSERT_TRUE(schema != nullptr);
   MS_LOG(INFO) << "schema description: " << schema->get_desc() << ", schema:  " <<
     common::SafeCStr(schema->GetSchema().dump());
+  for (int i = 1; i <= 4; i++) {
+    string filename = std::string("./imagenet.shard0") + std::to_string(i);
+    string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db";
+    remove(common::SafeCStr(filename));
+    remove(common::SafeCStr(db_name));
+  }
 }
 
 TEST_F(TestShard, TestStatisticPart) {
@@ -128,6 +133,5 @@ TEST_F(TestShard, TestShardHeaderPart) {
   ASSERT_EQ(resFields, fields);
 }
 
-TEST_F(TestShard, TestShardWriteImage) { MS_LOG(INFO) << FormatInfo("Test writer"); }
 }  // namespace mindrecord
 }  // namespace mindspore
diff --git a/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc b/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc
index 0c33d33ffd..140fff4166 100644
--- a/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc
+++ b/tests/ut/cpp/mindrecord/ut_shard_index_generator_test.cc
@@ -53,38 +53,6 @@ class TestShardIndexGenerator : public UT::Common {
   TestShardIndexGenerator() {}
 };
 
-/*
-TEST_F(TestShardIndexGenerator, GetField) {
-  MS_LOG(INFO) << FormatInfo("Test ShardIndex: get field");
-
-  int max_num = 1;
-  string input_path1 = install_root + "/test/testCBGData/data/annotation.data";
-  std::vector<json> json_buffer1;  // store the image_raw_meta.data
-  Common::LoadData(input_path1, json_buffer1, max_num);
-
-  MS_LOG(INFO) << "Fetch fields: ";
-  for (auto &j : json_buffer1) {
-    auto v_name = ShardIndexGenerator::GetField("anno_tool", j);
-    auto v_attr_name = ShardIndexGenerator::GetField("entity_instances.attributes.attr_name", j);
-    auto v_entity_name = ShardIndexGenerator::GetField("entity_instances.entity_name", j);
-    vector<string> names = {"\"CVAT\""};
-    for (unsigned int i = 0; i != names.size(); i++) {
-      ASSERT_EQ(names[i], v_name[i]);
-    }
-    vector<string> attr_names = {"\"脸部评分\"", "\"特征点\"", "\"points_example\"", "\"polyline_example\"",
-                                 "\"polyline_example\""};
-    for (unsigned int i = 0; i != attr_names.size(); i++) {
-      ASSERT_EQ(attr_names[i], v_attr_name[i]);
-    }
-    vector<string> entity_names = {"\"276点人脸\"", "\"points_example\"", "\"polyline_example\"",
-                                   "\"polyline_example\""};
-    for (unsigned int i = 0; i != entity_names.size(); i++) {
-      ASSERT_EQ(entity_names[i], v_entity_name[i]);
-    }
-  }
-}
-*/
-
 TEST_F(TestShardIndexGenerator, TakeFieldType) {
   MS_LOG(INFO) << FormatInfo("Test ShardSchema: take field Type");
 
diff --git a/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc b/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc
index bfd49069b2..9c177d7a40 100644
--- a/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc
+++ b/tests/ut/cpp/mindrecord/ut_shard_operator_test.cc
@@ -40,6 +40,17 @@ namespace mindrecord {
 class TestShardOperator : public UT::Common {
  public:
   TestShardOperator() {}
+
+  void SetUp() override { ShardWriterImageNet(); }
+
+  void TearDown() override {
+    for (int i = 1; i <= 4; i++) {
+      string filename = std::string("./imagenet.shard0") + std::to_string(i);
+      string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db";
+      remove(common::SafeCStr(filename));
+      remove(common::SafeCStr(db_name));
+    }
+  }
 };
 
 TEST_F(TestShardOperator, TestShardSampleBasic) {
@@ -165,7 +176,7 @@ TEST_F(TestShardOperator, TestShardPkSamplerBasic) {
     auto x = dataset.GetNext();
     if (x.empty()) break;
     std::cout << "index: " << i << ", filename: " << common::SafeCStr((std::get<1>(x[0]))["file_name"])
-                 << ", label: " << common::SafeCStr((std::get<1>(x[0]))["label"].dump()) << std::endl;
+              << ", label: " << common::SafeCStr((std::get<1>(x[0]))["label"].dump()) << std::endl;
     i++;
   }
   dataset.Finish();
@@ -191,7 +202,7 @@ TEST_F(TestShardOperator, TestShardPkSamplerNumClass) {
     if (x.empty()) break;
 
     std::cout << "index: " << i << ", filename: " << common::SafeCStr((std::get<1>(x[0]))["file_name"])
-                 << ", label: " << common::SafeCStr((std::get<1>(x[0]))["label"].dump()) << std::endl;
+              << ", label: " << common::SafeCStr((std::get<1>(x[0]))["label"].dump()) << std::endl;
     i++;
   }
   dataset.Finish();
diff --git a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc
index f7ed39a006..e88c2fe3d6 100644
--- a/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc
+++ b/tests/ut/cpp/mindrecord/ut_shard_reader_test.cc
@@ -37,6 +37,16 @@ namespace mindrecord {
 class TestShardReader : public UT::Common {
  public:
   TestShardReader() {}
+  void SetUp() override { ShardWriterImageNet(); }
+
+  void TearDown() override {
+    for (int i = 1; i <= 4; i++) {
+      string filename = std::string("./imagenet.shard0") + std::to_string(i);
+      string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db";
+      remove(common::SafeCStr(filename));
+      remove(common::SafeCStr(db_name));
+    }
+  }
 };
 
 TEST_F(TestShardReader, TestShardReaderGeneral) {
@@ -51,8 +61,8 @@ TEST_F(TestShardReader, TestShardReaderGeneral) {
   while (true) {
     auto x = dataset.GetNext();
     if (x.empty()) break;
-    for (auto& j : x) {
-      for (auto& item : std::get<1>(j).items()) {
+    for (auto &j : x) {
+      for (auto &item : std::get<1>(j).items()) {
         MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump();
       }
     }
@@ -74,8 +84,8 @@ TEST_F(TestShardReader, TestShardReaderSample) {
   while (true) {
     auto x = dataset.GetNext();
     if (x.empty()) break;
-    for (auto& j : x) {
-      for (auto& item : std::get<1>(j).items()) {
+    for (auto &j : x) {
+      for (auto &item : std::get<1>(j).items()) {
         MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump();
       }
     }
@@ -99,8 +109,8 @@ TEST_F(TestShardReader, TestShardReaderBlock) {
   while (true) {
     auto x = dataset.GetBlockNext();
     if (x.empty()) break;
-    for (auto& j : x) {
-      for (auto& item : std::get<1>(j).items()) {
+    for (auto &j : x) {
+      for (auto &item : std::get<1>(j).items()) {
         MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump();
       }
     }
@@ -119,8 +129,8 @@ TEST_F(TestShardReader, TestShardReaderEasy) {
   while (true) {
     auto x = dataset.GetNext();
     if (x.empty()) break;
-    for (auto& j : x) {
-      for (auto& item : std::get<1>(j).items()) {
+    for (auto &j : x) {
+      for (auto &item : std::get<1>(j).items()) {
         MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump();
       }
     }
@@ -140,8 +150,8 @@ TEST_F(TestShardReader, TestShardReaderColumnNotInIndex) {
   while (true) {
     auto x = dataset.GetNext();
     if (x.empty()) break;
-    for (auto& j : x) {
-      for (auto& item : std::get<1>(j).items()) {
+    for (auto &j : x) {
+      for (auto &item : std::get<1>(j).items()) {
         MS_LOG(INFO) << "key: " << item.key() << ", value: " << item.value().dump();
       }
     }
@@ -169,9 +179,9 @@ TEST_F(TestShardReader, TestShardVersion) {
   while (true) {
     auto x = dataset.GetNext();
     if (x.empty()) break;
-    for (auto& j : x) {
+    for (auto &j : x) {
       MS_LOG(INFO) << "result size: " << std::get<0>(j).size();
-      for (auto& item : std::get<1>(j).items()) {
+      for (auto &item : std::get<1>(j).items()) {
         MS_LOG(INFO) << "key: " << common::SafeCStr(item.key()) << ", value: " << common::SafeCStr(item.value().dump());
       }
     }
@@ -201,8 +211,8 @@ TEST_F(TestShardReader, TestShardReaderConsumer) {
   while (true) {
     auto x = dataset.GetNext();
     if (x.empty()) break;
-    for (auto& j : x) {
-      for (auto& item : std::get<1>(j).items()) {
+    for (auto &j : x) {
+      for (auto &item : std::get<1>(j).items()) {
         MS_LOG(INFO) << "key: " << common::SafeCStr(item.key()) << ", value: " << common::SafeCStr(item.value().dump());
       }
     }
diff --git a/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc b/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc
index c803f584aa..bf0a35df7d 100644
--- a/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc
+++ b/tests/ut/cpp/mindrecord/ut_shard_segment_test.cc
@@ -33,15 +33,25 @@
 #include "mindrecord/include/shard_segment.h"
 #include "ut_common.h"
 
-using mindspore::MsLogLevel::INFO;
-using mindspore::ExceptionType::NoExceptionType;
 using mindspore::LogStream;
+using mindspore::ExceptionType::NoExceptionType;
+using mindspore::MsLogLevel::INFO;
 
 namespace mindspore {
 namespace mindrecord {
 class TestShardSegment : public UT::Common {
  public:
   TestShardSegment() {}
+  void SetUp() override { ShardWriterImageNet(); }
+
+  void TearDown() override {
+    for (int i = 1; i <= 4; i++) {
+      string filename = std::string("./imagenet.shard0") + std::to_string(i);
+      string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db";
+      remove(common::SafeCStr(filename));
+      remove(common::SafeCStr(db_name));
+    }
+  }
 };
 
 TEST_F(TestShardSegment, TestShardSegment) {
diff --git a/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc b/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc
index 18e9214b08..3fa248c2e0 100644
--- a/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc
+++ b/tests/ut/cpp/mindrecord/ut_shard_writer_test.cc
@@ -16,7 +16,6 @@
 
 #include <chrono>
 #include <cstring>
-#include <dirent.h>
 #include <iostream>
 #include <memory>
 #include <string>
@@ -30,7 +29,6 @@
 #include "mindrecord/include/shard_index_generator.h"
 #include "securec.h"
 #include "ut_common.h"
-#include "ut_shard_writer_test.h"
 
 using mindspore::LogStream;
 using mindspore::ExceptionType::NoExceptionType;
@@ -44,249 +42,10 @@ class TestShardWriter : public UT::Common {
   TestShardWriter() {}
 };
 
-void LoadDataFromImageNet(const std::string &directory, std::vector<json> &json_buffer, const int max_num) {
-  int count = 0;
-  string input_path = directory;
-  ifstream infile(input_path);
-  if (!infile.is_open()) {
-    MS_LOG(ERROR) << "can not open the file ";
-    return;
-  }
-  string temp;
-  string filename;
-  string label;
-  json j;
-  while (getline(infile, temp) && count != max_num) {
-    count++;
-    std::size_t pos = temp.find(",", 0);
-    if (pos != std::string::npos) {
-      j["file_name"] = temp.substr(0, pos);
-      j["label"] = atoi(common::SafeCStr(temp.substr(pos + 1, temp.length())));
-      json_buffer.push_back(j);
-    }
-  }
-  infile.close();
-}
-
-int Img2DataUint8(const std::vector<std::string> &img_absolute_path, std::vector<std::vector<uint8_t>> &bin_data) {
-  for (auto &file : img_absolute_path) {
-    // read image file
-    std::ifstream in(common::SafeCStr(file), std::ios::in | std::ios::binary | std::ios::ate);
-    if (!in) {
-      MS_LOG(ERROR) << common::SafeCStr(file) << " is not a directory or not exist!";
-      return -1;
-    }
-
-    // get the file size
-    uint64_t size = in.tellg();
-    in.seekg(0, std::ios::beg);
-    std::vector<uint8_t> file_data(size);
-    in.read(reinterpret_cast<char *>(&file_data[0]), size);
-    in.close();
-    bin_data.push_back(file_data);
-  }
-  return 0;
-}
-
-int GetAbsoluteFiles(std::string directory, std::vector<std::string> &files_absolute_path) {
-  DIR *dir = opendir(common::SafeCStr(directory));
-  if (dir == nullptr) {
-    MS_LOG(ERROR) << common::SafeCStr(directory) << " is not a directory or not exist!";
-    return -1;
-  }
-  struct dirent *d_ent = nullptr;
-  char dot[3] = ".";
-  char dotdot[6] = "..";
-  while ((d_ent = readdir(dir)) != nullptr) {
-    if ((strcmp(d_ent->d_name, dot) != 0) && (strcmp(d_ent->d_name, dotdot) != 0)) {
-      if (d_ent->d_type == DT_DIR) {
-        std::string new_directory = directory + std::string("/") + std::string(d_ent->d_name);
-        if (directory[directory.length() - 1] == '/') {
-          new_directory = directory + string(d_ent->d_name);
-        }
-        if (-1 == GetAbsoluteFiles(new_directory, files_absolute_path)) {
-          closedir(dir);
-          return -1;
-        }
-      } else {
-        std::string absolute_path = directory + std::string("/") + std::string(d_ent->d_name);
-        if (directory[directory.length() - 1] == '/') {
-          absolute_path = directory + std::string(d_ent->d_name);
-        }
-        files_absolute_path.push_back(absolute_path);
-      }
-    }
-  }
-  closedir(dir);
-  return 0;
-}
-
-void TestShardWriterImageNet() {
-  MS_LOG(INFO) << common::SafeCStr(FormatInfo("Write imageNet"));
-
-  // load binary data
-  std::vector<std::vector<uint8_t>> bin_data;
-  std::vector<std::string> filenames;
-  if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) {
-    MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------";
-    return;
-  }
-  mindrecord::Img2DataUint8(filenames, bin_data);
-
-  // init shardHeader
-  mindrecord::ShardHeader header_data;
-  MS_LOG(INFO) << "Init ShardHeader Already.";
-
-  // create schema
-  json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json;
-  std::shared_ptr<mindrecord::Schema> anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json);
-  if (anno_schema == nullptr) {
-    MS_LOG(ERROR) << "Build annotation schema failed";
-    return;
-  }
-
-  // add schema to shardHeader
-  int anno_schema_id = header_data.AddSchema(anno_schema);
-  MS_LOG(INFO) << "Init Schema Already.";
-
-  // create index
-  std::pair<uint64_t, std::string> index_field1(anno_schema_id, "file_name");
-  std::pair<uint64_t, std::string> index_field2(anno_schema_id, "label");
-  std::vector<std::pair<uint64_t, std::string>> fields;
-  fields.push_back(index_field1);
-  fields.push_back(index_field2);
-
-  // add index to shardHeader
-  header_data.AddIndexFields(fields);
-  MS_LOG(INFO) << "Init Index Fields Already.";
-  // load  meta data
-  std::vector<json> annotations;
-  LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 10);
-
-  // add data
-  std::map<std::uint64_t, std::vector<json>> rawdatas;
-  rawdatas.insert(pair<uint64_t, vector<json>>(anno_schema_id, annotations));
-  MS_LOG(INFO) << "Init Images Already.";
-
-  // init file_writer
-  std::vector<std::string> file_names;
-  int file_count = 4;
-  for (int i = 1; i <= file_count; i++) {
-    file_names.emplace_back(std::string("./imagenet.shard0") + std::to_string(i));
-    MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]);
-  }
-
-  MS_LOG(INFO) << "Init Output Files Already.";
-  {
-    mindrecord::ShardWriter fw_init;
-    fw_init.Open(file_names);
-
-    // set shardHeader
-    fw_init.SetShardHeader(std::make_shared<mindrecord::ShardHeader>(header_data));
-
-    // close file_writer
-    fw_init.Commit();
-  }
-  std::string filename = "./imagenet.shard01";
-  {
-    MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================";
-    mindrecord::ShardWriter fw;
-    fw.OpenForAppend(filename);
-    fw.WriteRawData(rawdatas, bin_data);
-    fw.Commit();
-  }
-  mindrecord::ShardIndexGenerator sg{filename};
-  sg.Build();
-  sg.WriteToDatabase();
-
-  MS_LOG(INFO) << "Done create index";
-}
-
-void TestShardWriterImageNetOneSample() {
-  // load binary data
-  std::vector<std::vector<uint8_t>> bin_data;
-  std::vector<std::string> filenames;
-  if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) {
-    MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------";
-    return;
-  }
-  mindrecord::Img2DataUint8(filenames, bin_data);
-
-  // init shardHeader
-  mindrecord::ShardHeader header_data;
-  MS_LOG(INFO) << "Init ShardHeader Already.";
-
-  // create schema
-  json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json;
-  std::shared_ptr<mindrecord::Schema> anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json);
-  if (anno_schema == nullptr) {
-    MS_LOG(ERROR) << "Build annotation schema failed";
-    return;
-  }
-
-  // add schema to shardHeader
-  int anno_schema_id = header_data.AddSchema(anno_schema);
-  MS_LOG(INFO) << "Init Schema Already.";
-
-  // create index
-  std::pair<uint64_t, std::string> index_field1(anno_schema_id, "file_name");
-  std::pair<uint64_t, std::string> index_field2(anno_schema_id, "label");
-  std::vector<std::pair<uint64_t, std::string>> fields;
-  fields.push_back(index_field1);
-  fields.push_back(index_field2);
-
-  // add index to shardHeader
-  header_data.AddIndexFields(fields);
-  MS_LOG(INFO) << "Init Index Fields Already.";
-
-  // load  meta data
-  std::vector<json> annotations;
-  LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 1);
-
-  // add data
-  std::map<std::uint64_t, std::vector<json>> rawdatas;
-  rawdatas.insert(pair<uint64_t, vector<json>>(anno_schema_id, annotations));
-  MS_LOG(INFO) << "Init Images Already.";
-
-  // init file_writer
-  std::vector<std::string> file_names;
-  for (int i = 1; i <= 4; i++) {
-    file_names.emplace_back(std::string("./OneSample.shard0") + std::to_string(i));
-    MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]);
-  }
-
-  MS_LOG(INFO) << "Init Output Files Already.";
-  {
-    mindrecord::ShardWriter fw_init;
-    fw_init.Open(file_names);
-
-    // set shardHeader
-    fw_init.SetShardHeader(std::make_shared<mindrecord::ShardHeader>(header_data));
-
-    // close file_writer
-    fw_init.Commit();
-  }
-
-  std::string filename = "./OneSample.shard01";
-  {
-    MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================";
-    mindrecord::ShardWriter fw;
-    fw.OpenForAppend(filename);
-    bin_data = std::vector<std::vector<uint8_t>>(bin_data.begin(), bin_data.begin() + 1);
-    fw.WriteRawData(rawdatas, bin_data);
-    fw.Commit();
-  }
-
-  mindrecord::ShardIndexGenerator sg{filename};
-  sg.Build();
-  sg.WriteToDatabase();
-  MS_LOG(INFO) << "Done create index";
-}
-
 TEST_F(TestShardWriter, TestShardWriterBench) {
   MS_LOG(INFO) << common::SafeCStr(FormatInfo("Test write imageNet"));
 
-  TestShardWriterImageNet();
+  ShardWriterImageNet();
   for (int i = 1; i <= 4; i++) {
     string filename = std::string("./imagenet.shard0") + std::to_string(i);
     string db_name = std::string("./imagenet.shard0") + std::to_string(i) + ".db";
@@ -297,7 +56,7 @@ TEST_F(TestShardWriter, TestShardWriterBench) {
 
 TEST_F(TestShardWriter, TestShardWriterOneSample) {
   MS_LOG(INFO) << common::SafeCStr(FormatInfo("Test write imageNet int32 of sample less than num of shards"));
-  TestShardWriterImageNetOneSample();
+  ShardWriterImageNetOneSample();
   std::string filename = "./OneSample.shard01";
 
   ShardReader dataset;
@@ -342,7 +101,7 @@ TEST_F(TestShardWriter, TestShardWriterShiftRawPage) {
   std::vector<std::string> image_filenames;  // save all files' path within path_dir
 
   // read image_raw_meta.data
-  Common::LoadData(input_path1, json_buffer1, kMaxNum);
+  LoadData(input_path1, json_buffer1, kMaxNum);
   MS_LOG(INFO) << "Load Meta Data Already.";
 
   // get files' pathes stored in vector<string> image_filenames
@@ -375,7 +134,7 @@ TEST_F(TestShardWriter, TestShardWriterShiftRawPage) {
   MS_LOG(INFO) << "Init Schema Already.";
 
   // create/init statistics
-  Common::LoadData(input_path3, json_buffer4, 2);
+  LoadData(input_path3, json_buffer4, 2);
   json static1_json = json_buffer4[0];
   json static2_json = json_buffer4[1];
   MS_LOG(INFO) << "Initial statistics 1 is: " << common::SafeCStr(static1_json.dump());
@@ -474,7 +233,7 @@ TEST_F(TestShardWriter, TestShardWriterTrial) {
   std::vector<std::string> image_filenames;  // save all files' path within path_dir
 
   // read image_raw_meta.data
-  Common::LoadData(input_path1, json_buffer1, kMaxNum);
+  LoadData(input_path1, json_buffer1, kMaxNum);
   MS_LOG(INFO) << "Load Meta Data Already.";
 
   // get files' pathes stored in vector<string> image_filenames
@@ -508,7 +267,7 @@ TEST_F(TestShardWriter, TestShardWriterTrial) {
   MS_LOG(INFO) << "Init Schema Already.";
 
   // create/init statistics
-  Common::LoadData(input_path3, json_buffer4, 2);
+  LoadData(input_path3, json_buffer4, 2);
   json static1_json = json_buffer4[0];
   json static2_json = json_buffer4[1];
   MS_LOG(INFO) << "Initial statistics 1 is: " << common::SafeCStr(static1_json.dump());
@@ -613,7 +372,7 @@ TEST_F(TestShardWriter, TestShardWriterTrialNoFields) {
   std::vector<std::string> image_filenames;  // save all files' path within path_dir
 
   // read image_raw_meta.data
-  Common::LoadData(input_path1, json_buffer1, kMaxNum);
+  LoadData(input_path1, json_buffer1, kMaxNum);
   MS_LOG(INFO) << "Load Meta Data Already.";
 
   // get files' pathes stored in vector<string> image_filenames
@@ -644,7 +403,7 @@ TEST_F(TestShardWriter, TestShardWriterTrialNoFields) {
   MS_LOG(INFO) << "Init Schema Already.";
 
   // create/init statistics
-  Common::LoadData(input_path3, json_buffer4, 2);
+  LoadData(input_path3, json_buffer4, 2);
   json static1_json = json_buffer4[0];
   json static2_json = json_buffer4[1];
   MS_LOG(INFO) << "Initial statistics 1 is: " << common::SafeCStr(static1_json.dump());
@@ -1357,107 +1116,24 @@ TEST_F(TestShardWriter, TestWriteOpenFileName) {
   }
 }
 
-void TestShardWriterImageNetOpenForAppend(string filename) {
-  for (int i = 1; i <= 4; i++) {
-    string filename = std::string("./OpenForAppendSample.shard0") + std::to_string(i);
-    string db_name = std::string("./OpenForAppendSample.shard0") + std::to_string(i) + ".db";
-    remove(common::SafeCStr(filename));
-    remove(common::SafeCStr(db_name));
-  }
-
-  // load binary data
-  std::vector<std::vector<uint8_t>> bin_data;
-  std::vector<std::string> filenames;
-  if (-1 == mindrecord::GetAbsoluteFiles("./data/mindrecord/testImageNetData/images", filenames)) {
-    MS_LOG(INFO) << "-- ATTN -- Missed data directory. Skip this case. -----------------";
-    return;
-  }
-  mindrecord::Img2DataUint8(filenames, bin_data);
-
-  // init shardHeader
-  mindrecord::ShardHeader header_data;
-  MS_LOG(INFO) << "Init ShardHeader Already.";
-
-  // create schema
-  json anno_schema_json = R"({"file_name": {"type": "string"}, "label": {"type": "int32"}})"_json;
-  std::shared_ptr<mindrecord::Schema> anno_schema = mindrecord::Schema::Build("annotation", anno_schema_json);
-  if (anno_schema == nullptr) {
-    MS_LOG(ERROR) << "Build annotation schema failed";
-    return;
-  }
-
-  // add schema to shardHeader
-  int anno_schema_id = header_data.AddSchema(anno_schema);
-  MS_LOG(INFO) << "Init Schema Already.";
-
-  // create index
-  std::pair<uint64_t, std::string> index_field1(anno_schema_id, "file_name");
-  std::pair<uint64_t, std::string> index_field2(anno_schema_id, "label");
-  std::vector<std::pair<uint64_t, std::string>> fields;
-  fields.push_back(index_field1);
-  fields.push_back(index_field2);
-
-  // add index to shardHeader
-  header_data.AddIndexFields(fields);
-  MS_LOG(INFO) << "Init Index Fields Already.";
-
-  // load  meta data
-  std::vector<json> annotations;
-  LoadDataFromImageNet("./data/mindrecord/testImageNetData/annotation.txt", annotations, 1);
-
-  // add data
-  std::map<std::uint64_t, std::vector<json>> rawdatas;
-  rawdatas.insert(pair<uint64_t, vector<json>>(anno_schema_id, annotations));
-  MS_LOG(INFO) << "Init Images Already.";
-
-  // init file_writer
-  std::vector<std::string> file_names;
-  for (int i = 1; i <= 4; i++) {
-    file_names.emplace_back(std::string("./OpenForAppendSample.shard0") + std::to_string(i));
-    MS_LOG(INFO) << "shard name is: " << common::SafeCStr(file_names[i - 1]);
-  }
-
-  MS_LOG(INFO) << "Init Output Files Already.";
-  {
-    mindrecord::ShardWriter fw_init;
-    fw_init.Open(file_names);
-
-    // set shardHeader
-    fw_init.SetShardHeader(std::make_shared<mindrecord::ShardHeader>(header_data));
-
-    // close file_writer
-    fw_init.Commit();
-  }
-  {
-    MS_LOG(INFO) << "=============== images " << bin_data.size() << " ============================";
-    mindrecord::ShardWriter fw;
-    auto ret = fw.OpenForAppend(filename);
-    if (ret == FAILED) {
-      return;
-    }
-
-    bin_data = std::vector<std::vector<uint8_t>>(bin_data.begin(), bin_data.begin() + 1);
-    fw.WriteRawData(rawdatas, bin_data);
-    fw.Commit();
-  }
-
-  mindrecord::ShardIndexGenerator sg{filename};
-  sg.Build();
-  sg.WriteToDatabase();
-  MS_LOG(INFO) << "Done create index";
-}
-
 TEST_F(TestShardWriter, TestOpenForAppend) {
   MS_LOG(INFO) << "start ---- TestOpenForAppend\n";
   string filename = "./";
-  TestShardWriterImageNetOpenForAppend(filename);
+  ShardWriterImageNetOpenForAppend(filename);
 
   string filename1 = "./▒AppendSample.shard01";
-  TestShardWriterImageNetOpenForAppend(filename1);
+  ShardWriterImageNetOpenForAppend(filename1);
   string filename2 = "./ä\xA9ü";
 
-  TestShardWriterImageNetOpenForAppend(filename2);
+  ShardWriterImageNetOpenForAppend(filename2);
+
   MS_LOG(INFO) << "end ---- TestOpenForAppend\n";
+  for (int i = 1; i <= 4; i++) {
+    string filename = std::string("./OpenForAppendSample.shard0") + std::to_string(i);
+    string db_name = std::string("./OpenForAppendSample.shard0") + std::to_string(i) + ".db";
+    remove(common::SafeCStr(filename));
+    remove(common::SafeCStr(db_name));
+  }
 }
 
 }  // namespace mindrecord
diff --git a/tests/ut/cpp/mindrecord/ut_shard_writer_test.h b/tests/ut/cpp/mindrecord/ut_shard_writer_test.h
deleted file mode 100644
index f665297b17..0000000000
--- a/tests/ut/cpp/mindrecord/ut_shard_writer_test.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef TESTS_MINDRECORD_UT_SHARDWRITER_H
-#define TESTS_MINDRECORD_UT_SHARDWRITER_H
-
-namespace mindspore {
-namespace mindrecord {
-void TestShardWriterImageNet();
-}  // namespace mindrecord
-}  // namespace mindspore
-
-#endif  // TESTS_MINDRECORD_UT_SHARDWRITER_H

From c4aeb5a0b84d671ce2f91a839cd78a0906527d3a Mon Sep 17 00:00:00 2001
From: lianliguang <lianliguang@huawei.com>
Date: Thu, 23 Apr 2020 18:48:46 +0800
Subject: [PATCH 040/242] add format choice when kernel selecting reduce or
 raise precision

---
 mindspore/ccsrc/debug/anf_ir_dump.cc          |   8 +
 mindspore/ccsrc/debug/anf_ir_dump.h           |   4 +-
 .../device/ascend/kernel_select_ascend.cc     | 266 ++++++++----------
 mindspore/ccsrc/kernel/kernel_build_info.cc   |   5 +-
 4 files changed, 138 insertions(+), 145 deletions(-)

diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc
index e977084ab8..1fd3096e7c 100644
--- a/mindspore/ccsrc/debug/anf_ir_dump.cc
+++ b/mindspore/ccsrc/debug/anf_ir_dump.cc
@@ -91,6 +91,14 @@ void PrintNodeInputType(std::ostringstream &buffer, const AnfNodePtr &nd) {
   }
 }
 
+void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd) {
+  buffer << "      : (";
+  PrintNodeInputType(buffer, nd);
+  buffer << ") -> (";
+  PrintNodeOutputType(buffer, nd);
+  buffer << ")";
+}
+
 struct SubGraphIRInfo {
   int32_t local_var;
   std::ostringstream buffer;
diff --git a/mindspore/ccsrc/debug/anf_ir_dump.h b/mindspore/ccsrc/debug/anf_ir_dump.h
index a53888348d..9fa447046f 100644
--- a/mindspore/ccsrc/debug/anf_ir_dump.h
+++ b/mindspore/ccsrc/debug/anf_ir_dump.h
@@ -18,12 +18,14 @@
 
 #include <string>
 #include <vector>
+#include "ir/dtype/type.h"
 #include "ir/anf.h"
 
 namespace mindspore {
 constexpr char PARALLEL_STRATEGY[] = "strategy";
 void DumpIR(const std::string &filename, const FuncGraphPtr &func_graph, bool dump_full_name = false);
-
+void PrintInputAndOutputInferType(std::ostringstream &buffer, const AnfNodePtr &nd);
+const std::string ToShortString(const TypeId &typeId);
 }  // namespace mindspore
 
 #endif  // MINDSPORE_CCSRC_DEBUG_ANF_IR_DUMP_H_
diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
index 36c622cbc5..549b97b61b 100644
--- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
+++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
@@ -18,14 +18,15 @@
 #include <string>
 #include <vector>
 #include <memory>
-#include <set>
-#include <unordered_map>
+#include <utility>
+#include <map>
 #include "kernel/oplib/oplib.h"
 #include "kernel/kernel_query.h"
 #include "session/anf_runtime_algorithm.h"
 #include "kernel/kernel_build_info.h"
 #include "utils/context/ms_context.h"
 #include "operator/ops.h"
+#include "debug/anf_ir_dump.h"
 
 namespace mindspore {
 namespace device {
@@ -180,6 +181,7 @@ void SetTensorDeviceInfo(const kernel::KernelBuildInfo &selected_kernel_info, co
 }
 
 void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *support_index) {
+  MS_EXCEPTION_IF_NULL(support_index);
   int index = kUnSupportMixedDataTypeIndex;
   switch (data_type) {
     case kNumberTypeFloat16:
@@ -197,6 +199,7 @@ void AddSupportMixedPrecisionDataTypeIndex(TypeId data_type, std::vector<int> *s
 
 void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t input_index,
                                    std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) {
+  MS_EXCEPTION_IF_NULL(support_datatype);
   auto data_type = kernel_build_info.GetInputDeviceType(input_index);
   support_datatype->push_back(data_type);
   AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index);
@@ -204,6 +207,7 @@ void AddKernelInputSupportDataType(const kernel::KernelBuildInfo &kernel_build_i
 
 void AddKernelOutputSupportDataType(const kernel::KernelBuildInfo &kernel_build_info, size_t output_index,
                                     std::vector<int> *support_datatype_index, std::vector<TypeId> *support_datatype) {
+  MS_EXCEPTION_IF_NULL(support_datatype);
   auto data_type = kernel_build_info.GetOutputDeviceType(output_index);
   support_datatype->push_back(data_type);
   AddSupportMixedPrecisionDataTypeIndex(data_type, support_datatype_index);
@@ -238,8 +242,8 @@ void AddNodeOutputDataType(const CNodePtr &kernel_node, size_t output_index,
 
 void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_index,
                          const std::vector<TypeId> &node_mix_precision_datatype,
-                         const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
-                         std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
+                         const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
+                         std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
   if (node_mix_precision_datatype_index.size() != node_mix_precision_datatype.size()) {
     MS_LOG(EXCEPTION) << "node datatype index size " << node_mix_precision_datatype_index.size() << " != datatype size "
                       << node_mix_precision_datatype.size();
@@ -251,10 +255,11 @@ void CheckDataTypeInputs(const std::vector<int> &node_mix_precision_datatype_ind
   }
 }
 
-int RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index,
-                                 const std::vector<TypeId> &node_mix_precision_datatype,
-                                 const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
-                                 std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
+bool RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index,
+                                  const std::vector<TypeId> &node_mix_precision_datatype,
+                                  const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
+                                  std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
+  MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx);
   CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes,
                       kernel_match_datatype_idx);
   for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) {
@@ -289,40 +294,16 @@ int RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_data
       }
     }
   }
-
-  if (kernel_match_datatype_idx->size() >= 1) {
-    return SizeToInt(kernel_match_datatype_idx->begin()->first);
-  }
-  return -1;
-}
-
-int GetMinReducePrecisionCountIndex(std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx,
-                                    const std::unordered_map<size_t, size_t> &precision_reduce_count) {
-  int selected_index = -1;
-  size_t min_reduce_precision_count = kMaxCount;
-  auto iter = kernel_match_datatype_idx->begin();
-  while (iter != kernel_match_datatype_idx->end()) {
-    auto find_iter = precision_reduce_count.find(iter->first);
-    if (find_iter == precision_reduce_count.end()) {
-      continue;
-    }
-    if (min_reduce_precision_count > find_iter->second) {
-      selected_index = SizeToInt(iter->first);
-      min_reduce_precision_count = find_iter->second;
-    }
-    ++iter;
-  }
-  return selected_index;
+  return !kernel_match_datatype_idx->empty();
 }
 
-int RaiseOrReduceDataTypePrecisionSelect(
-  const std::vector<int> &node_mix_precision_datatype_index, const std::vector<TypeId> &node_mix_precision_datatype,
-  const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
-  std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
+bool RaiseOrReduceDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index,
+                                          const std::vector<TypeId> &node_mix_precision_datatype,
+                                          const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
+                                          std::map<size_t, std::vector<int>> *kernel_match_datatype_idx) {
+  MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx);
   CheckDataTypeInputs(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatypes,
                       kernel_match_datatype_idx);
-  // reduce / raise
-  std::unordered_map<size_t, size_t> precision_reduce_count;
   for (size_t i = 0; i < node_mix_precision_datatype_index.size(); ++i) {
     if (node_mix_precision_datatype[i] == kTypeUnknown) {
       continue;
@@ -351,26 +332,18 @@ int RaiseOrReduceDataTypePrecisionSelect(
       if (datatype_indexes[i] == kUnSupportMixedDataTypeIndex) {
         iter = kernel_match_datatype_idx->erase(iter);
       } else {
-        if (datatype_indexes[i] < node_mix_precision_datatype_index[i]) {
-          auto count_iter = precision_reduce_count.find(iter->first);
-          if (count_iter != precision_reduce_count.end()) {
-            count_iter->second++;
-          } else {
-            precision_reduce_count[iter->first] = 1;
-          }
-        }
         ++iter;
       }
     }
   }
-
-  return GetMinReducePrecisionCountIndex(kernel_match_datatype_idx, precision_reduce_count);
+  return !kernel_match_datatype_idx->empty();
 }
 
 void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelBuildInfo &kernel_build_info,
                               std::vector<int> *support_indexes, std::vector<TypeId> *node_mix_precision_datatype,
                               std::vector<TypeId> *support_datatypes,
                               std::vector<int> *node_mix_precision_datatype_index) {
+  MS_EXCEPTION_IF_NULL(node_mix_precision_datatype);
   bool add_node_datatype_flag = false;
   if (node_mix_precision_datatype->size() == 0) {
     add_node_datatype_flag = true;
@@ -390,104 +363,58 @@ void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelB
   }
 }
 
-int PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index,
-                    const std::vector<TypeId> &node_mix_precision_datatype,
-                    const std::unordered_map<size_t, std::vector<TypeId>> &kernel_support_datatype,
-                    std::unordered_map<size_t, std::vector<int>> *kernel_match_datatype_idx, bool *precision_reduce) {
+void PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index,
+                     const std::vector<TypeId> &node_mix_precision_datatype,
+                     const std::map<size_t, std::vector<TypeId>> &kernel_support_datatype,
+                     std::map<size_t, std::vector<int>> *kernel_match_datatype_idx, bool *precision_reduce) {
+  MS_EXCEPTION_IF_NULL(kernel_match_datatype_idx);
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   MS_EXCEPTION_IF_NULL(precision_reduce);
-  std::unordered_map<size_t, std::vector<int>> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx;
+  std::map<size_t, std::vector<int>> kernel_match_datatype_idx_copy = *kernel_match_datatype_idx;
   // raise precision
-  int selected_index = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
-                                                    kernel_support_datatype, kernel_match_datatype_idx);
-  if (selected_index != -1) {
-    int max_match = 0;
-    auto iter = kernel_match_datatype_idx->begin();
-    int match_count = 0;
-    while (iter != kernel_match_datatype_idx->end()) {
-      auto kernel_datatypes = kernel_support_datatype.find(iter->first);
-      if (kernel_datatypes == kernel_support_datatype.end()) {
-        MS_LOG(EXCEPTION) << "Can not find kernel index" << iter->first << "'s datatype.";
-      }
-      if (kernel_datatypes->second.size() < node_mix_precision_datatype.size()) {
-        MS_LOG(EXCEPTION) << "Kernel datatype size is not equal to node datatype size!";
-      }
-      for (size_t i = 0; i < node_mix_precision_datatype.size(); ++i) {
-        if (node_mix_precision_datatype[i] == kernel_datatypes->second[i]) {
-          ++match_count;
-        }
-      }
-      if (match_count > max_match) {
-        selected_index = SizeToInt(iter->first);
-      }
-      ++iter;
-    }
+  bool selected_ret = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
+                                                   kernel_support_datatype, kernel_match_datatype_idx);
+  if (selected_ret) {
+    return;
   }
-  if (selected_index == -1 && context_ptr->enable_reduce_precision()) {
-    selected_index =
-      RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
-                                           kernel_support_datatype, &kernel_match_datatype_idx_copy);
-    if (selected_index != -1) {
-      *precision_reduce = true;
-    }
+  if (context_ptr->enable_reduce_precision()) {
+    selected_ret = RaiseOrReduceDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
+                                                        kernel_support_datatype, &kernel_match_datatype_idx_copy);
+  }
+  if (selected_ret) {
+    *precision_reduce = true;
+    *kernel_match_datatype_idx = kernel_match_datatype_idx_copy;
   }
-  return selected_index;
 }
 
-void SelectKernel(const CNodePtr &kernel_node, bool precision_reduce, const std::vector<TypeId> &node_datatype,
-                  const std::shared_ptr<kernel::KernelBuildInfo> &selected_kernel_info_ptr) {
-  MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr);
+void PrintRaiseOrReducePrecisionSelectedInfo(const CNodePtr &cnode,
+                                             const std::shared_ptr<kernel::KernelBuildInfo> &selected_kernel_build_info,
+                                             bool precision_reduce) {
+  MS_EXCEPTION_IF_NULL(selected_kernel_build_info);
+  MS_EXCEPTION_IF_NULL(cnode);
+  std::ostringstream buffer;
+  buffer << cnode->DebugString();
   if (precision_reduce) {
-    std::ostringstream datatype;
-    size_t input_num = selected_kernel_info_ptr->GetInputNum();
-    size_t i = 0;
-    datatype << "(";
-    for (; i < input_num && i < node_datatype.size(); ++i) {
-      datatype << static_cast<int>(node_datatype[i]);
-      if (i < input_num - 1) {
-        datatype << ", ";
-      }
-    }
-    datatype << ") -> (";
-    for (; i < node_datatype.size(); ++i) {
-      datatype << static_cast<int>(node_datatype[i]);
-      if (i < node_datatype.size() - 1) {
-        datatype << ", ";
-      }
-    }
-    datatype << ")";
-    MS_LOG(WARNING) << kernel_node->DebugString() << " reduce precision, node datatype: " << datatype.str()
-                    << ", select kernel: %s" << selected_kernel_info_ptr->ToString();
+    buffer << " reduce precision, node datatype: ";
+  } else {
+    buffer << " raise precision, node datatype: ";
   }
-  AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info_ptr, kernel_node.get());
-  // Set format and data type for input tensor.
-  SetTensorDeviceInfo(*selected_kernel_info_ptr, kernel_node);
+  PrintInputAndOutputInferType(buffer, cnode);
+  buffer << ", select kernel:" << selected_kernel_build_info->ToString();
+  MS_LOG(INFO) << buffer.str();
 }
-}  // namespace
 
-void SelectKernelInfo(const CNodePtr &kernel_node) {
-  std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
-  MS_EXCEPTION_IF_NULL(kernel_node);
-  kernel::KernelQuery(kernel_node, &kernel_info_list);
+std::shared_ptr<kernel::KernelBuildInfo> ChooseMatchedKernelInfo(
+  const CNodePtr &kernel_node, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) {
+  if (kernel_info_list.empty()) {
+    return nullptr;
+  }
   std::vector<int> most_match_counts = {-1, -1, -1, -1};
-  int selected_index = -1;
-  std::unordered_map<size_t, std::vector<int>> kernel_match_datatype_idx;
-  std::unordered_map<size_t, std::vector<TypeId>> kernel_support_datatype;
-  std::vector<int> node_mix_precision_datatype_index;
-  std::vector<TypeId> node_mix_precision_datatype;
+  size_t selected_index = 0;
   for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) {
     std::vector<int> cur_kernel_info_match_counts = {0, 0, 0, 0};
     auto kernel_build_info = *(kernel_info_list[info_index]);
-    std::vector<int> support_indexes;
-    std::vector<TypeId> support_datatypes;
-    AddNodeAndKernelDataType(kernel_node, kernel_build_info, &support_indexes, &node_mix_precision_datatype,
-                             &support_datatypes, &node_mix_precision_datatype_index);
-    kernel_match_datatype_idx[info_index] = support_indexes;
-    kernel_support_datatype[info_index] = support_datatypes;
-    if (!MatchInferOutputDataType(kernel_node, kernel_build_info)) {
-      continue;
-    }
     std::shared_ptr<kernel::KernelBuildInfo> kernel_info_ptr = kernel_info_list[info_index];
     UpdateCurMatchCounts(*kernel_info_ptr, kernel_node, &cur_kernel_info_match_counts);
     // Currently the selection policy is the match format count first, and then is datatype counts.
@@ -495,22 +422,77 @@ void SelectKernelInfo(const CNodePtr &kernel_node) {
       selected_index = SizeToInt(info_index);
     }
   }
+  return kernel_info_list[selected_index];
+}
 
-  bool precision_reduce = false;
-  if (selected_index == -1) {
-    selected_index = PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype,
-                                     kernel_support_datatype, &kernel_match_datatype_idx, &precision_reduce);
+std::vector<std::shared_ptr<kernel::KernelBuildInfo>> GetAllMatchedFilteredKernelInfo(
+  const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) {
+  std::vector<std::shared_ptr<kernel::KernelBuildInfo>> result;
+  for (const auto &kernel_build_info : kernel_info_list) {
+    MS_EXCEPTION_IF_NULL(kernel_build_info);
+    if (!MatchInferOutputDataType(cnode, *kernel_build_info)) {
+      continue;
+    }
+    result.push_back(kernel_build_info);
   }
-  if (selected_index == -1) {
-    MS_LOG(EXCEPTION) << kernel_node->DebugString() << "Cannot find valid kernel Info !";
+  return result;
+}
+
+std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecisionMatchedKernelInfo(
+  const CNodePtr &cnode, const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list,
+  bool *precision_reduce) {
+  std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_kernel_info_list;
+  std::map<size_t, std::vector<int>> kernel_match_datatype_idx;
+  std::map<size_t, std::vector<TypeId>> kernel_support_datatype;
+  std::vector<int> node_mix_precision_datatype_index;
+  std::vector<TypeId> node_mix_precision_datatype;
+  for (size_t info_index = 0; info_index < kernel_info_list.size(); ++info_index) {
+    std::vector<int> support_indexes;
+    std::vector<TypeId> support_datatypes;
+    MS_EXCEPTION_IF_NULL(kernel_info_list[info_index]);
+    AddNodeAndKernelDataType(cnode, *kernel_info_list[info_index], &support_indexes, &node_mix_precision_datatype,
+                             &support_datatypes, &node_mix_precision_datatype_index);
+    kernel_match_datatype_idx[info_index] = support_indexes;
+    kernel_support_datatype[info_index] = support_datatypes;
   }
-  auto index = IntToSize(selected_index);
-  if (index >= kernel_info_list.size()) {
-    MS_LOG(EXCEPTION) << "index outof range";
+  PrecisionReduce(node_mix_precision_datatype_index, node_mix_precision_datatype, kernel_support_datatype,
+                  &kernel_match_datatype_idx, precision_reduce);
+  std::transform(
+    kernel_match_datatype_idx.begin(), kernel_match_datatype_idx.end(), std::back_inserter(filtered_kernel_info_list),
+    [&](const std::pair<size_t, std::vector<int>> &matched_idx) -> std::shared_ptr<kernel::KernelBuildInfo> {
+      return kernel_info_list[matched_idx.first];
+    });
+  return filtered_kernel_info_list;
+}
+}  // namespace
+
+void SelectKernelInfo(const CNodePtr &kernel_node) {
+  std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
+  MS_EXCEPTION_IF_NULL(kernel_node);
+  bool precision_reduce = false;
+  std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info = nullptr;
+  kernel::KernelQuery(kernel_node, &kernel_info_list);
+  // filter kernel info matched with me inferred type
+  auto filtered_kernel_info_list = GetAllMatchedFilteredKernelInfo(kernel_node, kernel_info_list);
+  if (!filtered_kernel_info_list.empty()) {
+    selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list);
+  } else {
+    // select kernel info using raised precision or reduced precision
+    filtered_kernel_info_list =
+      FilterRaisedOrReducePrecisionMatchedKernelInfo(kernel_node, kernel_info_list, &precision_reduce);
+    selected_kernel_info = ChooseMatchedKernelInfo(kernel_node, filtered_kernel_info_list);
+    if (selected_kernel_info == nullptr) {
+      std::ostringstream buffer;
+      PrintInputAndOutputInferType(buffer, kernel_node);
+      MS_LOG(EXCEPTION) << "The node [" << kernel_node->DebugString()
+                        << "] cannot find valid kernel info, not supported the type" << buffer.str();
+    } else {
+      PrintRaiseOrReducePrecisionSelectedInfo(kernel_node, selected_kernel_info, precision_reduce);
+    }
   }
-  std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info_ptr = kernel_info_list[index];
-  MS_EXCEPTION_IF_NULL(selected_kernel_info_ptr);
-  SelectKernel(kernel_node, precision_reduce, node_mix_precision_datatype, selected_kernel_info_ptr);
+  AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get());
+  // Set format and data type for input tensor.
+  SetTensorDeviceInfo(*selected_kernel_info, kernel_node);
 }
 
 bool CheckKernelAccuracySupported(const CNodePtr &kernel_node,
diff --git a/mindspore/ccsrc/kernel/kernel_build_info.cc b/mindspore/ccsrc/kernel/kernel_build_info.cc
index 038c06d8ed..279a62bad6 100644
--- a/mindspore/ccsrc/kernel/kernel_build_info.cc
+++ b/mindspore/ccsrc/kernel/kernel_build_info.cc
@@ -17,6 +17,7 @@
 #include "kernel/kernel_build_info.h"
 #include <algorithm>
 #include "utils/log_adapter.h"
+#include "debug/anf_ir_dump.h"
 namespace mindspore {
 namespace kernel {
 std::string KernelBuildInfo::GetInputFormat(size_t input_index) const {
@@ -82,14 +83,14 @@ std::string KernelBuildInfo::ToString() const {
     if (index != 0) {
       output_buffer << ", ";
     }
-    output_buffer << "<" << static_cast<int>(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">";
+    output_buffer << "<" << ToShortString(GetInputDeviceType(index)) << "x" << GetInputFormat(index) << ">";
   }
   output_buffer << ") -> (";
   for (size_t index = 0; index < GetOutputNum(); ++index) {
     if (index != 0) {
       output_buffer << ", ";
     }
-    output_buffer << "<" << static_cast<int>(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">";
+    output_buffer << "<" << ToShortString(GetOutputDeviceType(index)) << "x" << GetOutputFormat(index) << ">";
   }
   output_buffer << ")";
   return output_buffer.str();

From 246fc290d0a8f463cb02a8d09a8c41019c29469f Mon Sep 17 00:00:00 2001
From: caifubi <caifubi1@huawei.com>
Date: Thu, 23 Apr 2020 19:14:19 +0800
Subject: [PATCH 041/242] clean runtime codex

---
 .../ccsrc/device/ascend/ascend_kernel_runtime.cc  | 15 ++++++++-------
 .../device/ascend/profiling/profiling_utils.cc    |  2 +-
 .../device/ascend/profiling/profiling_utils.h     |  2 +-
 .../device/ascend/tasksink/task_generator.cc      |  6 ++++--
 mindspore/ccsrc/kernel/hccl/hcom_util.cc          |  2 +-
 5 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
index 44cf3f8fa8..10517299cc 100644
--- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
@@ -453,25 +453,26 @@ bool AscendKernelRuntime::HcclInit() {
   }
 
   MS_LOG(INFO) << "do hcom init";
-  std::string path;
   const char *config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH");
   if (config_path_str == nullptr) {
     MS_LOG(ERROR) << "get hccl json config failed, please set env MINDSPORE_HCCL_CONFIG_PATH";
     return false;
   }
-  path = config_path_str;
-  char fullPath[PATH_MAX] = {0};
-  if (path.size() > PATH_MAX || realpath(path.c_str(), fullPath) == nullptr) {
-    MS_LOG(ERROR) << "file " << path << " is not exist";
+  auto full_path = realpath(config_path_str, nullptr);
+  if (full_path == nullptr) {
+    MS_LOG(ERROR) << "file path " << config_path_str << " does not exist";
     return false;
   }
+
   const char *identify = std::getenv("RANK_ID");
   if (identify == nullptr) {
     MS_LOG(ERROR) << "get hccl rankid failed, please set env RANK_ID";
+    free(full_path);
     return false;
   }
-  MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << fullPath << ", RANK_ID: " << identify;
-  hcclResult_t res = hcom_init(fullPath, identify);
+  MS_LOG(INFO) << "MINDSPORE_HCCL_CONFIG_PATH : " << full_path << ", RANK_ID: " << identify;
+  hcclResult_t res = hcom_init(full_path, identify);
+  free(full_path);
   if (res != HCCL_SUCCESS) {
     MS_LOG(ERROR) << "hcom init failed, res is " << static_cast<int>(res);
     return false;
diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
index fdfff96fde..c1478915b7 100644
--- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
+++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
@@ -33,7 +33,7 @@ constexpr char kIterEndNode[] = "PROFILING_ITER_END";
 std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
 uint32_t ProfilingUtils::custom_node_index_ = 1;
 
-ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr) {
+ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(const NotNull<session::KernelGraph *> graph_ptr) {
   MS_LOG(INFO) << "get env start";
   custom_node_index_ = 1;
   auto &cnode_exec_order = graph_ptr->execution_order();
diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
index 1f7815b320..99245b2c57 100644
--- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
+++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
@@ -94,7 +94,7 @@ class ProfilingUtils {
   // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode'
   // GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode'
   // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption.
-  static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr);
+  static ProfilingTraceInfo GetProfilingTraceFromEnv(const NotNull<session::KernelGraph *> graph_ptr);
 
   // Insert two profiling trace points, one in front and one behind
   static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
index 62cf809c21..7b2a7dad9f 100644
--- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
+++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
@@ -121,8 +121,10 @@ bool TaskGenerator::LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_i
     LaunchAddrCleanKernel(anf_node_ptr, &kernel_inputs);
   }
 
-  std::vector<TaskInfoPtr> task_info_ptrs = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod)
-                                              ->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
+  auto ascend_kernel_mod = dynamic_cast<kernel::AscendKernelMod *>(kernel_mod);
+  MS_EXCEPTION_IF_NULL(ascend_kernel_mod);
+  std::vector<TaskInfoPtr> task_info_ptrs =
+    ascend_kernel_mod->GenTask(kernel_inputs, kernel_workspaces, kernel_outputs, stream_id);
   task_info_list->insert(task_info_list->end(), task_info_ptrs.begin(), task_info_ptrs.end());
   return true;
 }
diff --git a/mindspore/ccsrc/kernel/hccl/hcom_util.cc b/mindspore/ccsrc/kernel/hccl/hcom_util.cc
index d1c0a30113..5665475c84 100644
--- a/mindspore/ccsrc/kernel/hccl/hcom_util.cc
+++ b/mindspore/ccsrc/kernel/hccl/hcom_util.cc
@@ -136,7 +136,7 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector<hcclDataTyp
     }
   }
 
-  if (total_size % type_size != 0) {
+  if (type_size == 0 || total_size % type_size != 0) {
     MS_LOG(ERROR) << "Total_size[" << total_size << "],Type_size[" << type_size << "] != 0, fail!";
     return false;
   }

From 73d4cf77d438f47f2df554f74348b1dd9f9cac56 Mon Sep 17 00:00:00 2001
From: caojian05 <caojian5@huawei.com>
Date: Thu, 23 Apr 2020 23:39:40 +0800
Subject: [PATCH 042/242] add model parameters for vgg16 to open mixed
 precision.

---
 example/vgg16_cifar10/train.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/example/vgg16_cifar10/train.py b/example/vgg16_cifar10/train.py
index a4aa587c3d..87cea2af03 100644
--- a/example/vgg16_cifar10/train.py
+++ b/example/vgg16_cifar10/train.py
@@ -68,7 +68,8 @@ if __name__ == '__main__':
     lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=50000 // cfg.batch_size)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
-    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
+    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
+                  amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)
 
     dataset = dataset.create_dataset(args_opt.data_path, cfg.epoch_size)
     batch_num = dataset.get_dataset_size()

From a43b60f99f317367f03d09931b88080b5fe640c4 Mon Sep 17 00:00:00 2001
From: Amir Lashkari <amir.lashkari1@huawei.com>
Date: Thu, 23 Apr 2020 13:57:52 -0400
Subject: [PATCH 043/242] Fixed not in list error

---
 mindspore/dataset/transforms/vision/py_transforms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/dataset/transforms/vision/py_transforms.py b/mindspore/dataset/transforms/vision/py_transforms.py
index 51bea80b21..77957bff47 100644
--- a/mindspore/dataset/transforms/vision/py_transforms.py
+++ b/mindspore/dataset/transforms/vision/py_transforms.py
@@ -1485,4 +1485,4 @@ class UniformAugment:
         Returns:
             img (PIL Image), Transformed image.
         """
-        return util.uniform_augment(img, self.transforms, self.num_ops)
+        return util.uniform_augment(img, self.transforms.copy(), self.num_ops)

From f29eacbb341badc63e83385421e2960e0b8d2999 Mon Sep 17 00:00:00 2001
From: Adel Shafiei <adel.shafiei@huawei.com>
Date: Thu, 23 Apr 2020 17:14:34 -0400
Subject: [PATCH 044/242] fixed an input validation error for uniform augment
 op

---
 .../dataset/kernels/image/uniform_aug_op.cc   | 20 +++++++------------
 .../dataset/transforms/vision/validators.py   |  5 +++--
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc
index 5725c10908..1214345c37 100644
--- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc
+++ b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc
@@ -42,34 +42,28 @@ Status UniformAugOp::Compute(const std::vector<std::shared_ptr<Tensor>> &input,
                              std::vector<std::shared_ptr<Tensor>> *output) {
   IO_CHECK_VECTOR(input, output);
 
-  // variables to generate random number to select ops from the list
-  std::vector<int> random_indexes;
-
   // variables to copy the result to output if it is not already
   std::vector<std::shared_ptr<Tensor>> even_out;
   std::vector<std::shared_ptr<Tensor>> *even_out_ptr = &even_out;
   int count = 1;
 
-  // select random indexes for candidates to be applied
-  for (int i = 0; i < num_ops_; ++i) {
-    random_indexes.insert(random_indexes.end(),
-                          std::uniform_int_distribution<int>(0, tensor_op_list_.size() - 1)(rnd_));
-  }
+  // randomly select ops to be applied
+  std::vector<std::shared_ptr<TensorOp>> selected_tensor_ops;
+  std::sample(tensor_op_list_.begin(), tensor_op_list_.end(), std::back_inserter(selected_tensor_ops), num_ops_, rnd_);
 
-  for (auto it = random_indexes.begin(); it != random_indexes.end(); ++it) {
+  for (auto tensor_op = selected_tensor_ops.begin(); tensor_op != selected_tensor_ops.end(); ++tensor_op) {
     // Do NOT apply the op, if second random generator returned zero
     if (std::uniform_int_distribution<int>(0, 1)(rnd_)) {
       continue;
     }
-    std::shared_ptr<TensorOp> tensor_op = tensor_op_list_[*it];
 
     // apply python/C++ op
     if (count == 1) {
-      (*tensor_op).Compute(input, output);
+      (**tensor_op).Compute(input, output);
     } else if (count % 2 == 0) {
-      (*tensor_op).Compute(*output, even_out_ptr);
+      (**tensor_op).Compute(*output, even_out_ptr);
     } else {
-      (*tensor_op).Compute(even_out, output);
+      (**tensor_op).Compute(even_out, output);
     }
     count++;
   }
diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py
index 713d9c5714..2c299b077b 100644
--- a/mindspore/dataset/transforms/vision/validators.py
+++ b/mindspore/dataset/transforms/vision/validators.py
@@ -17,11 +17,12 @@
 import numbers
 from functools import wraps
 
+from mindspore._c_dataengine import TensorOp
+
 from .utils import Inter, Border
 from ...transforms.validators import check_pos_int32, check_pos_float32, check_value, check_uint8, FLOAT_MAX_INTEGER, \
     check_bool, check_2tuple, check_range, check_list, check_type, check_positive, INT32_MAX
 
-
 def check_inter_mode(mode):
     if not isinstance(mode, Inter):
         raise ValueError("Invalid interpolation mode.")
@@ -836,7 +837,7 @@ def check_uniform_augmentation(method):
         if not isinstance(operations, list):
             raise ValueError("operations is not a python list")
         for op in operations:
-            if not callable(op):
+            if not callable(op) and not isinstance(op, TensorOp):
                 raise ValueError("non-callable op in operations list")
 
         kwargs["num_ops"] = num_ops

From 4ff418084c9a98a8ecc6292b1ce1ac5f02bd782d Mon Sep 17 00:00:00 2001
From: lirongzhen <lirongzhen1@huawei.com>
Date: Wed, 22 Apr 2020 19:23:45 +0800
Subject: [PATCH 045/242] enable/disable allreduce_fusion

---
 .../allreduce_fusion/step_allreduce_fusion.cc   |  3 ++-
 mindspore/ccsrc/parallel/context.cc             |  1 +
 mindspore/ccsrc/parallel/context.h              |  5 +++++
 mindspore/ccsrc/pipeline/init.cc                |  4 ++++
 mindspore/parallel/_auto_parallel_context.py    | 17 +++++++++++++++++
 mindspore/parallel/_utils.py                    |  5 +++++
 tests/ut/python/parallel/__init__.py            |  2 ++
 .../ut/python/parallel/test_allreduce_fusion.py |  3 ++-
 8 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc
index 8ab0895216..23ec9da87b 100644
--- a/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc
+++ b/mindspore/ccsrc/parallel/allreduce_fusion/step_allreduce_fusion.cc
@@ -31,10 +31,11 @@ bool StepAllreduceFusion(const FuncGraphPtr &root, const opt::OptimizerPtr &opti
   MS_EXCEPTION_IF_NULL(optimizer);
   MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance());
   std::string parallel_mode = ParallelContext::GetInstance()->parallel_mode();
+  bool enable_all_reduce_fusion = ParallelContext::GetInstance()->enable_all_reduce_fusion();
   // assume no change to graph
   bool changes = false;
   // control whether use model_parallel mode
-  if (((parallel_mode != AUTO_PARALLEL) && (parallel_mode != SEMI_AUTO_PARALLEL)) ||
+  if (((parallel_mode != AUTO_PARALLEL) && (parallel_mode != SEMI_AUTO_PARALLEL)) || (!enable_all_reduce_fusion) ||
       (root->has_flag(ALLREDUCE_FUSION_RUN_ONCE_ONLY))) {
     return changes;
   }
diff --git a/mindspore/ccsrc/parallel/context.cc b/mindspore/ccsrc/parallel/context.cc
index bc4aca896b..4eb79772dd 100644
--- a/mindspore/ccsrc/parallel/context.cc
+++ b/mindspore/ccsrc/parallel/context.cc
@@ -55,6 +55,7 @@ void ParallelContext::Reset() {
   parallel_mode_ = STAND_ALONE;
   parameter_broadcast_ = false;
   parameter_broadcast_is_set_ = false;
+  enable_all_reduce_fusion_ = false;
 }
 
 void ParallelContext::set_device_num(int32_t device_num) {
diff --git a/mindspore/ccsrc/parallel/context.h b/mindspore/ccsrc/parallel/context.h
index 64261cb964..095a50f7b3 100644
--- a/mindspore/ccsrc/parallel/context.h
+++ b/mindspore/ccsrc/parallel/context.h
@@ -80,6 +80,10 @@ class ParallelContext {
   const std::vector<uint32_t> all_reduce_fusion_split_indices() const;
   void set_all_reduce_fusion_split_sizes(const std::vector<uint32_t> sizes);
   const std::vector<uint32_t> all_reduce_fusion_split_sizes() const;
+  void set_enable_all_reduce_fusion(bool enable_all_reduce_fusion) {
+    enable_all_reduce_fusion_ = enable_all_reduce_fusion;
+  }
+  bool enable_all_reduce_fusion() const { return enable_all_reduce_fusion_; }
 
   void Reset();
 
@@ -98,6 +102,7 @@ class ParallelContext {
   bool device_num_is_set_;
   bool global_rank_is_set_;
   bool parameter_broadcast_is_set_;
+  bool enable_all_reduce_fusion_;
   std::vector<uint32_t> all_reduce_fusion_split_indices_;
   std::vector<uint32_t> all_reduce_fusion_split_sizes_;
 };
diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc
index 86e6d436b7..98874f857d 100644
--- a/mindspore/ccsrc/pipeline/init.cc
+++ b/mindspore/ccsrc/pipeline/init.cc
@@ -183,6 +183,10 @@ PYBIND11_MODULE(_c_expression, m) {
          "Set all reduce fusion split sizes.")
     .def("get_all_reduce_fusion_split_sizes", &ParallelContext::all_reduce_fusion_split_sizes,
          "Get all reduce fusion split sizes.")
+    .def("set_enable_all_reduce_fusion", &ParallelContext::set_enable_all_reduce_fusion,
+         "Set enable/disable all reduce fusion.")
+    .def("get_enable_all_reduce_fusion", &ParallelContext::enable_all_reduce_fusion,
+         "Get enable/disable all reduce fusion.")
     .def("get_parameter_broadcast", &ParallelContext::parameter_broadcast, "Get parameter broadcast.")
     .def("get_parameter_broadcast_is_set", &ParallelContext::parameter_broadcast_is_set,
          "Get parameter broadcast is set.")
diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py
index bf4b99085e..0608989d94 100644
--- a/mindspore/parallel/_auto_parallel_context.py
+++ b/mindspore/parallel/_auto_parallel_context.py
@@ -259,6 +259,23 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_all_reduce_fusion_split_sizes()
 
+    def set_enable_all_reduce_fusion(self, enable_all_reduce_fusion):
+        """
+        Set enable/disable all reduce fusion.
+
+        Args:
+            enable_all_reduce_fusion (bool): Enable/disable all reduce fusion.
+        """
+        self.check_context_handle()
+        if not isinstance(enable_all_reduce_fusion, bool):
+            raise TypeError('enable_all_reduce_fusion is invalid type')
+        self._context_handle.set_enable_all_reduce_fusion(enable_all_reduce_fusion)
+
+    def get_enable_all_reduce_fusion(self):
+        """Get all reduce fusion flag."""
+        self.check_context_handle()
+        return self._context_handle.get_enable_all_reduce_fusion()
+
     def get_device_num_is_set(self):
         """Get device number is set or not."""
         self.check_context_handle()
diff --git a/mindspore/parallel/_utils.py b/mindspore/parallel/_utils.py
index 3ce5463edf..cb3a0c0ac7 100644
--- a/mindspore/parallel/_utils.py
+++ b/mindspore/parallel/_utils.py
@@ -117,6 +117,7 @@ _cast_before_mirror = None
 _loss_repeated_mean = None
 _communication_backend = None
 _has_checkpointed = False
+_enable_all_reduce_fusion = None
 
 
 def _checkpoint_auto_parallel_context():
@@ -133,6 +134,7 @@ def _checkpoint_auto_parallel_context():
     global _cast_before_mirror
     global _loss_repeated_mean
     global _communication_backend
+    global _enable_all_reduce_fusion
     _parallel_mode = auto_parallel_context().get_parallel_mode()
     _device_num = _get_device_num()
     _global_rank = _get_global_rank()
@@ -141,6 +143,7 @@ def _checkpoint_auto_parallel_context():
     _cast_before_mirror = auto_parallel_context().get_cast_before_mirror()
     _loss_repeated_mean = auto_parallel_context().get_loss_repeated_mean()
     _communication_backend = auto_parallel_context().get_communication_backend()
+    _enable_all_reduce_fusion = auto_parallel_context().get_enable_all_reduce_fusion()
     _has_checkpointed = True
 
 
@@ -154,10 +157,12 @@ def _restore_auto_parallel_context():
     global _cast_before_mirror
     global _loss_repeated_mean
     global _communication_backend
+    global _enable_all_reduce_fusion
     _set_auto_parallel_context(parallel_mode=_parallel_mode, device_num=_device_num, global_rank=_global_rank,
                                parameter_broadcast=_parameter_broadcast, mirror_mean=_mirror_mean,
                                cast_before_mirror=_cast_before_mirror, loss_repeated_mean=_loss_repeated_mean)
     auto_parallel_context().set_communication_backend(_communication_backend)
+    auto_parallel_context().set_enable_all_reduce_fusion(_enable_all_reduce_fusion)
 
 
 def _reset_checkpoint_auto_parallel_context():
diff --git a/tests/ut/python/parallel/__init__.py b/tests/ut/python/parallel/__init__.py
index c08f8e247b..b26962bc3a 100644
--- a/tests/ut/python/parallel/__init__.py
+++ b/tests/ut/python/parallel/__init__.py
@@ -13,10 +13,12 @@
 # limitations under the License.
 
 import mindspore.context as context
+from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.parallel._utils import _reset_op_id
 
 
 def setup_module(module):
+    auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True)
     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
     _reset_op_id()
 
diff --git a/tests/ut/python/parallel/test_allreduce_fusion.py b/tests/ut/python/parallel/test_allreduce_fusion.py
index fcbee10587..b8bf9ccc0f 100644
--- a/tests/ut/python/parallel/test_allreduce_fusion.py
+++ b/tests/ut/python/parallel/test_allreduce_fusion.py
@@ -23,7 +23,7 @@ from tests.dataset_mock import MindData
 from mindspore import context
 from mindspore.common.api import _executor
 from mindspore.parallel import _cost_model_context as cost_model_context
-
+from mindspore.parallel._auto_parallel_context import auto_parallel_context
 
 
 class Dataset(MindData):
@@ -105,6 +105,7 @@ def train_common(net):
     epoch_size = 2
     device_num=4
     context.reset_auto_parallel_context()
+    auto_parallel_context().set_enable_all_reduce_fusion(enable_all_reduce_fusion=True)
     context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, device_num=device_num, parameter_broadcast=False)
     context.set_context(mode=context.GRAPH_MODE)
 

From dfde76af8852b9a9fa942c573579a4f1f548809f Mon Sep 17 00:00:00 2001
From: Xiaoda Zhang <zhangxiaoda@huawei.com>
Date: Fri, 24 Apr 2020 10:17:37 +0800
Subject: [PATCH 046/242] delete the 'simplify_cal' attribute in
 'set_algo_parameters' and 'get_algo_parameters' interface

---
 mindspore/ccsrc/pipeline/init.cc              |  4 ----
 mindspore/parallel/algo_parameter_config.py   | 20 +++++--------------
 .../parallel/test_auto_parallel_two_matmul.py | 11 ++--------
 3 files changed, 7 insertions(+), 28 deletions(-)

diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc
index 86e6d436b7..3d170b8e75 100644
--- a/mindspore/ccsrc/pipeline/init.cc
+++ b/mindspore/ccsrc/pipeline/init.cc
@@ -206,10 +206,6 @@ PYBIND11_MODULE(_c_expression, m) {
          "Set the parameter cost_model_gamma of the DP algorithm")
     .def("get_costmodel_gamma", &CostModelContext::costmodel_gamma,
          "Get the parameter cost_model_gamma of the DP algorithm.")
-    .def("set_simplify_cal", &CostModelContext::set_costmodel_simplify_cal,
-         "Set the parameter cost_model_simplify_cal of the DP algorithm.")
-    .def("get_simplify_cal", &CostModelContext::costmodel_simplify_cal,
-         "Get the parameter cost_model_simplify_cal of the DP algorithm.")
     .def("set_costmodel_communi_threshold", &CostModelContext::set_costmodel_communi_threshold,
          "Set the parameter cost_model_communi_threshold of the DP algorithm.")
     .def("get_costmodel_communi_threshold", &CostModelContext::costmodel_communi_threshold,
diff --git a/mindspore/parallel/algo_parameter_config.py b/mindspore/parallel/algo_parameter_config.py
index 244156da33..5c13c13153 100644
--- a/mindspore/parallel/algo_parameter_config.py
+++ b/mindspore/parallel/algo_parameter_config.py
@@ -45,14 +45,6 @@ class _AlgoParameterConfig():
         if self._config_handle is None:
             raise ValueError("Config handle is none!!!")
 
-    def set_simplify_cal(self, simplify_cal):
-        self.check_config_handle()
-        self._config_handle.set_simplify_cal(simplify_cal)
-
-    def get_simplify_cal(self):
-        self.check_config_handle()
-        return self._config_handle.get_simplify_cal()
-
     def set_fully_use_devices(self, not_fully):
         self.check_config_handle()
         self._config_handle.set_fully_use_devices(not_fully)
@@ -118,7 +110,6 @@ def _algo_parameter_config():
 
 
 set_algo_parameters_config_func_map = {
-    "simplify_cal": _algo_parameter_config().set_simplify_cal,
     "fully_use_devices": _algo_parameter_config().set_fully_use_devices,
     "elementwise_op_strategy_follow": _algo_parameter_config().set_elementwise_op_strategy_follow,
     "tensor_slice_align_enable": _algo_parameter_config().set_tensor_slice_align_enable,
@@ -126,14 +117,13 @@ set_algo_parameters_config_func_map = {
 
 
 get_algo_parameters_config_func_map = {
-    "simplify_cal": _algo_parameter_config().get_simplify_cal,
     "fully_use_devices": _algo_parameter_config().get_fully_use_devices,
     "elementwise_op_strategy_follow": _algo_parameter_config().get_elementwise_op_strategy_follow,
     "tensor_slice_align_enable": _algo_parameter_config().get_tensor_slice_align_enable,
     "tensor_slice_align_size": _algo_parameter_config().get_tensor_slice_align_size}
 
 
-@args_type_check(simplify_cal=bool, tensor_slice_align_enable=bool, tensor_slice_align_size=int,
+@args_type_check(tensor_slice_align_enable=bool, tensor_slice_align_size=int,
                  fully_use_devices=bool, elementwise_op_strategy_follow=bool)
 def set_algo_parameters(**kwargs):
     """
@@ -143,10 +133,10 @@ def set_algo_parameters(**kwargs):
         Attribute name is needed.
 
     Args:
-        simplify_cal (bool): Whether simplifying calculations in strategy-searching algorithm. Default: True
-        tensor_slice_align_enable (bool): Whether checking tensor slice shape. Default: False
-        tensor_slice_align_size (int): The minimum tensor slice shape, the value must be in [1, 1024]. Default: 16
-        fully_use_devices (bool): Whether generating strategies that fully use all available devices. Default: True
+        tensor_slice_align_enable (bool): Whether checking tensor slice shape for MatMul. Default: False
+        tensor_slice_align_size (int): The minimum tensor slice shape of MatMul, the value must be in [1, 1024].
+            Default: 16
+        fully_use_devices (bool): Whether ONLY generating strategies that fully use all available devices. Default: True
         elementwise_op_strategy_follow (bool): Whether the elementwise operator have the same strategies as its
             subsequent operators. Default: False
 
diff --git a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py
index 848c8025cb..2e2ddd8f32 100644
--- a/tests/ut/python/parallel/test_auto_parallel_two_matmul.py
+++ b/tests/ut/python/parallel/test_auto_parallel_two_matmul.py
@@ -97,13 +97,8 @@ def test_two_matmul():
     assert costmodel_communi_bias == 1024.0
 
 
-    set_algo_parameters(simplify_cal=True,
-                                          tensor_slice_align_enable=False,
-                                          tensor_slice_align_size=32,
-                                          fully_use_devices=False,
-                                          elementwise_op_strategy_follow=False)
-    para_simplify_cal = get_algo_parameters("simplify_cal")
-    assert para_simplify_cal == True
+    set_algo_parameters(tensor_slice_align_enable=False, tensor_slice_align_size=32,
+                        fully_use_devices=False, elementwise_op_strategy_follow=False)
     para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable")
     assert para_slice_align_enable == False
     para_slice_align_size = get_algo_parameters("tensor_slice_align_size")
@@ -114,8 +109,6 @@ def test_two_matmul():
     assert elementwise_op_strategy_follow == False
 
     reset_algo_parameters()
-    para_simplify_cal = get_algo_parameters("simplify_cal")
-    assert para_simplify_cal == True
     para_slice_align_enable = get_algo_parameters("tensor_slice_align_enable")
     assert para_slice_align_enable == False
     para_slice_align_size = get_algo_parameters("tensor_slice_align_size")

From ee7a64018c1282468692bddf1473e13ac38c5ab3 Mon Sep 17 00:00:00 2001
From: VectorSL <shiliang10@huawei.com>
Date: Thu, 23 Apr 2020 19:42:31 +0800
Subject: [PATCH 047/242] gpu update conv kernel for auto-mixed-precision

---
 .../ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h      |  2 +-
 mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h     | 11 +++++++++--
 .../kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h     | 11 +++++++++--
 .../kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h      | 11 +++++++++--
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h
index b929bbee50..3bf141fc0b 100644
--- a/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h
@@ -218,7 +218,7 @@ class BinaryOpGpuKernel : public GpuKernel {
       }
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, cudnn_data_type_, CUDNN_NOT_PROPAGATE_NAN),
+      cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN),
       "cudnnSetOpTensorDescriptor failed");
     return;
   }
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
index 75b2a97cf8..546e706d2b 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
@@ -142,10 +142,14 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       }
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                        CUDNN_CROSS_CORRELATION, cudnn_data_type_),
+                                        CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolution2dDescriptor failed");
       input_descriptor_real = input_desc_;
     }
+    if (cudnn_data_type_ == CUDNN_DATA_HALF) {
+      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),
+                                  "cudnnSetConvolutionMathType failed.")
+    }
     SelectAlgorithm(input_descriptor_real);
     InitSizeLists();
     return true;
@@ -240,7 +244,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
                                 "cudnnSetTensor4dDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(
       cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_),
+                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
       "cudnnSetConvolution2dDescriptor failed");
   }
 
@@ -276,6 +280,9 @@ class Conv2dGpuFwdKernel : public GpuKernel {
         "cudnnGetConvolutionForwardAlgorithm_v7 failed");
       conv_algorithm_ = perf_results.algo;
     }
+    if (cudnn_data_type_ == CUDNN_DATA_HALF) {
+      conv_algorithm_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
+    }
   }
   cudnnHandle_t cudnn_handle_;
   cudnnTensorDescriptor_t input_desc_;
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
index e481fd448e..6b7ee657c2 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
@@ -141,10 +141,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
       }
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                        CUDNN_CROSS_CORRELATION, cudnn_data_type_),
+                                        CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "GetConvolution2dDescriptor failed");
       x_desc_real = x_desc_;
     }
+    if (cudnn_data_type_ == CUDNN_DATA_HALF) {
+      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),
+                                  "cudnnSetConvolutionMathType failed.")
+    }
     SelectAlgorithm(x_desc_real);
     InitSizeLists();
     return true;
@@ -239,7 +243,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
                                 "cudnnSetTensor4dDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(
       cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_),
+                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
       "cudnnSetConvolution2dDescriptor failed");
   }
   void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {
@@ -258,6 +262,9 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
         "GetConvolutionBackwardFilterAlgorithm failed");
       algo_ = perf_results.algo;
     }
+    if (cudnn_data_type_ == CUDNN_DATA_HALF) {
+      algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
+    }
   }
   void GetFilterShape(const CNodePtr &kernel_node, std::vector<int> *filter_shape) {
     auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("filter_sizes")->cast<ValueTuplePtr>()->value();
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
index 008abcc658..af12083364 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
@@ -142,10 +142,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
       }
       CHECK_CUDNN_RET_WITH_EXCEPT(
         cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                        CUDNN_CROSS_CORRELATION, cudnn_data_type_),
+                                        CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
         "cudnnSetConvolution2dDescriptor failed");
       dx_desc_real = dx_desc_;
     }
+    if (cudnn_data_type_ == CUDNN_DATA_HALF) {
+      CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH),
+                                  "cudnnSetConvolutionMathType failed.")
+    }
     SelectAlgorithm(dx_desc_real);
     InitSizeLists();
     return true;
@@ -239,7 +243,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
                                 "cudnnSetTensor4dDescriptor failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(
       cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_),
+                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
       "cudnnSetConvolution2dDescriptor failed");
   }
   void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
@@ -258,6 +262,9 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
         "cudnnGetConvolutionBackwardDataAlgorithm_v7 failed");
       algo_ = perf_results.algo;
     }
+    if (cudnn_data_type_ == CUDNN_DATA_HALF) {
+      algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
+    }
   }
   void GetInputShape(const CNodePtr &kernel_node, std::vector<int> *input_shape) {
     auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("input_sizes")->cast<ValueTuplePtr>()->value();

From 425276d43df1b7f2943188300f45cbb24a9f978e Mon Sep 17 00:00:00 2001
From: yao_yf <yaoyifan1@huawei.com>
Date: Fri, 24 Apr 2020 11:35:50 +0800
Subject: [PATCH 048/242] auto parallel: PReLU supports a weight of shape 1

---
 .../ccsrc/parallel/ops_info/prelu_info.cc      | 10 +++++++---
 tests/ut/python/parallel/test_prelu.py         | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc b/mindspore/ccsrc/parallel/ops_info/prelu_info.cc
index fed361616b..14483e97a1 100644
--- a/mindspore/ccsrc/parallel/ops_info/prelu_info.cc
+++ b/mindspore/ccsrc/parallel/ops_info/prelu_info.cc
@@ -32,7 +32,7 @@ namespace parallel {
  * prelu has 2 input
  *  A: A float tensor of shape [NCHW] representing the output of the preview layer.
  *  w: Float Tensor, w > 0: there is only two shapes are legitimate: 1, or the number of channels at input.
- *  the strategy of w should equal to the channel dimension of strategy of A
+ *  the strategy of w should equal to the channel dimension of strategy of A, or equal to 1
  */
 Status PReLUInfo::CheckStrategy(const StrategyPtr &strategy) {
   if (CheckStrategyValue(strategy, inputs_shape_, is_auto_parallel_) != SUCCESS) {
@@ -52,7 +52,7 @@ Status PReLUInfo::CheckStrategy(const StrategyPtr &strategy) {
     }
     return FAILED;
   }
-  if (stra[0][PRELU_CHANNEL_INDEX] != stra[1][0]) {
+  if (stra[0][PRELU_CHANNEL_INDEX] != stra[1][0] && inputs_shape_[1][0] != 1) {
     if (is_auto_parallel_) {
       MS_LOG(DEBUG) << name_ << ": Invalid channel strategy.";
     } else {
@@ -107,7 +107,11 @@ Status PReLUInfo::InferTensorMap() {
   }
 
   TensorMap param_tensor_map;
-  param_tensor_map.push_back(input_tensor_map.at(1));
+  if (inputs_shape_[1][0] == 1) {
+    param_tensor_map.push_back(-1);
+  } else {
+    param_tensor_map.push_back(input_tensor_map.at(1));
+  }
   inputs_tensor_map_.push_back(input_tensor_map);
   inputs_tensor_map_.push_back(param_tensor_map);
   outputs_tensor_map_.push_back(input_tensor_map);
diff --git a/tests/ut/python/parallel/test_prelu.py b/tests/ut/python/parallel/test_prelu.py
index d3ad1cc710..5638c9cdbd 100755
--- a/tests/ut/python/parallel/test_prelu.py
+++ b/tests/ut/python/parallel/test_prelu.py
@@ -166,3 +166,21 @@ def test_prelu_parallel_success4():
     w = Tensor(np.random.rand(16),dtype=ms.float32)
     net = GradWrap(NetWithLoss(Net(strategy)))
     _executor.compile(net, x, w)
+
+def test_prelu_parallel_success5():
+    class Net(nn.Cell):
+        def __init__(self, strategy):
+            super().__init__()
+            self.prelu = P.PReLU().set_strategy(strategy)
+        def construct(self, x, y):
+            out = self.prelu(x, y)
+            return out
+    context.reset_auto_parallel_context()
+    context.set_auto_parallel_context(device_num=64, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
+    strategy = ((2, 4, 4, 2), (1, ))
+    x = Tensor(np.random.rand(4, 16, 32, 64),dtype=ms.float32)
+    w = Tensor(np.random.rand(1),dtype=ms.float32)
+    net = GradWrap(NetWithLoss(Net(strategy)))
+    _executor.compile(net, x, w)
+

From c8a1a24ce36927198e3fd9e3eae6e233c50beb36 Mon Sep 17 00:00:00 2001
From: buxue <yiren19920727@163.com>
Date: Thu, 23 Apr 2020 21:21:15 +0800
Subject: [PATCH 049/242] fix the inference of TruncatedNormal, a bug in
 StructureOutput, and a bug in TensorSlice with ellipsis

---
 .../ccsrc/operator/composite/composite.cc     | 13 ++++---
 mindspore/ccsrc/pipeline/pipeline_ge.cc       | 31 +++++++++-------
 .../composite/multitype_ops/getitem_impl.py   | 15 ++++++++
 mindspore/ops/operations/array_ops.py         | 35 ++++++++-----------
 tests/ut/python/ops/test_ops.py               |  2 +-
 tests/ut/python/ops/test_tensor_slice.py      |  7 ++--
 6 files changed, 60 insertions(+), 43 deletions(-)

diff --git a/mindspore/ccsrc/operator/composite/composite.cc b/mindspore/ccsrc/operator/composite/composite.cc
index 11ab31a292..88db8b8ff8 100644
--- a/mindspore/ccsrc/operator/composite/composite.cc
+++ b/mindspore/ccsrc/operator/composite/composite.cc
@@ -1084,6 +1084,7 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple,
   std::vector<unsigned int> shrink;
   auto slice_tuple_eles = slice_tuple->elements();
   size_t ellipsis_num = 0;
+
   for (size_t index = 0; index < slice_tuple_size; index++) {
     if (slice_tuple_eles[index]->isa<AbstractSlice>()) {
       AbstractSlicePtr slice = dyn_cast<AbstractSlice>(slice_tuple_eles[index]);
@@ -1118,12 +1119,13 @@ int GenerateStridedSliceParametersFromTuple(const AbstractTuplePtr &slice_tuple,
                       << slice_tuple_eles[index]->ToString();
   }
 
-  for (size_t index = slice_tuple_size; index < shape_size; index++) {
-    begin->push_back(0);
-    end->push_back(shape[index]);
-    strides->push_back(1);
+  if (ellipsis_num == 0) {
+    for (size_t index = slice_tuple_size; index < shape_size; index++) {
+      begin->push_back(0);
+      end->push_back(shape[index]);
+      strides->push_back(1);
+    }
   }
-
   return ConvertBinaryToDecimal(shrink);
 }
 
@@ -1199,6 +1201,7 @@ FuncGraphPtr TensorSlice::GenerateFuncGraph(const AbstractBasePtrList &args_spec
       if (scalar_ptr->BuildValue()->cast<BoolImmPtr>()->value()) {
         return ExpandADim(ret_graph, tensor_node);
       }
+      MS_LOG(EXCEPTION) << "TensorSlice not support the index is False.";
     }
     shrink_axis_mask = GenerateStridedSliceParametersFromNumber(scalar_ptr, shape, &begin, &end, &strides);
   } else if (args_spec_list[1]->isa<AbstractEllipsis>()) {
diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc
index 1da85b5699..3d4b8b3e2a 100644
--- a/mindspore/ccsrc/pipeline/pipeline_ge.cc
+++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc
@@ -319,19 +319,24 @@ void RunGEInitGraph(const py::dict &init_params, const std::string &phase) {
 
 py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::tuple &data, size_t *count) {
   MS_EXCEPTION_IF_NULL(cnode_data);
-  if (*count >= data.size()) {
-    MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
-                      << " less than the number of elements required. ";
-  }
 
   if (cnode_data->isa<AbstractTensor>()) {
+    if (*count >= data.size()) {
+      MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
+                        << " less than the number of elements required. ";
+    }
+
     BaseShapePtr shape = cnode_data->BuildShape();
-    auto shape_act = shape->cast<abstract::ShapePtr>()->shape();
-    Tensor tensor_exp = py::cast<Tensor>(data[*count]);
-    if (shape_act != tensor_exp.shape()) {
-      MS_LOG(EXCEPTION) << "The shape of the tensor returned from GE is not the same as "
-                           "the shape of the tensor derived from ME.";
+    if (!shape->isa<abstract::Shape>()) {
+      MS_LOG(EXCEPTION) << "The shape of the tensor derived is not Shape, is " << shape->ToString();
+    }
+    auto shape_me = shape->cast<abstract::ShapePtr>()->shape();
+    auto shape_ge = py::cast<Tensor>(data[*count]).shape();
+    if (shape_ge != shape_me) {
+      MS_LOG(EXCEPTION) << "The shape of the " << *count << "th tensor returned: " << shape_ge
+                        << " is not the same as the shape of the tensor derived: " << shape_me;
     }
+
     return data[(*count)++];
   }
 
@@ -357,11 +362,11 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data,
     return ValuePtrToPyData(GetValueNode(output_node));
   }
 
-  if (*count >= data.size()) {
-    MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
-                      << " less than the number of elements required. ";
-  }
   if (output_node->isa<Parameter>()) {
+    if (*count >= data.size()) {
+      MS_LOG(EXCEPTION) << "The number of elements in the outputs : " << data.size()
+                        << " less than the number of elements required. ";
+    }
     return data[(*count)++];
   }
 
diff --git a/mindspore/ops/composite/multitype_ops/getitem_impl.py b/mindspore/ops/composite/multitype_ops/getitem_impl.py
index 56617c06a8..540dd28b37 100644
--- a/mindspore/ops/composite/multitype_ops/getitem_impl.py
+++ b/mindspore/ops/composite/multitype_ops/getitem_impl.py
@@ -147,6 +147,21 @@ def _tensor_getitem_by_number(data, number_index):
     return _tensor_slice(data, number_index)
 
 
+@getitem.register("Tensor", "None")
+def _tensor_getitem_by_none(data, index):
+    """
+    Getting item of tensor by None.
+
+    Inputs:
+        data (Tensor): A tensor.
+        index (None): None.
+
+    Outputs:
+        Tensor, element type is as same as the element type of data.
+    """
+    return _tensor_slice(data, index)
+
+
 @getitem.register("Tensor", "Slice")
 def _tensor_getitem_by_slice(data, slice_index):
     """
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index 43398a5f29..6f51dd0a1c 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -633,7 +633,7 @@ class TruncatedNormal(PrimitiveWithInfer):
         dtype (:class:`mindspore.dtype`): Data type. Default: mindspore.float32.
 
     Inputs:
-        - **shape** (Tensor) - Shape of output tensor. The shape is a 1-D tensor, and type is int.
+        - **shape** (tuple[int]) - Shape of output tensor, is a tuple of positive int.
 
     Outputs:
         Tensor, type of output tensor is same as attribute `dtype`.
@@ -651,16 +651,10 @@ class TruncatedNormal(PrimitiveWithInfer):
         validator.check_typename('dtype', dtype, mstype.number_type)
 
     def __infer__(self, shape):
-        shape_t = shape['value']
-        validator.check_subclass("shape", shape['dtype'], mstype.tensor)
-        shape_n = shape_t.asnumpy()
-        if shape_n.ndim != 1:
-            raise ValueError('The rank of input shape must be 1.')
-        if shape_n.dtype not in (np.int32, np.int64):
-            raise TypeError('The type of input shape must be int32 or int64.')
-        for i, item in enumerate(shape_n):
-            validator.check_integer(f"shape[{i}]", item.item(), 0, Rel.GT)
-        out = {'shape': tuple(shape_n),
+        shape_value = shape['value']
+        for i, value in enumerate(shape_value):
+            validator.check_integer(f'{i}th value of shape', value, 0, Rel.GT)
+        out = {'shape': shape_value,
                'dtype': mstype.tensor_type(self.dtype),
                'value': None}
         return out
@@ -1648,20 +1642,19 @@ class StridedSlice(PrimitiveWithInfer):
         validator.check_type('shrink_axis_mask', shrink_axis_mask, [int])
 
     def __infer__(self, x, begin, end, strides):
-        begin_shape, end_shape, strides_shape = begin['shape'], end['shape'], strides['shape']
-        if begin_shape != strides_shape or end_shape != strides_shape:
-            raise ValueError("The shape of begin, end and strides in 'StridedSlice' must be equal.")
-
-        validator.check_const_input("begin", begin['value'])
-        validator.check_const_input("end", end['value'])
-        validator.check_const_input("strides", strides['value'])
+        x_shape = x['shape']
+        x_shp_len = len(x_shape)
+        begin_v, end_v, strides_v = begin['value'], end['value'], strides['value']
+        validator.check_const_input("begin", begin_v)
+        validator.check_const_input("end", end_v)
+        validator.check_const_input("strides", strides_v)
         validator.check_type("begin", begin['value'], [tuple])
         validator.check_type("end", end['value'], [tuple])
         validator.check_type("strides", strides['value'], [tuple])
+        if len(begin_v) != x_shp_len or len(end_v) != x_shp_len or len(strides_v) != x_shp_len:
+            raise ValueError(f"The length of begin index{begin_v}, end index{end_v} and strides{strides_v} "
+                             f"must be equal to the dims({x_shp_len}) of input.")
 
-        x_shape = x['shape']
-        x_shp_len = len(x_shape)
-        begin_v, end_v, strides_v = begin['value'], end['value'], strides['value']
         ret_shape = []
         append_dimensions = []
         shrink_pos = bin(self.shrink_axis_mask)[::-1]
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index 8b14ea2366..22df3d1fd3 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -372,7 +372,7 @@ test_case_math_ops = [
         'desc_bprop': [[3]]}),
     ('TruncatedNormal', {
         'block': P.TruncatedNormal(),
-        'desc_const': [Tensor(np.array([1, 2, 3]))],
+        'desc_const': [[1, 2, 3]],
         'desc_inputs': [],
         'skip': ['backward'],
         'add_fake_input': True}),
diff --git a/tests/ut/python/ops/test_tensor_slice.py b/tests/ut/python/ops/test_tensor_slice.py
index 08ba143de8..b547fdc06b 100644
--- a/tests/ut/python/ops/test_tensor_slice.py
+++ b/tests/ut/python/ops/test_tensor_slice.py
@@ -52,8 +52,9 @@ class NetWorkSliceEllipsis(Cell):
     def construct(self, tensor):
         ret0 = tensor[0:4:2, ..., 1] + self.tensor_ret0
         ret1 = tensor[...] + self.tensor_ret1
-        ret2 = tensor[True] + self.tensor_ret2
-        return ret0, ret1, ret2
+        ret2 = tensor[None] + self.tensor_ret2
+        ret3 = tensor[True] + self.tensor_ret2
+        return ret0, ret1, ret2, ret3
 
 
 class NetWorkReduceDimension(Cell):
@@ -305,7 +306,7 @@ test_cases = [
         'block': NetWorkReduceToScalar(),
         'desc_inputs': [Tensor(np.ones([6, 8, 10], np.int32))],
     }),
-    ('NetWorkSliceEllipsis', {
+    ('TensorSliceEllipsis', {
         'block': NetWorkSliceEllipsis(),
         'desc_inputs': [Tensor(np.ones([6, 7, 8, 9], np.int32))],
     }),

From bb527bc5cf544e80db0d0dd47d507648afff45e0 Mon Sep 17 00:00:00 2001
From: Ziyan <gongziyan1@huawei.com>
Date: Fri, 24 Apr 2020 15:47:10 +0800
Subject: [PATCH 050/242] add LARSUpdate example

---
 mindspore/ops/operations/nn_ops.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 9827975fd0..55bf88fcd8 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2488,6 +2488,27 @@ class LARSUpdate(PrimitiveWithInfer):
 
     Outputs:
         Tensor, representing the new gradient.
+
+    Examples:
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops import operations as P
+        >>> from mindspore.ops import functional as F
+        >>> import mindspore.nn as nn
+        >>> import numpy as np
+        >>> class Net(nn.Cell):
+        >>>     def __init__(self):
+        >>>         super(Net, self).__init__()
+        >>>         self.lars = P.LARSUpdate()
+        >>>         self.reduce = P.ReduceSum()
+        >>>     def construct(self, weight, gradient):
+        >>>         w_square_sum = self.reduce(F.square(weight))
+        >>>         grad_square_sum = self.reduce(F.square(gradient))
+        >>>         grad_t = self.lars(weight, gradient, w_square_sum, grad_square_sum, 0.0, 1.0)
+        >>>         return grad_t
+        >>> weight = np.random.random(size=(2, 3)).astype(np.float32)
+        >>> gradient = np.random.random(size=(2, 3)).astype(np.float32)
+        >>> net = Net()
+        >>> ms_output = net(Tensor(weight), Tensor(gradient))
     """
 
     @prim_attr_register

From 2ab211ae041afd007917c7112fbc1c7af48bb8e7 Mon Sep 17 00:00:00 2001
From: lichenever <lichentrue@163.com>
Date: Thu, 23 Apr 2020 19:36:27 +0800
Subject: [PATCH 051/242] support reshape parameter

---
 mindspore/ccsrc/parallel/step_parallel.cc     | 25 ++++++-
 mindspore/context.py                          |  7 +-
 .../python/parallel/test_reshape_parameter.py | 75 +++++++++++++++++++
 3 files changed, 104 insertions(+), 3 deletions(-)
 create mode 100644 tests/ut/python/parallel/test_reshape_parameter.py

diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index c24c14abf6..12341a4759 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -1523,9 +1523,32 @@ std::shared_ptr<TensorLayout> FindPrevParallelCareNodeLayout(const AnfNodePtr &n
   return nullptr;
 }
 
+std::shared_ptr<TensorLayout> CreateParameterLayout(const AnfNodePtr &node) {
+  // Create DataParallel tensor layout for parameter(support WideDeep).
+  CheckGlobalDeviceManager();
+  int32_t dev_num = SizeToInt(g_device_manager->GetDeviceListByStageId(0).size());
+  TensorLayout input_tensor_layout;
+  // create input_shape
+  Shapes inputs_shape = GetNodeShape(node);
+  Shape input_shape_array = inputs_shape[0];
+  if (input_shape_array.empty()) {
+    MS_LOG(EXCEPTION) << "Don't support reshape a scalar parameter.";
+  }
+  // create tensor_map
+  size_t shape_size = input_shape_array.size();
+  TensorMap input_tensor_map_array(SizeToInt(shape_size) - 1, -1);
+  input_tensor_map_array.insert(input_tensor_map_array.begin(), 0);
+  // create dev_matrix
+  Shape dev_matrix_array = {dev_num};
+  if (input_tensor_layout.InitFromVector(dev_matrix_array, input_tensor_map_array, input_shape_array) != SUCCESS) {
+    MS_LOG(EXCEPTION) << "Create tensor layout for parameter failed.";
+  }
+  return std::make_shared<TensorLayout>(input_tensor_layout);
+}
+
 std::shared_ptr<TensorLayout> FindPrevLayout(const AnfNodePtr &node) {
   if (node->isa<Parameter>()) {
-    MS_LOG(EXCEPTION) << "Failure: parameter before reshape is not supported temporary";
+    return CreateParameterLayout(node);
   }
   if (!node->isa<CNode>()) {
     return nullptr;
diff --git a/mindspore/context.py b/mindspore/context.py
index 159522a87a..18f24d20d1 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -415,8 +415,11 @@ def set_auto_parallel_context(**kwargs):
     Args:
         device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
         global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
-        mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror. Default: False.
-        cast_before_mirror (bool): Insert Mirror Op after the cast if this flag is True. Default: True.
+        mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror.
+                     "stand_alone" does not support mirror_mean. Default: False.
+        cast_before_mirror (bool): Insert Mirror Op after the cast if this flag is True.
+                     "stand_alone", "data_parallel" and "hybrid_parallel" do not support
+                     cast_before_mirror. Default: True.
         parallel_mode (str): There are five kinds of parallel modes, "stand_alone", "data_parallel",
                      "hybrid_parallel", "semi_auto_parallel" and "auto_parallel". Default: "stand_alone".
 
diff --git a/tests/ut/python/parallel/test_reshape_parameter.py b/tests/ut/python/parallel/test_reshape_parameter.py
new file mode 100644
index 0000000000..be35e8f43a
--- /dev/null
+++ b/tests/ut/python/parallel/test_reshape_parameter.py
@@ -0,0 +1,75 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import mindspore as ms
+import mindspore.nn as nn
+from mindspore.ops import operations as P
+from mindspore.ops import composite as C
+from mindspore import Tensor
+from mindspore import context
+from mindspore.common.api import _executor
+from tests.ut.python.ops.test_math_ops import VirtualLoss
+import numpy as np
+
+
+class NetWithLoss(nn.Cell):
+    def __init__(self, network):
+        super(NetWithLoss, self).__init__()
+        self.loss = VirtualLoss()
+        self.network = network
+
+    def construct(self, x, y):
+        predict = self.network(x, y)
+        return self.loss(predict)
+
+
+class GradWrap(nn.Cell):
+    def __init__(self, network):
+        super(GradWrap, self).__init__()
+        self.network = network
+
+    def construct(self, x, y):
+        return C.grad_all(self.network)(x, y)
+
+
+class Net(nn.Cell):
+    def __init__(self, strategy):
+        super().__init__()
+        self.reshape = P.Reshape()
+        self.mul = P.Mul().set_strategy(strategy)
+        self.relu = P.ReLU()
+
+    def construct(self, x, y):
+        out = self.reshape(x, (10000, 36, 1))
+        out = self.mul(out, y)
+        out = self.relu(out)
+        return out
+
+
+def test_reshape_parameter_data_parallel():
+    context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel")
+    strategy = ((8, 1, 1), (8, 1, 1))
+    net = GradWrap(NetWithLoss(Net(strategy)))
+    x = Tensor(np.ones([10000, 36]), dtype=ms.float32)
+    y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32)
+    _executor.compile(net, x, y)
+
+
+def test_reshape_parameter_model_parallel():
+    context.set_auto_parallel_context(device_num=8, global_rank=0, parallel_mode="semi_auto_parallel")
+    strategy = ((4, 2, 1), (4, 2, 1))
+    net = GradWrap(NetWithLoss(Net(strategy)))
+    x = Tensor(np.ones([10000, 36]), dtype=ms.float32)
+    y = Tensor(np.ones([10000, 36, 1]), dtype=ms.float32)
+    _executor.compile(net, x, y)

From 31aae36106043189f613f1efba97ed02445b84f7 Mon Sep 17 00:00:00 2001
From: candanzg <zhangshucheng@huawei.com>
Date: Thu, 23 Apr 2020 17:33:51 +0800
Subject: [PATCH 052/242] Tensor assign with integer

Signed-off-by: candanzg <zhangshucheng@huawei.com>
---
 .../multitype_ops/_multitype_ops_util.py      |  73 +++++----
 .../composite/multitype_ops/setitem_impl.py   | 146 ++++++++++--------
 tests/ut/python/ops/test_tensor_slice.py      |  52 ++++++-
 3 files changed, 183 insertions(+), 88 deletions(-)

diff --git a/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py b/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
index 49773ff8ad..3a44b1e483 100644
--- a/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
+++ b/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
@@ -15,6 +15,7 @@
 
 """constexpr util"""
 
+from functools import reduce
 import numpy as np
 from ...primitive import constexpr
 from ....common.tensor import Tensor
@@ -23,26 +24,27 @@ from ...._extends.utils import Slice
 
 @constexpr
 def check_equal(param1, param2, msg="{},{}"):
+    """Checks whether the two parameters are equal or not."""
     if param1 != param2:
         raise ValueError(msg.format(param1, param2))
     return param1
 
 @constexpr
 def check_tensor_setitem_index(index, element_type=None):
-    """Check tuple index type of tensor assignment."""
+    """Checks tuple index type of tensor assignment."""
     if index is None:
         raise ValueError("Tensor's index cannot be None.")
     # eg. Tensor[Slice] = u
     if isinstance(index, Slice):
         return True
-    # eg. Tensor[Tuple] = u
+    # eg. Tensor[tuple] = u
     if isinstance(index, tuple):
         if not index:
             raise ValueError("Tensor's index cannot be empty.")
-        # eg. Tensor[Tuple(Slice...)] = u
-        if not isinstance(index[0], Slice):
-            raise ValueError("Index of type '{}' is not supported yet.".format(type(index[0])))
-        return True
+        # eg. Tensor[tuple(Slice...)] = u
+        if isinstance(index[0], (Slice, int)):
+            return True
+        raise ValueError("Index of type '{}' is not supported yet.".format(type(index[0])))
     # eg. Tensor[Tensor[dtype=bool]] = u
     if index == mstype.tensor:
         if element_type is None or element_type != mstype.bool_:
@@ -57,7 +59,7 @@ def check_tensor_setitem_index(index, element_type=None):
 @constexpr
 def is_same_type(inst, type_):
     """
-    Check whether an object is an instance of a target type.
+    Checks whether an object is an instance of a target type.
 
     Inputs:
         inst (mindspore.dtype): Inspected type.
@@ -69,34 +71,23 @@ def is_same_type(inst, type_):
     return inst == type_
 
 
-@constexpr
-def error_msg(msg="", format_values=""):
-    """
-    Used to throw exception information.
-
-    Inputs:
-        msg (str): information content.
-    """
-
-    raise ValueError(msg.format(*format_values))
-
 def slice_expand(input_slices, shape):
     """
-    Convert slice to indices.
+    Converts slice to indices.
 
     Inputs:
-        slices (List or Tuple(List, ...)): Slice tuple or slice.
-        shape (Tuple): The shape of a sensor is an integer element tuple.
+        slices (Union[Slice, tuple[Slice]]): Slice tuple or slice.
+        shape (tuple): The shape of a tensor is an integer element tuple.
 
     Outputs:
-        (List, List, List), This is expressed as (begins, ends, strides).
+        tuple[list], This is expressed as (begins, ends, strides).
     """
     begin = []
     end = []
     strides = []
     index = 0
     slices = None
-    # Slice or Tuple(Slice...)
+    # Slice or tuple(Slice...)
     if isinstance(input_slices, Slice):
         slices = (input_slices,)
     elif isinstance(input_slices, (tuple, list)) and input_slices and isinstance(input_slices[0], Slice):
@@ -119,14 +110,15 @@ def slice_expand(input_slices, shape):
         index += 1
     return begin, end, strides
 
+
 @constexpr
 def slice2indices(input_slices, shape):
     """
-    Convert slice to indices.
+    Converts slice to indices.
 
     Inputs:
-        slices (List or Tuple(List, ...)): Slice tuple or slice.
-        shape (Tuple): The shape of a sensor is an integer element tuple.
+        slices (Union[Slice, tuple[Slice]]): Slice tuple or slice.
+        shape (tuple): The shape of a tensor is an integer element tuple.
 
     Outputs:
         Tensor, the shape is (n, 1).
@@ -145,6 +137,7 @@ def slice2indices(input_slices, shape):
 
 @constexpr
 def check_indices(indices_size, index):
+    """Checks whether the indices are empty."""
     if indices_size < 1:
         raise ValueError("The tensor's index is unreasonable. index:{}".format(index))
     return indices_size
@@ -152,6 +145,7 @@ def check_indices(indices_size, index):
 
 @constexpr
 def check_indices_value_size(indices_size, value_size):
+    """Checks if the sizes are already matched."""
     if value_size < 1:
         raise ValueError("The value assigned to tensor cannot be empty.")
     if value_size > 1:
@@ -160,3 +154,30 @@ def check_indices_value_size(indices_size, value_size):
                 "The value given to tensor does not match the index size. \
                 value size:{}, indics size:{}".format(value_size, indices_size))
     return value_size
+
+@constexpr
+def integer_to_indices(index, shape):
+    """Converts int or tuple[int] to indices."""
+    size = reduce(lambda x, y: x * y, shape)
+    range_ = np.arange(size).reshape(shape)
+    value = range_[index]
+    value = value.reshape(-1, 1)
+    return Tensor(value, dtype=mstype.int32)
+
+@constexpr
+def tuple_element_is_slice(indexs):
+    """Judges whether the first element of the tuple index is a Slice."""
+    if not indexs:
+        raise ValueError("Tensor's index cannot be empty.")
+    if isinstance(indexs, tuple) and isinstance(indexs[0], Slice):
+        return True
+    return False
+
+@constexpr
+def tuple_element_is_int(indexs):
+    """Judges whether the first element of the tuple index is an int."""
+    if not indexs:
+        raise ValueError("Tensor's index cannot be empty.")
+    if isinstance(indexs, tuple) and isinstance(indexs[0], int):
+        return True
+    return False
diff --git a/mindspore/ops/composite/multitype_ops/setitem_impl.py b/mindspore/ops/composite/multitype_ops/setitem_impl.py
index 742ee57166..13d4a1ffce 100644
--- a/mindspore/ops/composite/multitype_ops/setitem_impl.py
+++ b/mindspore/ops/composite/multitype_ops/setitem_impl.py
@@ -25,15 +25,14 @@ setitem = base.MultitypeFuncGraph('setitem')
 @setitem.register("List", "Number", "String")
 def _list_setitem_with_string(data, number_index, value):
     """
-    Assign value to list.
+    Assigns value to list.
 
     Inputs:
         data (list): Data of type lis.
         number_index (Number): Index of data.
-        value (String): Value given.
 
     Outputs:
-        List, type is same as the element type of data.
+        list, type is same as the element type of data.
     """
     return F.list_setitem(data, number_index, value)
 
@@ -41,7 +40,7 @@ def _list_setitem_with_string(data, number_index, value):
 @setitem.register("List", "Number", "Number")
 def _list_setitem_with_number(data, number_index, value):
     """
-    Assign value to list.
+    Assigns value to list.
 
     Inputs:
         data (list): Data of type lis.
@@ -49,7 +48,7 @@ def _list_setitem_with_number(data, number_index, value):
         value (Number): Value given.
 
     Outputs:
-        List, type is same as the element type of data.
+        list, type is same as the element type of data.
     """
     return F.list_setitem(data, number_index, value)
 
@@ -57,7 +56,7 @@ def _list_setitem_with_number(data, number_index, value):
 @setitem.register("List", "Number", "Tensor")
 def _list_setitem_with_Tensor(data, number_index, value):
     """
-    Assign value to list.
+    Assigns value to list.
 
     Inputs:
         data (list): Data of type lis.
@@ -65,7 +64,7 @@ def _list_setitem_with_Tensor(data, number_index, value):
         value (Tensor): Value given.
 
     Outputs:
-        List, type is same as the element type of data.
+        list, type is same as the element type of data.
     """
     return F.list_setitem(data, number_index, value)
 
@@ -73,15 +72,15 @@ def _list_setitem_with_Tensor(data, number_index, value):
 @setitem.register("List", "Number", "List")
 def _list_setitem_with_List(data, number_index, value):
     """
-    Assign value to list.
+    Assigns value to list.
 
     Inputs:
         data (list): Data of type lis.
         number_index (Number): Index of data.
-        value (List): Value given.
+        value (list): Value given.
 
     Outputs:
-        List, type is same as the element type of data.
+        list, type is same as the element type of data.
     """
     return F.list_setitem(data, number_index, value)
 
@@ -89,15 +88,15 @@ def _list_setitem_with_List(data, number_index, value):
 @setitem.register("Dictionary", "String", "Tensor")
 def _dict_setitem_with_tensor(data, key, value):
     """
-    Assign value to dictionary.
+    Assigns value to dictionary.
 
     Inputs:
-        data (Dictionary): Data of type dict.
+        data (dict): Data of type dict.
         key (str): Key of the data.
         value (Tensor): Value given.
 
     Outputs:
-        Dict, type is as same as the element type of data.
+        dict, type is as same as the element type of data.
     """
     return F.dict_setitem(data, key, value)
 
@@ -105,15 +104,15 @@ def _dict_setitem_with_tensor(data, key, value):
 @setitem.register("Dictionary", "String", "Number")
 def _dict_setitem_with_number(data, key, value):
     """
-    Assign value to dictionary.
+    Assigns value to dictionary.
 
     Inputs:
-        data (Dictionary): Data of type dict.
+        data (dict): Data of type dict.
         key (str): Key of the data.
         value (Number): Value given.
 
     Outputs:
-        Dict, type is as same as the element type of data.
+        dict, type is as same as the element type of data.
     """
     return F.dict_setitem(data, key, value)
 
@@ -219,14 +218,14 @@ def _tensor_setitem_with_slice_v4(data, input_slice, value):
     Tensor assignment.
 
     Note:
-        Syntax support: A[Slice] = U
+        Syntax support: A[tuple(Slice)] = U, and A[tuple(Number)] = U
         Restraint condition: A is a Tensor
                              Slice like "1:3, ::, :4:-1"
                              U is a Tensor(size=1) or Tensor(size>1)
 
     Inputs:
         data (Tensor): Assigned tensor.
-        input_slice (Tuple(Slice)): Slice expression.
+        input_slice (Union[tuple[Slice], tuple[Number]]): Slice expression.
         value (Number): Assignment value.
 
     Outputs:
@@ -236,39 +235,43 @@ def _tensor_setitem_with_slice_v4(data, input_slice, value):
 
 
 def _tensor_assgin_tensor(data, input_slice, value):
-    """Given a tensor value assign to tensor by slice"""
-    # 1. condition
+    """Assigns a tensor value to the tensor by slice."""
     result = None
     check_result = mult_util.check_tensor_setitem_index(input_slice)
     if check_result:
         data_shape = F.shape(data)
-        data_size = F.size(data)
-        data_dtype = F.dtype(data)
         indices = mult_util.slice2indices(input_slice, data_shape)
-        indices_size = F.size(indices)
-        indices_size = mult_util.check_indices(indices_size, input_slice)
-        update = F.fill(data_dtype, (indices_size,), 1)
-        condition_1d = F.scatter_nd(indices, update, (data_size,))
-        condition_1d = F.cast(condition_1d, mstype.bool_)
-        condition = F.reshape(condition_1d, data_shape)
-        # 2. u
-        value_fill = None
-        value_size = F.size(value)
-
-        value_size = mult_util.check_indices_value_size(indices_size, value_size)
-        if value_size == 1:
-            value_fill = F.fill(data_dtype, (indices_size,), 1)
-            value = F.cast(value, data_dtype)
-            value_fill = F.tensor_mul(value_fill, value)
-        elif value_size > 1:
-            value_fill = F.reshape(value, (indices_size,))
-        value_1d = F.scatter_nd(indices, value_fill, (data_size,))
-        u = F.reshape(value_1d, data_shape)
-        # A[slice]= u -> A[B]=U -> select(B, U, A)
-        result = F.select(condition, u, data)
+        is_tuple_int = mult_util.tuple_element_is_int(input_slice)
+        if is_tuple_int:
+            indices = mult_util.integer_to_indices(input_slice, data_shape)
+        result = _tensor_indices_tensor(data, data_shape, input_slice, indices, value)
     return result
 
 
+def _tensor_indices_tensor(data, data_shape, index, indices, value):
+    """Assigns a tensor value to the tensor."""
+    data_size = F.size(data)
+    data_dtype = F.dtype(data)
+    indices_size = F.size(indices)
+    indices_size = mult_util.check_indices(indices_size, index)
+    update = F.fill(data_dtype, (indices_size,), 1)
+    condition_1d = F.scatter_nd(indices, update, (data_size,))
+    condition_1d = F.cast(condition_1d, mstype.bool_)
+    condition = F.reshape(condition_1d, data_shape)
+    value_fill = None
+    value_size = F.size(value)
+
+    value_size = mult_util.check_indices_value_size(indices_size, value_size)
+    if value_size == 1:
+        value_fill = F.fill(data_dtype, (indices_size,), 1)
+        value = F.cast(value, data_dtype)
+        value_fill = F.tensor_mul(value_fill, value)
+    elif value_size > 1:
+        value_fill = F.reshape(value, (indices_size,))
+    value_1d = F.scatter_nd(indices, value_fill, (data_size,))
+    u = F.reshape(value_1d, data_shape)
+    return F.select(condition, u, data)
+
 @setitem.register("Tensor", "Slice", "Number")
 def _tensor_setitem_with_slice_v1(data, input_slice, value):
     """
@@ -297,14 +300,14 @@ def _tensor_setitem_with_slice_v2(data, input_slice, value):
     Tensor assignment.
 
     Note:
-        Syntax support: A[Slice] = u
+        Syntax support: A[tuple(Slice)] = u, and A[tuple(Number)] = u
         Restraint condition: A is a Tensor.
                              Slice like "1:3, ::, :4:-1"
                              u is a scalar
 
     Inputs:
         data (Tensor): Assigned tensor.
-        input_slice (Tuple(Slice)): slice expression.
+        input_slice (Union[tuple[Slice], tuple[Number]]): slice expression.
         value (Number): Assignment value.
 
     Outputs:
@@ -314,25 +317,46 @@ def _tensor_setitem_with_slice_v2(data, input_slice, value):
 
 
 def _tensor_assgin_number(data, input_slice, value):
-    """Given a scalar assign to tensor by slice"""
-    # 1. condition
+    """Assigns a scalar value to the tensor by slice."""
     check_result = mult_util.check_tensor_setitem_index(input_slice)
     result = None
     if check_result:
         data_shape = F.shape(data)
-        data_size = F.size(data)
-        data_dtype = F.dtype(data)
         indices = mult_util.slice2indices(input_slice, data_shape)
-        indices_size = F.size(indices)
-        indices_size = mult_util.check_indices(indices_size, input_slice)
-        update = F.fill(data_dtype, (indices_size,), 1)
-        condition_1d = F.scatter_nd(indices, update, (data_size,))
-        condition_1d = F.cast(condition_1d, mstype.bool_)
-        condition = F.reshape(condition_1d, data_shape)
-        # 2. u
-        value_fill = F.fill(data_dtype, (indices_size,), value)
-        value_1d = F.scatter_nd(indices, value_fill, (data_size,))
-        u = F.reshape(value_1d, data_shape)
-        # A[slice]= u -> A[B]=U -> select(B, U, A)
-        result = F.select(condition, u, data)
+        is_tuple_int = mult_util.tuple_element_is_int(input_slice)
+        if is_tuple_int:
+            indices = mult_util.integer_to_indices(input_slice, data_shape)
+        result = _tensor_indices_number(data, data_shape, input_slice, indices, value)
     return result
+
+
+def _tensor_indices_number(data, data_shape, index, indices, value):
+    """Assigns a scalar value to the tensor."""
+    data_size = F.size(data)
+    data_dtype = F.dtype(data)
+    indices_size = F.size(indices)
+    indices_size = mult_util.check_indices(indices_size, index)
+    update = F.fill(data_dtype, (indices_size,), 1)
+    condition_1d = F.scatter_nd(indices, update, (data_size,))
+    condition_1d = F.cast(condition_1d, mstype.bool_)
+    condition = F.reshape(condition_1d, data_shape)
+    value_fill = F.fill(data_dtype, (indices_size,), value)
+    value_1d = F.scatter_nd(indices, value_fill, (data_size,))
+    u = F.reshape(value_1d, data_shape)
+    return F.select(condition, u, data)
+
+
+@setitem.register("Tensor", "Number", "Number")
+def _tensor_setitem_with_int_v1(data, index, value):
+    """Syntax: A[1] = 3"""
+    data_shape = F.shape(data)
+    indices = mult_util.integer_to_indices(index, data_shape)
+    return _tensor_indices_number(data, data_shape, index, indices, value)
+
+
+@setitem.register("Tensor", "Number", "Tensor")
+def _tensor_setitem_with_int_v2(data, index, value):
+    """Syntax: A[1] = Tensor"""
+    data_shape = F.shape(data)
+    indices = mult_util.integer_to_indices(index, data_shape)
+    return _tensor_indices_tensor(data, data_shape, index, indices, value)
diff --git a/tests/ut/python/ops/test_tensor_slice.py b/tests/ut/python/ops/test_tensor_slice.py
index 08ba143de8..3145a12f0a 100644
--- a/tests/ut/python/ops/test_tensor_slice.py
+++ b/tests/ut/python/ops/test_tensor_slice.py
@@ -138,7 +138,7 @@ class TensorAssignWithSlice(Cell):
         z = a
         return z
 
-def test_tensor_assign_with_slice():
+def test_tensor_assign():
     context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
     net = TensorAssignWithSlice()
     net2= TensorAssignWithSlice2()
@@ -147,6 +147,7 @@ def test_tensor_assign_with_slice():
     a = np.arange(60).reshape(3,4,5)
     b = Tensor([1])
     Ta = Tensor(a)
+    Ta4d = Tensor(a.reshape(1,3,4,5))
     Tb= Tensor([1,3])
     Tc= Tensor([])
     t = Tensor([1, 2, 3, 4, 5, 6, 7, 8])
@@ -184,6 +185,47 @@ def test_tensor_assign_with_slice():
     with pytest.raises(ValueError):
         net_e1(Ta, 2)
 
+    net = TensorAssignWithInteger()
+    # Error for A[Number] = scalar/Tensor
+    # 1. A[Number] = U, U is a Tensor, U.size does not match
+    with pytest.raises(ValueError):
+        net(Ta, Tb)
+    with pytest.raises(ValueError):
+        net(Ta, Tc)
+    # 2. A[Number] = U, the number index error
+    with pytest.raises(IndexError):
+        net(Ta4d, b)
+
+    # Error for A[(n,m)] = scalar/Tensor
+    # 1. A[(n,m)] = U, U is a Tensor, U.size does not match
+    net = TensorAssignWithTupleInteger()
+    with pytest.raises(ValueError):
+        net(Ta, Tc)
+    with pytest.raises(ValueError):
+        net(Ta, Tb)
+    # 2. A[(n,m)] = U, the number index error
+    with pytest.raises(IndexError):
+        net(Ta4d, b)
+
+class TensorAssignWithInteger(Cell):
+    def __init__(self):
+        super(TensorAssignWithInteger, self).__init__()
+
+    def construct(self, a, b):
+        a[1] = 1
+        a[0] = b
+        return a
+
+class TensorAssignWithTupleInteger(Cell):
+    def __init__(self):
+        super(TensorAssignWithTupleInteger, self).__init__()
+
+    def construct(self, a, b):
+        a[(1)] = 1
+        a[(1)] = b
+        a[(1,1)] = b
+        a[(1,1)] = 1
+        return a
 
 class TensorAssignWithBoolTensorIndex(Cell):
     def __init__(self):
@@ -273,6 +315,14 @@ def test_tensor_assign_bool_index():
         net4(Ta, u_scalar)
 
 test_cases = [
+    ('TensorAssignWithTupleInteger', {
+        'block': TensorAssignWithTupleInteger(),
+        'desc_inputs': [Ta,  u_tensor],
+    }),
+    ('TensorAssignWithInteger', {
+        'block': TensorAssignWithInteger(),
+        'desc_inputs': [Ta,  u_tensor],
+    }),
     ('TensorAssignWithSlice', {
         'block': TensorAssignWithSlice(),
         'desc_inputs': [Ta,  u_tensor],

From bfa0f98c1c91c8e6805d41010dac0f12b01c69ce Mon Sep 17 00:00:00 2001
From: jonyguo <guozhijian@huawei.com>
Date: Fri, 24 Apr 2020 11:52:02 +0800
Subject: [PATCH 053/242] fix: update mindrecord example docs

---
 example/convert_to_mindrecord/README.md | 57 +++++++++++++++++++++----
 1 file changed, 48 insertions(+), 9 deletions(-)

diff --git a/example/convert_to_mindrecord/README.md b/example/convert_to_mindrecord/README.md
index 8d3b25e311..008cff5ee3 100644
--- a/example/convert_to_mindrecord/README.md
+++ b/example/convert_to_mindrecord/README.md
@@ -1,15 +1,53 @@
-# MindRecord generating guidelines
+# Guideline to Efficiently Generating MindRecord
 
 <!-- TOC -->
 
-- [MindRecord generating guidelines](#mindrecord-generating-guidelines)
+- [What does the example do](#what-does-the-example-do)
+- [Example test for ImageNet](#example-test-for-imagenet)
+- [How to use the example for other dataset](#how-to-use-the-example-for-other-dataset)
     - [Create work space](#create-work-space)
     - [Implement data generator](#implement-data-generator)
     - [Run data generator](#run-data-generator)
 
+
 <!-- /TOC -->
 
-## Create work space
+## What does the example do
+
+This example provides an efficient way to generate MindRecord. Users only need to define the parallel granularity of training data reading and the data reading function of a single task; the example then converts their training data into MindRecord efficiently.
+
+1.  run_template.sh: entry script, users need to modify parameters according to their own training data.
+2.  writer.py: main script, called by run_template.sh, it mainly reads user training data in parallel and generates MindRecord.
+3.  template/mr_api.py: users define their own parallel granularity of training data reading and single task reading function through the template.
+
+## Example test for ImageNet
+
+1. Download and prepare the ImageNet dataset as required.
+
+    > [ImageNet dataset download address](http://image-net.org/download)
+
+    Store the downloaded ImageNet dataset in a folder. The folder contains all images and a mapping file that records labels of the images.
+
+    In the mapping file, there are three columns, which are separated by spaces. They indicate image classes, label IDs, and label names. The following is an example of the mapping file:
+    ```
+    n02119789 1 pen
+    n02100735 2 notebook
+    n02110185 3 mouse
+    n02096294 4 orange
+    ```
+2. Edit run_imagenet.sh and modify the parameters
+3. Run the bash script  
+    ```bash  
+    bash run_imagenet.sh
+    ```  
+4. Performance result
+
+    |  Training Data |  General API | Current Example |  Env  |
+    | ---- | ---- | ---- | ---- |
+    |ImageNet(140G)|  2h40m |  50m  |  CPU: Intel Xeon Gold 6130 x 64, Memory: 256G, Storage: HDD |
+
+## How to use the example for other dataset
+### Create work space
 
 Assume the dataset name is 'xyz'
 * Create work space from template
@@ -18,7 +56,7 @@ Assume the dataset name is 'xyz'
     cp -r template xyz
     ```
 
-## Implement data generator 
+### Implement data generator
 
 Edit dictionary data generator  
 * Edit file 
@@ -27,20 +65,21 @@ Edit dictionary data generator
     vi xyz/mr_api.py
     ```
 
- Two API, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented
+Two APIs, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented
 - 'mindrecord_task_number()' returns number of tasks. Return 1 if data row is generated serially. Return N if generator can be split into N parallel-run tasks.
 - 'mindrecord_dict_data(task_id)' yields dictionary data row by row. 'task_id' is 0..N-1, if N is return value of mindrecord_task_number()
 
-
 Tricky for parallel run
-- For imagenet, one directory can be a task.
+- For ImageNet, one directory can be a task.
 - For TFRecord with multiple files, each file can be a task.
 - For TFRecord with 1 file only, it could also be split into N tasks. Task_id=K means: data row is picked only if (count % N == K) 
 
+### Run data generator
 
-## Run data generator 
 * run python script 
     ```shell
     cd ${your_mindspore_home}/example/convert_to_mindrecord
-    python writer.py --mindrecord_script imagenet [...]
+    python writer.py --mindrecord_script xyz [...]
     ```
+    > You can put this command in script **run_xyz.sh** for easy execution
+

From 80e5bb982b4f3b262b0bb2582de0c746f8b6e5ed Mon Sep 17 00:00:00 2001
From: liubuyu <liubuyu1@huawei.com>
Date: Fri, 24 Apr 2020 16:10:06 +0800
Subject: [PATCH 054/242] fix log print

---
 mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc
index 127451851e..63e0fb888d 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_select.cc
@@ -543,11 +543,6 @@ bool IsValidKernelInfo(const std::shared_ptr<CNode> &kernel_node, const kernel::
     if (!IsShapeMatchFormat(shape, format)) {
       return false;
     }
-    for (auto shape_value : shape) {
-      if (shape_value == 0) {
-        MS_LOG(EXCEPTION) << "Dimension size of the tensor shape should be a positive integer, but got " << shape_value;
-      }
-    }
     return true;
   };
   for (size_t index = 0; index < kernel_build_info.GetOutputNum(); ++index) {
@@ -593,10 +588,12 @@ void TbeMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<Ke
     if (context_ptr->execution_mode() == kPynativeMode) {
       kernel_info_list->push_back(parse_info);
     } else {
-      if (IsValidKernelInfo(kernel_node, *(parse_info)) && CheckSupported(kernel_node, parse_info)) {
-        kernel_info_list->push_back(parse_info);
-      } else {
-        MS_LOG(INFO) << "CheckSupported Failed for TBE op" << op_name << " kernel info.";
+      if (IsValidKernelInfo(kernel_node, *(parse_info))) {
+        if (CheckSupported(kernel_node, parse_info)) {
+          kernel_info_list->push_back(parse_info);
+        } else {
+          MS_LOG(INFO) << "CheckSupported Failed for TBE op" << op_name << " kernel info.";
+        }
       }
     }
   }

From b4151efe81ffd7ad9a345d7cf9cb56621d18f8f1 Mon Sep 17 00:00:00 2001
From: jonyguo <guozhijian@huawei.com>
Date: Fri, 24 Apr 2020 17:27:14 +0800
Subject: [PATCH 055/242] fix: imagenet to mindrecord failed when the pic size
 is 0

---
 mindspore/mindrecord/tools/imagenet_to_mr.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mindspore/mindrecord/tools/imagenet_to_mr.py b/mindspore/mindrecord/tools/imagenet_to_mr.py
index 8c8de689c1..e941e76477 100644
--- a/mindspore/mindrecord/tools/imagenet_to_mr.py
+++ b/mindspore/mindrecord/tools/imagenet_to_mr.py
@@ -111,6 +111,9 @@ class ImageNetToMR:
                 image_file = open(file_name, "rb")
                 image_bytes = image_file.read()
                 image_file.close()
+                if not image_bytes:
+                    logger.warning("The image file: {} is invalid.".format(file_name))
+                    continue
                 data["data"] = image_bytes
                 yield data
 

From 82c5daa94781ae71c468cc32419a3ad5f099fbc3 Mon Sep 17 00:00:00 2001
From: geekun <youjiangkun@huawei.com>
Date: Fri, 24 Apr 2020 18:02:16 +0800
Subject: [PATCH 056/242] fix codedex for mindspore/vm,transform,util,operator

---
 mindspore/ccsrc/operator/composite/composite.h    | 1 -
 mindspore/ccsrc/operator/composite/unpack_call.cc | 1 -
 mindspore/ccsrc/transform/convert.h               | 1 -
 mindspore/ccsrc/utils/any.cc                      | 2 +-
 mindspore/ccsrc/utils/callbacks.cc                | 1 -
 mindspore/ccsrc/utils/callbacks_ge.cc             | 1 -
 mindspore/ccsrc/utils/callbacks_ge.h              | 2 --
 mindspore/ccsrc/utils/convert_utils.cc            | 2 +-
 mindspore/ccsrc/utils/log_adapter.cc              | 2 +-
 9 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/mindspore/ccsrc/operator/composite/composite.h b/mindspore/ccsrc/operator/composite/composite.h
index 429cf5341a..6c4bede82b 100644
--- a/mindspore/ccsrc/operator/composite/composite.h
+++ b/mindspore/ccsrc/operator/composite/composite.h
@@ -210,7 +210,6 @@ class TensorSlice : public MetaFuncGraph {
   FuncGraphPtr ExpandADim(const FuncGraphPtr &ret_graph, const AnfNodePtr &tensor_node) const;
 };
 using TensorSlicePtr = std::shared_ptr<TensorSlice>;
-
 }  // namespace prim
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/operator/composite/unpack_call.cc b/mindspore/ccsrc/operator/composite/unpack_call.cc
index 122f276657..6363d495c5 100644
--- a/mindspore/ccsrc/operator/composite/unpack_call.cc
+++ b/mindspore/ccsrc/operator/composite/unpack_call.cc
@@ -89,6 +89,5 @@ REGISTER_PYBIND_DEFINE(UnpackCall_, ([](const py::module *m) {
                          (void)py::class_<UnpackCall, MetaFuncGraph, std::shared_ptr<UnpackCall>>(*m, "UnpackCall_")
                            .def(py::init<std::string &>());
                        }));
-
 }  // namespace prim
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/transform/convert.h b/mindspore/ccsrc/transform/convert.h
index 5596e20f19..39efd5d287 100644
--- a/mindspore/ccsrc/transform/convert.h
+++ b/mindspore/ccsrc/transform/convert.h
@@ -252,7 +252,6 @@ class DfGraphConvertor {
   bool training_ = false;
   bool distribute_ = false;
 };
-
 }  // namespace transform
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/utils/any.cc b/mindspore/ccsrc/utils/any.cc
index 3cb89f5dd7..80b8d86658 100644
--- a/mindspore/ccsrc/utils/any.cc
+++ b/mindspore/ccsrc/utils/any.cc
@@ -26,7 +26,7 @@ bool AnyIsLiteral(const Any &any) {
   static const std::type_index typeid_float = std::type_index(typeid(float));
   static const std::type_index typeid_bool = std::type_index(typeid(bool));
 
-  std::type_index typeid_any = std::type_index(any.type());
+  auto typeid_any = std::type_index(any.type());
   return typeid_int == typeid_any || typeid_float == typeid_any || typeid_bool == typeid_any;
 }
 
diff --git a/mindspore/ccsrc/utils/callbacks.cc b/mindspore/ccsrc/utils/callbacks.cc
index 06bf1c73ab..ad9751c332 100644
--- a/mindspore/ccsrc/utils/callbacks.cc
+++ b/mindspore/ccsrc/utils/callbacks.cc
@@ -26,7 +26,6 @@
 
 namespace mindspore {
 namespace callbacks {
-
 const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback";
 const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op";
 const char PYTHON_FUN_PROCESS_SUMMARY[] = "_summary_cb_for_save_op";
diff --git a/mindspore/ccsrc/utils/callbacks_ge.cc b/mindspore/ccsrc/utils/callbacks_ge.cc
index b4c9fda634..da817b3f78 100644
--- a/mindspore/ccsrc/utils/callbacks_ge.cc
+++ b/mindspore/ccsrc/utils/callbacks_ge.cc
@@ -24,7 +24,6 @@
 
 namespace mindspore {
 namespace callbacks {
-
 const char PYTHON_MOD_CALLBACK_MODULE[] = "mindspore.train.callback";
 const char PYTHON_FUN_PROCESS_CHECKPOINT[] = "_checkpoint_cb_for_save_op";
 const char PYTHON_FUN_PROCESS_SUMMARY[] = "_summary_cb_for_save_op";
diff --git a/mindspore/ccsrc/utils/callbacks_ge.h b/mindspore/ccsrc/utils/callbacks_ge.h
index 08f5bb59db..f9088f3f5a 100644
--- a/mindspore/ccsrc/utils/callbacks_ge.h
+++ b/mindspore/ccsrc/utils/callbacks_ge.h
@@ -26,12 +26,10 @@
 
 namespace mindspore {
 namespace callbacks {
-
 using mindspore::tensor::TensorPtr;
 
 uint32_t CheckpointSaveCallback(uint32_t, const std::map<std::string, ge::Tensor> &);
 uint32_t SummarySaveCallback(uint32_t, const std::map<std::string, ge::Tensor> &);
-
 }  // namespace callbacks
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc
index 049c1dcdb8..df4a8656f5 100644
--- a/mindspore/ccsrc/utils/convert_utils.cc
+++ b/mindspore/ccsrc/utils/convert_utils.cc
@@ -335,7 +335,7 @@ py::object VectorRefToPyData(const VectorRef &value_list) {
   py::object ret;
   MS_LOG(DEBUG) << "vector_ref";
   size_t value_size = value_list.size();
-  py::tuple ref_tuple = py::tuple(value_size);
+  auto ref_tuple = py::tuple(value_size);
   for (size_t i = 0; i < value_size; i++) {
     ref_tuple[i] = BaseRefToPyData(value_list[i]);
   }
diff --git a/mindspore/ccsrc/utils/log_adapter.cc b/mindspore/ccsrc/utils/log_adapter.cc
index deb4f3838e..b23916b4fe 100644
--- a/mindspore/ccsrc/utils/log_adapter.cc
+++ b/mindspore/ccsrc/utils/log_adapter.cc
@@ -35,7 +35,7 @@ static std::string GetTime() {
             now_time.tm_hour, now_time.tm_min, now_time.tm_sec);
 #else
   struct timeval cur_time;
-  (void)gettimeofday(&cur_time, NULL);
+  (void)gettimeofday(&cur_time, nullptr);
 
   struct tm now;
   (void)localtime_r(&cur_time.tv_sec, &now);

From 56f785f7e6fecc4bd307d30a31f0fa9586891845 Mon Sep 17 00:00:00 2001
From: lirongzhen1 <lirongzhen1@huawei.com>
Date: Fri, 24 Apr 2020 18:04:41 +0800
Subject: [PATCH 057/242] add context configration

---
 tests/ut/python/parallel/test_auto_parallel_resnet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ut/python/parallel/test_auto_parallel_resnet.py b/tests/ut/python/parallel/test_auto_parallel_resnet.py
index ae7bd952d9..726b3f356f 100644
--- a/tests/ut/python/parallel/test_auto_parallel_resnet.py
+++ b/tests/ut/python/parallel/test_auto_parallel_resnet.py
@@ -303,7 +303,7 @@ def train_32k_8p(epoch_size=3, batch_size=32, num_classes=32768):
     return allreduce_fusion_dict
 
 
-def test_train_32k_8p_fusion1(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192
+def train_32k_8p_fusion1(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192
     cost_model_context.set_cost_model_context(costmodel_gamma=0.001, costmodel_beta=400.0)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
@@ -475,7 +475,7 @@ def test_train_32k_8p_fusion1(epoch_size=3, batch_size=32, num_classes=32768): #
     cost_model_context.reset_cost_model_context()
 
 
-def test_train_32k_8p_fusion2(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192
+def train_32k_8p_fusion2(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=2)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_time=0.1)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.05)

From 1e8997f4cb55b0808f76f5c8bdbd1c763b2b16f7 Mon Sep 17 00:00:00 2001
From: kswang <wangkaisheng2@huawei.com>
Date: Thu, 23 Apr 2020 20:56:42 +0800
Subject: [PATCH 058/242] optimize sort for mem reuse and fix memreuse bug

---
 .../mem_reuse/mem_reuse_allocator.cc          |   3 +-
 .../ccsrc/session/anf_runtime_algorithm.cc    |  15 +-
 .../ccsrc/session/anf_runtime_algorithm.h     |   1 -
 mindspore/ccsrc/session/kernel_graph.cc       | 133 +++++++++---------
 mindspore/ccsrc/session/kernel_graph.h        |   5 +-
 5 files changed, 76 insertions(+), 81 deletions(-)

diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc
index 1cecd170d3..8a3647d980 100644
--- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc
+++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_reuse_allocator.cc
@@ -251,9 +251,10 @@ void BestFitMemReuse::ReleaseNodeUnusedOutput(const KernelDef *kernel_def_ptr) {
 }
 
 size_t BestFitMemReuse::FindIndx(const std::vector<MembufPtr> &membuf_ptr_list, int fac_idx) const {
-  size_t membuf_index = 0;
+  size_t membuf_index = membuf_ptr_list.size();
   for (size_t n = 0; n < membuf_ptr_list.size(); ++n) {
     auto membuf = membuf_ptr_list[n];
+    MS_EXCEPTION_IF_NULL(membuf);
     if (membuf->index_ == fac_idx) {
       membuf_index = n;
       break;
diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
index 525ff44dd8..3d5be5298a 100644
--- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
@@ -851,17 +851,12 @@ void AnfRuntimeAlgorithm::SetNodeInput(const CNodePtr &node, const AnfNodePtr &i
 
 bool AnfRuntimeAlgorithm::IsCommunicationOp(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_name = AnfAlgo::GetCNodeName(node);
-  auto kernel_type = AnfAlgo::GetKernelType(node);
-  if (kernel_name == kAllReduceOpName || kernel_type == HCCL_KERNEL) {
-    return true;
+  if (!node->isa<CNode>()) {
+    return false;
   }
-  return false;
-}
-
-bool AnfRuntimeAlgorithm::IsAllReduceOp(const AnfNodePtr &node) {
-  MS_EXCEPTION_IF_NULL(node);
-  if (node->isa<CNode>() && AnfAlgo::GetCNodeName(node) == kAllReduceOpName) {
+  auto kernel_name = AnfAlgo::GetCNodeName(node);
+  if (kernel_name == kAllReduceOpName || kernel_name == kAllGatherOpName || kernel_name == kBroadcastOpName ||
+      kernel_name == kReduceScatterOpName) {
     return true;
   }
   return false;
diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.h b/mindspore/ccsrc/session/anf_runtime_algorithm.h
index 78359cdd5a..a70a63b678 100644
--- a/mindspore/ccsrc/session/anf_runtime_algorithm.h
+++ b/mindspore/ccsrc/session/anf_runtime_algorithm.h
@@ -176,7 +176,6 @@ class AnfRuntimeAlgorithm {
   // get real input index for some tbe ops which input order is different between me and tbe impl
   static size_t GetRealInputIndex(const AnfNodePtr &anf_node, const size_t cur_index);
   static bool IsCommunicationOp(const AnfNodePtr &node);
-  static bool IsAllReduceOp(const AnfNodePtr &node);
   static bool IsGetNext(const NotNull<AnfNodePtr> &node);
 };
 }  // namespace session
diff --git a/mindspore/ccsrc/session/kernel_graph.cc b/mindspore/ccsrc/session/kernel_graph.cc
index 139539ccb2..cdadf389a6 100755
--- a/mindspore/ccsrc/session/kernel_graph.cc
+++ b/mindspore/ccsrc/session/kernel_graph.cc
@@ -49,80 +49,81 @@ std::vector<AnfNodePtr> KernelGraph::outputs() const {
   return std::vector<AnfNodePtr>();
 }
 
-void KernelGraph::SetExecOrderByDefault() {
-  std::stack<AnfNodePtr> seed_nodes;
-  UpdateNodeEdgeList(&seed_nodes);
-  execution_order_.clear();
-  std::unordered_set<AnfNodePtr> visited_nodes;
-  std::queue<AnfNodePtr> zero_input_nodes;
-
-  auto visit_node_descendant = [&visited_nodes, this](const AnfNodePtr &node, std::queue<AnfNodePtr> *visit_queue) {
-    auto it = node_output_edges_.find(node);
-    if (it == node_output_edges_.end()) {
-      // value node and parameter has no input,no need to print log
-      if (node->isa<CNode>()) {
-        MS_LOG(DEBUG) << "Can not find node [" << node->DebugString() << "]";
-      }
-      return;
+void KernelGraph::VisitNodeDescendants(const AnfNodePtr &node, std::queue<AnfNodePtr> *visit_queue,
+                                       std::unordered_set<AnfNodePtr> *visited_nodes) {
+  MS_EXCEPTION_IF_NULL(visit_queue);
+  MS_EXCEPTION_IF_NULL(visited_nodes);
+  auto it = node_output_edges_.find(node);
+  if (it == node_output_edges_.end()) {
+    // value node and parameter has no input,no need to print log
+    if (node->isa<CNode>()) {
+      MS_LOG(DEBUG) << "Can not find node [" << node->DebugString() << "]";
     }
+    return;
+  }
 
-    // visit all reduce node first, then other nodes
-    std::vector<AnfNodePtr> active_nodes;
-    for (const auto &output_edge : it->second) {
-      auto next_node = output_edge.first;
-      if (node_input_num_.find(next_node) == node_input_num_.end()) {
-        MS_EXCEPTION_IF_NULL(next_node);
-        MS_LOG(EXCEPTION) << "Can't find node[" << next_node->DebugString() << "]";
-      }
+  // visit all reduce node first, then other nodes
+  std::vector<AnfNodePtr> active_nodes;
+  for (const auto &output_edge : it->second) {
+    auto next_node = output_edge.first;
+    if (node_input_num_.find(next_node) == node_input_num_.end()) {
       MS_EXCEPTION_IF_NULL(next_node);
-      MS_LOG(DEBUG) << "Decrease input:" << next_node->DebugString() << ",node:" << node->DebugString()
-                    << ",num: " << node_input_num_[next_node] << ",decrease num:" << output_edge.second;
-      if (node_input_num_[next_node] < output_edge.second) {
-        MS_LOG(EXCEPTION) << "Input node:" << next_node->DebugString() << ",node_output_num"
-                          << node_input_num_[next_node] << ",depend edge:" << output_edge.second;
-      }
-      node_input_num_[next_node] = node_input_num_[next_node] - output_edge.second;
-      // allreduce first
-      if (node_input_num_[next_node] == 0 && visited_nodes.find(next_node) == visited_nodes.end()) {
-        (void)visited_nodes.insert(next_node);
-        if (AnfAlgo::IsAllReduceOp(next_node)) {
-          MS_LOG(DEBUG) << "visit node:" << next_node->DebugString();
-          visit_queue->push(next_node);
-        } else {
-          active_nodes.emplace_back(next_node);
-        }
+      MS_LOG(EXCEPTION) << "Can't find node[" << next_node->DebugString() << "]";
+    }
+    MS_EXCEPTION_IF_NULL(next_node);
+    MS_LOG(DEBUG) << "Decrease input:" << next_node->DebugString() << ",node:" << node->DebugString()
+                  << ",num: " << node_input_num_[next_node] << ",decrease num:" << output_edge.second;
+    if (node_input_num_[next_node] < output_edge.second) {
+      MS_LOG(EXCEPTION) << "Input node:" << next_node->DebugString() << ",node_output_num" << node_input_num_[next_node]
+                        << ",depend edge:" << output_edge.second;
+    }
+    node_input_num_[next_node] = node_input_num_[next_node] - output_edge.second;
+    // allreduce first
+    if (node_input_num_[next_node] == 0 && visited_nodes->find(next_node) == visited_nodes->end()) {
+      (void)visited_nodes->insert(next_node);
+      if (AnfAlgo::IsCommunicationOp(next_node)) {
+        MS_LOG(DEBUG) << "visit node:" << next_node->DebugString();
+        visit_queue->push(next_node);
+      } else {
+        active_nodes.emplace_back(next_node);
       }
     }
+  }
 
-    for (auto &node : active_nodes) {
-      MS_LOG(DEBUG) << "visit node:" << node->DebugString();
-      visit_queue->push(node);
-    }
-  };
+  for (auto &node : active_nodes) {
+    MS_LOG(DEBUG) << "visit node:" << node->DebugString();
+    visit_queue->push(node);
+  }
+}
 
-  AnfNodePtr last_allreduce_node = nullptr;
-  std::queue<AnfNodePtr> allreduce_descendants;
-  while (!seed_nodes.empty() || last_allreduce_node != nullptr) {
+void KernelGraph::SetExecOrderByDefault() {
+  std::queue<AnfNodePtr> seed_nodes;
+  UpdateNodeEdgeList(&seed_nodes);
+  execution_order_.clear();
+  std::unordered_set<AnfNodePtr> visited_nodes;
+  std::queue<AnfNodePtr> zero_input_nodes;
+  AnfNodePtr last_communication_node = nullptr;
+  std::queue<AnfNodePtr> communication_descendants;
+  while (!seed_nodes.empty() || last_communication_node != nullptr) {
     // seed nodes first, then visit last all reduce node descendant
     if (seed_nodes.empty()) {
-      visit_node_descendant(last_allreduce_node, &allreduce_descendants);
-      last_allreduce_node = nullptr;
+      VisitNodeDescendants(last_communication_node, &communication_descendants, &visited_nodes);
+      last_communication_node = nullptr;
     } else {
-      zero_input_nodes.push(seed_nodes.top());
+      zero_input_nodes.push(seed_nodes.front());
       seed_nodes.pop();
     }
-
     // all reduce node descendant first, then common queue
-    while (!zero_input_nodes.empty() || !allreduce_descendants.empty()) {
+    while (!zero_input_nodes.empty() || !communication_descendants.empty()) {
       AnfNodePtr node = nullptr;
-      bool is_allreduce_descendant = false;
-      if (allreduce_descendants.empty()) {
+      bool is_communication_descendant = false;
+      if (communication_descendants.empty()) {
         node = zero_input_nodes.front();
         zero_input_nodes.pop();
       } else {
-        node = allreduce_descendants.front();
-        allreduce_descendants.pop();
-        is_allreduce_descendant = true;
+        node = communication_descendants.front();
+        communication_descendants.pop();
+        is_communication_descendant = true;
       }
       // add execute node
       MS_EXCEPTION_IF_NULL(node);
@@ -130,19 +131,18 @@ void KernelGraph::SetExecOrderByDefault() {
         execution_order_.push_back(node->cast<CNodePtr>());
       }
       // for all reduce node, visit last all reduce node descendant
-      if (AnfAlgo::IsAllReduceOp(node)) {
-        if (last_allreduce_node != nullptr) {
-          visit_node_descendant(last_allreduce_node, &allreduce_descendants);
+      if (AnfAlgo::IsCommunicationOp(node)) {
+        if (last_communication_node != nullptr) {
+          VisitNodeDescendants(last_communication_node, &communication_descendants, &visited_nodes);
         }
-        last_allreduce_node = node;
-      } else if (is_allreduce_descendant) {
-        visit_node_descendant(node, &allreduce_descendants);
+        last_communication_node = node;
+      } else if (is_communication_descendant) {
+        VisitNodeDescendants(node, &communication_descendants, &visited_nodes);
       } else {
-        visit_node_descendant(node, &zero_input_nodes);
+        VisitNodeDescendants(node, &zero_input_nodes, &visited_nodes);
       }
     }
   }
-
   CheckLoop();
 }
 
@@ -467,7 +467,7 @@ bool KernelGraph::HandleControlDependNode(const AnfNodePtr &node, std::queue<Anf
   return true;
 }
 
-void KernelGraph::UpdateNodeEdgeList(std::stack<AnfNodePtr> *seed_nodes) {
+void KernelGraph::UpdateNodeEdgeList(std::queue<AnfNodePtr> *seed_nodes) {
   node_output_edges_.clear();
   node_input_num_.clear();
   node_input_edges_.clear();
@@ -483,7 +483,6 @@ void KernelGraph::UpdateNodeEdgeList(std::stack<AnfNodePtr> *seed_nodes) {
       seed_nodes->push(node);
       continue;
     }
-
     if (!node->isa<CNode>()) {
       continue;
     }
diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/session/kernel_graph.h
index 54b16014a3..a33e8f7bd6 100755
--- a/mindspore/ccsrc/session/kernel_graph.h
+++ b/mindspore/ccsrc/session/kernel_graph.h
@@ -22,7 +22,6 @@
 #include <utility>
 #include <string>
 #include <queue>
-#include <stack>
 #include <map>
 #include <unordered_set>
 #include "ir/func_graph.h"
@@ -94,8 +93,10 @@ class KernelGraph : public FuncGraph {
  private:
   // remove value node form graph
   bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node);
+  void VisitNodeDescendants(const AnfNodePtr &node, std::queue<AnfNodePtr> *visit_queue,
+                            std::unordered_set<AnfNodePtr> *visited_nodes);
   // update node edge list
-  void UpdateNodeEdgeList(std::stack<AnfNodePtr> *seed_nodes);
+  void UpdateNodeEdgeList(std::queue<AnfNodePtr> *seed_nodes);
   // add node depend edge by data edge or control depend
   void AddDependEdge(const AnfNodePtr &node, const AnfNodePtr &input, size_t depend_edge_num);
   // handle control depend

From 2d115cd04e0cd856e0cda160422afed9c8291745 Mon Sep 17 00:00:00 2001
From: eric <eric.zhang1@huawei.com>
Date: Wed, 22 Apr 2020 11:14:35 -0400
Subject: [PATCH 059/242] Added example for multiple iterator

Added new testcase for multi iterator

Addressing review

Fixed typo
---
 .../dataset/engine/datasetops/barrier_op.cc   |  4 +--
 mindspore/dataset/engine/datasets.py          |  2 +-
 tests/ut/python/dataset/test_sync_wait.py     | 32 ++++++++++++++++++-
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc
index b0ea7dbd07..b50a7788da 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc
@@ -65,8 +65,8 @@ Status BarrierOp::operator()() {
   TaskManager::FindMe()->Post();
 
   // create child iterator, right now this barrier is a pipeline operator
-  int32_t worker_id = 0;
-  int32_t child_idx = 0;
+  const int32_t worker_id = 0;
+  const int32_t child_idx = 0;
   child_iterator_ = std::make_unique<ChildIterator>(this, worker_id, child_idx);
 
   // Loop until eof is true
diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 1e16b5161e..317704d533 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -920,7 +920,7 @@ class Dataset:
     def sync_update(self, condition_name, num_batch=None, data=None):
         """
         condition_name (str): The condition name that is used to toggle sending next row
-        step_size (int or None): The number of steps(rows) that are released
+        num_batch (int or None): The number of batches(rows) that are released
                          when pass_rows is None, will update the same number as sync_wait specified
         data (dict or None): The data passed to the callback
         """
diff --git a/tests/ut/python/dataset/test_sync_wait.py b/tests/ut/python/dataset/test_sync_wait.py
index 277499d9ae..7e9fade39d 100644
--- a/tests/ut/python/dataset/test_sync_wait.py
+++ b/tests/ut/python/dataset/test_sync_wait.py
@@ -107,6 +107,7 @@ def test_two_sync():
         if count % 2 == 0:
             dataset.sync_update(condition_name="every 2 batches")
 
+
 def test_sync_epoch():
     """
     Test sync wait with epochs: test sync with epochs in dataset pipeline  
@@ -130,6 +131,34 @@ def test_sync_epoch():
             dataset.sync_update(condition_name="policy", data=data)
 
 
+def test_multiple_iterators():
+    """
+    Test sync wait with multiple iterators: two separately built pipelines are iterated together
+    """
+    logger.info("test_multiple_iterators")
+    batch_size = 30
+    dataset = ds.GeneratorDataset(gen, column_names=["input"])
+
+    aug = Augment(0)
+    dataset = dataset.sync_wait(condition_name="policy", callback=aug.update)
+    dataset = dataset.map(input_columns=["input"], operations=[aug.preprocess])
+    dataset = dataset.batch(batch_size, drop_remainder=True)
+    # 2nd dataset, built the same way but with its own Augment instance
+    dataset2 = ds.GeneratorDataset(gen, column_names=["input"])
+
+    aug = Augment(0)
+    dataset2 = dataset2.sync_wait(condition_name="policy", callback=aug.update)
+    dataset2 = dataset2.map(input_columns=["input"], operations=[aug.preprocess])
+    dataset2 = dataset2.batch(batch_size, drop_remainder=True)
+    # step both iterators in lockstep; each pipeline gets its own sync_update per batch
+    for item1, item2 in zip(dataset.create_dict_iterator(), dataset2.create_dict_iterator()):
+        assert item1["input"][0] == item2["input"][0]
+        data1 = {"loss": item1["input"][0]}
+        data2 = {"loss": item2["input"][0]}
+        dataset.sync_update(condition_name="policy", data=data1)
+        dataset2.sync_update(condition_name="policy", data=data2)
+
+
 def test_sync_exception_01():
     """
     Test sync: with shuffle in sync mode 
@@ -179,4 +208,5 @@ if __name__ == "__main__":
     test_two_sync()
     test_sync_exception_01()
     test_sync_exception_02()
-    test_sync_epoch()
\ No newline at end of file
+    test_sync_epoch()
+    test_multiple_iterators()

From 97a6fc77a1b498e39ca40054109f6b1816bfc2e7 Mon Sep 17 00:00:00 2001
From: zhoufeng <zhoufeng54@huawei.com>
Date: Sat, 25 Apr 2020 14:46:42 +0800
Subject: [PATCH 060/242] only pack vc runtime v14.0

---
 cmake/package.cmake | 2 +-
 graphengine         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/package.cmake b/cmake/package.cmake
index 1961b10025..f0a080e776 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -153,7 +153,7 @@ endif ()
 if (CMAKE_SYSTEM_NAME MATCHES "Windows")
     get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH)
     file(GLOB CXX_LIB_LIST ${CXX_DIR}/*.dll)
-    file(GLOB VC_LIB_LIST $ENV{SystemRoot}/System32/msvcp*.dll $ENV{SystemRoot}/System32/vcomp*.dll)
+    file(GLOB VC_LIB_LIST $ENV{SystemRoot}/System32/msvcp140.dll $ENV{SystemRoot}/System32/vcomp140.dll)
     file(GLOB JPEG_LIB_LIST ${jpeg_turbo_LIBPATH}/*.dll)
     file(GLOB SQLITE_LIB_LIST ${sqlite_LIBPATH}/*.dll)
     install(
diff --git a/graphengine b/graphengine
index 43f5d24337..0c33e9d125 160000
--- a/graphengine
+++ b/graphengine
@@ -1 +1 @@
-Subproject commit 43f5d24337bf785251eefae2d810c7d5684194d6
+Subproject commit 0c33e9d12562953ca4bd6c03cb77da2c2da74acd

From 219bc184399ea1b2827974452d19bc62971836d0 Mon Sep 17 00:00:00 2001
From: jinyaohui <jinyaohui@huawei.com>
Date: Sat, 25 Apr 2020 17:14:53 +0800
Subject: [PATCH 061/242] clean pylint

---
 example/yolov3_coco2017/dataset.py            |  2 +-
 mindspore/_akg/__init__.py                    |  6 ++--
 mindspore/nn/optim/ftrl.py                    | 10 +++++--
 mindspore/ops/operations/nn_ops.py            |  6 ++--
 mindspore/train/amp.py                        |  3 ++
 mindspore/train/callback.py                   |  4 +--
 mindspore/train/model.py                      | 28 +++++++++----------
 .../apps/test_bert_parts.py                   |  9 ++----
 .../components/executor/check_exceptions.py   |  2 +-
 9 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/example/yolov3_coco2017/dataset.py b/example/yolov3_coco2017/dataset.py
index 9c6a0f362d..23d34e0f4f 100644
--- a/example/yolov3_coco2017/dataset.py
+++ b/example/yolov3_coco2017/dataset.py
@@ -18,8 +18,8 @@ from __future__ import division
 
 import os
 import numpy as np
-from PIL import Image
 from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
+from PIL import Image
 import mindspore.dataset as de
 from mindspore.mindrecord import FileWriter
 import mindspore.dataset.transforms.vision.c_transforms as C
diff --git a/mindspore/_akg/__init__.py b/mindspore/_akg/__init__.py
index e3dceaf35e..a343e3532a 100644
--- a/mindspore/_akg/__init__.py
+++ b/mindspore/_akg/__init__.py
@@ -16,6 +16,9 @@
 from __future__ import absolute_import as _abs
 import sys
 import os
+from .op_build import op_build
+from .message import compilewithjson
+
 
 def AKGAddPath():
     """_akg add path."""
@@ -58,6 +61,3 @@ class AKGMetaPathLoader:
 
 
 sys.meta_path.insert(0, AKGMetaPathFinder())
-
-from .op_build import op_build
-from .message import compilewithjson
diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py
index 2bc329f42d..e6f658acae 100644
--- a/mindspore/nn/optim/ftrl.py
+++ b/mindspore/nn/optim/ftrl.py
@@ -14,7 +14,6 @@
 # ============================================================================
 """FTRL"""
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore.common.initializer import initializer
 from mindspore.common.parameter import Parameter
 from mindspore.common import Tensor
 import mindspore.common.dtype as mstype
@@ -23,6 +22,8 @@ from mindspore._checkparam import Rel
 from .optimizer import Optimizer, apply_decay, grad_scale
 
 ftrl_opt = C.MultitypeFuncGraph("ftrl_opt")
+
+
 @ftrl_opt.register("Function", "Tensor", "Number", "Number", "Number", "Tensor", "Tensor", "Tensor", "Tensor")
 def _tensor_run_opt(opt, learning_rate, l1, l2, lr_power, linear, gradient, weight, moment):
     """Apply ftrl optimizer to the weight parameter."""
@@ -30,8 +31,10 @@ def _tensor_run_opt(opt, learning_rate, l1, l2, lr_power, linear, gradient, weig
     success = F.depend(success, opt(weight, moment, linear, gradient, learning_rate, l1, l2, lr_power))
     return success
 
+
 def _check_param(initial_accum, learning_rate, lr_power, l1, l2, use_locking, loss_scale=1.0, weight_decay=0.0,
                  prim_name=None):
+    """Check param."""
     validator.check_value_type("initial_accum", initial_accum, [float], prim_name)
     validator.check_number("initial_accum", initial_accum, 0.0, Rel.GE, prim_name)
 
@@ -104,7 +107,7 @@ class FTRL(Optimizer):
         self.lr_power = lr_power
         self.reciprocal_scale = 1.0 / loss_scale
         self.weight_decay = weight_decay
-        self.decay_tf = tuple((lambda:True)() for x in self.parameters)
+        self.decay_tf = tuple((lambda: True)() for x in self.parameters)
         self.hyper_map = C.HyperMap()
         self.opt = P.ApplyFtrl(use_locking=use_locking)
         self.one = Tensor(1, mstype.int32)
@@ -118,5 +121,6 @@ class FTRL(Optimizer):
         if self.reciprocal_scale != 1.0:
             grads = self.hyper_map(F.partial(grad_scale, self.reciprocal_scale), grads)
         lr = self.learning_rate
-        success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power), linear, grads, params, moments)
+        success = self.hyper_map(F.partial(ftrl_opt, self.opt, lr, self.l1, self.l2, self.lr_power),
+                                 linear, grads, params, moments)
         return success
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index e574dd2566..ecb707ed51 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2063,7 +2063,7 @@ class LSTM(PrimitiveWithInfer):
         return (y_shape, h_shape, c_shape, reserved_shape, state_shape)
 
     def infer_dtype(self, x_dtype, h_dtype, c_dtype, w_dtype):
-        args =  {'x': x_dtype,  'h': h_dtype, 'c': c_dtype, 'w': w_dtype}
+        args = {'x': x_dtype, 'h': h_dtype, 'c': c_dtype, 'w': w_dtype}
         validator.check_tensor_type_same(args, (mstype.float32, mstype.float16), self.name)
         return (x_dtype, x_dtype, x_dtype, x_dtype, x_dtype)
 
@@ -2691,8 +2691,8 @@ class ConfusionMulGrad(PrimitiveWithInfer):
     """
 
     @prim_attr_register
-    def __init__(self, axis = (), keep_dims = False):
-        self.init_prim_io_names(inputs = ["input0", "input1", "input2"], outputs = ["output0", "output1"])
+    def __init__(self, axis=(), keep_dims=False):
+        self.init_prim_io_names(inputs=["input0", "input1", "input2"], outputs=["output0", "output1"])
         self.axis_ = validator.check_value_type("axis", axis, [int, tuple, list], self.name)
         self.keep_dims_ = validator.check_value_type("keep_dims", keep_dims, [bool], self.name)
 
diff --git a/mindspore/train/amp.py b/mindspore/train/amp.py
index 917b4c3359..2e758b0e9d 100644
--- a/mindspore/train/amp.py
+++ b/mindspore/train/amp.py
@@ -41,6 +41,7 @@ class OutputTo16(nn.Cell):
 
 
 def _do_keep_batchnorm_fp32(network):
+    """Do keep batchnorm fp32."""
     cells = network.name_cells()
     change = False
     for name in cells:
@@ -68,6 +69,7 @@ _config_level = {
 
 
 def _check_kwargs(key_words):
+    """Check kwargs."""
     for arg in key_words:
         if arg not in ['cast_model_type', 'keep_batchnorm_fp32', 'loss_scale_manager']:
             raise  ValueError(f"Unsupported arg '{arg}'")
@@ -84,6 +86,7 @@ def _check_kwargs(key_words):
 
 
 def _add_loss_network(network, loss_fn, cast_model_type):
+    """Add loss network."""
     class WithLossCell(nn.Cell):
         "Wrap loss for amp. Cast network output back to float32"
 
diff --git a/mindspore/train/callback.py b/mindspore/train/callback.py
index b9635acc62..c8ce5d22ef 100644
--- a/mindspore/train/callback.py
+++ b/mindspore/train/callback.py
@@ -683,13 +683,14 @@ class LossMonitor(Callback):
 
 
 class TimeMonitor(Callback):
+    """Time Monitor."""
     def __init__(self, data_size):
         super(TimeMonitor, self).__init__()
         self.data_size = data_size
 
     def epoch_begin(self, run_context):
         self.epoch_time = time.time()
-   
+
     def epoch_end(self, run_context):
         epoch_mseconds = (time.time() - self.epoch_time) * 1000
         per_step_mseconds = epoch_mseconds / self.data_size
@@ -701,4 +702,3 @@ class TimeMonitor(Callback):
     def step_end(self, run_context):
         step_mseconds = (time.time() - self.step_time) * 1000
         print('step time', step_mseconds, flush=True)
-
diff --git a/mindspore/train/model.py b/mindspore/train/model.py
index 36e9417095..66b03ce06c 100755
--- a/mindspore/train/model.py
+++ b/mindspore/train/model.py
@@ -122,7 +122,7 @@ class Model:
     def _check_kwargs(self, kwargs):
         for arg in kwargs:
             if arg not in ['loss_scale_manager', 'keep_batchnorm_fp32']:
-                raise  ValueError(f"Unsupport arg '{arg}'")
+                raise ValueError(f"Unsupport arg '{arg}'")
 
     def _build_train_network(self):
         """Build train network"""
@@ -130,17 +130,17 @@ class Model:
         if self._optimizer:
             if self._loss_scale_manager_set:
                 network = amp.build_train_network(network,
-                                                self._optimizer,
-                                                self._loss_fn,
-                                                level=self._amp_level,
-                                                loss_scale_manager=self._loss_scale_manager,
-                                                keep_batchnorm_fp32=self._keep_bn_fp32)
+                                                  self._optimizer,
+                                                  self._loss_fn,
+                                                  level=self._amp_level,
+                                                  loss_scale_manager=self._loss_scale_manager,
+                                                  keep_batchnorm_fp32=self._keep_bn_fp32)
             else:
                 network = amp.build_train_network(network,
-                                                self._optimizer,
-                                                self._loss_fn,
-                                                level=self._amp_level,
-                                                keep_batchnorm_fp32=self._keep_bn_fp32)
+                                                  self._optimizer,
+                                                  self._loss_fn,
+                                                  level=self._amp_level,
+                                                  keep_batchnorm_fp32=self._keep_bn_fp32)
         elif self._loss_fn:
             network = nn.WithLossCell(network, self._loss_fn)
         # If need to check if loss_fn is not None, but optimizer is None
@@ -273,14 +273,14 @@ class Model:
         # remove later to deal with loop sink
         need_wrap = False
         if not hasattr(train_dataset, '__ME_INITED__') and context.get_context("enable_loop_sink") \
-               and not context.get_context("enable_ge"):
+                and not context.get_context("enable_ge"):
             need_wrap = True
 
         dataset_helper = DatasetHelper(train_dataset)
         # remove later to deal with loop sink
         if need_wrap:
             self._train_network = nn.DataWrapper(self._train_network, *(dataset_helper.types_shapes()),
-                                              train_dataset.__ME_INITED__)
+                                                 train_dataset.__ME_INITED__)
             cb_params.train_network = self._train_network
             self._train_network.set_train()
 
@@ -440,7 +440,7 @@ class Model:
         # remove later to deal with loop sink
         need_wrap = False
         if not hasattr(valid_dataset, '__ME_INITED__') and context.get_context("enable_loop_sink") \
-               and not context.get_context("enable_ge"):
+                and not context.get_context("enable_ge"):
             need_wrap = True
 
         valid_dataset.__loop_size__ = 1
@@ -449,7 +449,7 @@ class Model:
         # remove later to deal with loop sink
         if need_wrap:
             self._eval_network = nn.DataWrapper(self._eval_network, *(dataset_helper.types_shapes()),
-                                             valid_dataset.__ME_INITED__)
+                                                valid_dataset.__ME_INITED__)
             self._eval_network.set_train(mode=False)
             self._eval_network.phase = 'eval'
 
diff --git a/tests/mindspore_test_framework/apps/test_bert_parts.py b/tests/mindspore_test_framework/apps/test_bert_parts.py
index 226d175c3d..944ea07842 100644
--- a/tests/mindspore_test_framework/apps/test_bert_parts.py
+++ b/tests/mindspore_test_framework/apps/test_bert_parts.py
@@ -174,8 +174,7 @@ test_sets = [
                                                   embedding_shape=[1, 128, 768],
                                                   use_one_hot_embeddings=True,
                                                   initializer_range=0.02), 1, 1), {
-                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)
-                  }),
+                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
         'desc_inputs': [input_ids],
         'desc_bprop': [[128]]}),
     ('EmbeddingLookup_multi_outputs_init_param', {
@@ -184,8 +183,7 @@ test_sets = [
                                   embedding_shape=[1, 128, 768],
                                   use_one_hot_embeddings=False,
                                   initializer_range=0.02), {
-                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)
-                  }),
+                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
         'desc_inputs': [input_ids],
         'desc_bprop': [[1, 128, 768], [128]]}),
     ('EmbeddingLookup_multi_outputs_grad_with_no_sens', {
@@ -194,8 +192,7 @@ test_sets = [
                                   embedding_shape=[1, 128, 768],
                                   use_one_hot_embeddings=False,
                                   initializer_range=0.02), {
-                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)
-                  }),
+                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
         'desc_inputs': [input_ids]}),
     ('GetMaskedLMOutput_grad_with_no_sens', {
         'block': GetMaskedLMOutput(BertConfig(batch_size=1)),
diff --git a/tests/mindspore_test_framework/components/executor/check_exceptions.py b/tests/mindspore_test_framework/components/executor/check_exceptions.py
index fe57a3d287..a4eb1cd8a0 100644
--- a/tests/mindspore_test_framework/components/executor/check_exceptions.py
+++ b/tests/mindspore_test_framework/components/executor/check_exceptions.py
@@ -44,4 +44,4 @@ class CheckExceptionsEC(IExectorComponent):
             raise Exception(f"Expect {e}, but got {sys.exc_info()[0]}")
         if error_kws and any(keyword not in str(exec_info.value) for keyword in error_kws):
             raise ValueError('Error message `{}` does not contain all keywords `{}`'.format(
-                             str(exec_info.value), error_kws))
+                str(exec_info.value), error_kws))

From 30f397dbd636af1decad3533a170d5ecb41e364f Mon Sep 17 00:00:00 2001
From: Wei Luning <weiluning@huawei.com>
Date: Sat, 25 Apr 2020 19:50:24 +0800
Subject: [PATCH 062/242] Revert "use the old op"

This reverts commit 252ed4f7c99e02cba8622cfcec674ebf648e581f.
---
 .../ccsrc/optimizer/irpass/arithmetic_simplify.h   | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h
index ff6e4f6170..0d48fc1463 100644
--- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h
+++ b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h
@@ -248,18 +248,17 @@ class AdjustAllReduceMulAdd : public AnfVisitor {
     if (addn->size() != 2) {
       return nullptr;
     }
+
     AnfVisitor::Match(prim::kPrimMakeTuple, {IsNode, IsNode})(addn->input(1));
     if (x_ == nullptr || y_ == nullptr || z_ == nullptr) {
       return nullptr;
     }
 
-    auto addn_op_node = addn->input(0);
-    auto make_tuple_op_node = addn->input(1)->cast<CNodePtr>()->input(0);
     auto fg = node->func_graph();
-    AnfNodePtr tuple = NewCNode({make_tuple_op_node, z_, x_}, fg);
-    AnfNodePtr add = NewCNode({addn_op_node, tuple}, fg);
-    AnfNodePtr all_reduce = NewCNode({all_reduce_, add}, fg);
-    return NewCNode({mul_, all_reduce, y_}, fg);
+    AnfNodePtr tuple = NewCNode({NewValueNode(prim::kPrimMakeTuple), z_, x_}, fg);
+    AnfNodePtr add = NewCNode({NewValueNode(prim::kPrimAddN), tuple}, fg);
+    AnfNodePtr all_reduce = NewCNode({NewValueNode(prim::kPrimAllReduce), add}, fg);
+    return NewCNode({NewValueNode(prim::kPrimMul), all_reduce, y_}, fg);
   }
 
   void Visit(const AnfNodePtr &node) override {
@@ -270,7 +269,6 @@ class AdjustAllReduceMulAdd : public AnfVisitor {
       AnfVisitor::Match(prim::kPrimMul)(node);
       level_ = 0;
       if (is_reduce_match_) {
-        mul_ = node->cast<CNodePtr>()->input(0);
         y_ = tmp_;
       } else {
         z_ = node;
@@ -282,7 +280,6 @@ class AdjustAllReduceMulAdd : public AnfVisitor {
       if (IsPrimitiveCNode(node, prim::kPrimAllReduce)) {
         auto cnode = node->cast<CNodePtr>();
         if (cnode->size() > 1) {
-          all_reduce_ = cnode->input(0);
           x_ = cnode->input(1);
           is_reduce_match_ = true;
         }
@@ -305,7 +302,6 @@ class AdjustAllReduceMulAdd : public AnfVisitor {
   int level_{0};
   bool is_reduce_match_{false};
   AnfNodePtr x_{nullptr}, y_{nullptr}, z_{nullptr}, tmp_{nullptr};
-  AnfNodePtr all_reduce_{nullptr}, mul_{nullptr};
 };
 
 class ArithmeticSimplify {

From f305d6f8a76db5bef9afec1b381cb05c7293113c Mon Sep 17 00:00:00 2001
From: Wei Luning <weiluning@huawei.com>
Date: Sat, 25 Apr 2020 19:50:50 +0800
Subject: [PATCH 063/242] Revert "add pattern AdjustAllReduceMulAdd"

This reverts commit ea6958c50a02c87350b26ec2080abc6650f1a045.
---
 mindspore/ccsrc/operator/ops.cc               |  1 -
 mindspore/ccsrc/operator/ops.h                |  1 -
 mindspore/ccsrc/optimizer/irpass.cc           |  2 +-
 .../optimizer/irpass/arithmetic_simplify.h    | 78 -------------------
 mindspore/ops/operations/array_ops.py         |  2 +-
 mindspore/ops/operations/nn_ops.py            |  2 +-
 tests/ut/cpp/optimizer/lib_test.cc            | 19 -----
 .../gtest_input/optimizer/opt_test.py         | 45 +----------
 8 files changed, 5 insertions(+), 145 deletions(-)

diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/operator/ops.cc
index 9d5777641b..91a54e1fdb 100755
--- a/mindspore/ccsrc/operator/ops.cc
+++ b/mindspore/ccsrc/operator/ops.cc
@@ -230,7 +230,6 @@ const PrimitivePtr kPrimNotInDict = std::make_shared<Primitive>("not_in_dict");
 const PrimitivePtr kPrimMirror = std::make_shared<Primitive>("_MirrorOperator");
 const PrimitivePtr kPrimVirtualDiv = std::make_shared<Primitive>("_VirtualDiv");
 const PrimitivePtr kPrimVirtualDataset = std::make_shared<Primitive>("_VirtualDataset");
-const PrimitivePtr kPrimAllReduce = std::make_shared<Primitive>("AllReduce");
 
 // Debug ops
 const PrimitivePtr kPrimScalarSummary = std::make_shared<Primitive>("ScalarSummary");
diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h
index 4852e2345e..d84b2e4738 100755
--- a/mindspore/ccsrc/operator/ops.h
+++ b/mindspore/ccsrc/operator/ops.h
@@ -234,7 +234,6 @@ extern const PrimitivePtr kPrimInDict;
 extern const PrimitivePtr kPrimNotInDict;
 
 // Comm ops
-extern const PrimitivePtr kPrimAllReduce;
 extern const PrimitivePtr kPrimMirror;
 extern const PrimitivePtr kPrimVirtualDiv;
 extern const PrimitivePtr kPrimVirtualDataset;
diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/optimizer/irpass.cc
index 15df5a40bf..be9c8f787a 100644
--- a/mindspore/ccsrc/optimizer/irpass.cc
+++ b/mindspore/ccsrc/optimizer/irpass.cc
@@ -48,7 +48,7 @@ namespace irpass {
 OptimizeIRPassLib::OptimizeIRPassLib() {
   arithmetic_simplify_ = MakeSubstitution(ArithmeticSimplify(), "arithmetic_simplify",
                                           {prim::kPrimScalarAdd, prim::kPrimScalarMul, prim::kPrimTensorAdd,
-                                           prim::kPrimAddN, prim::kPrimIdentity, prim::kPrimMomentum, prim::kPrimMul});
+                                           prim::kPrimIdentity, prim::kPrimMomentum, prim::kPrimMul});
   special_op_eliminate_ = MakeSubstitution(SpecialOpEliminater(), "special_op_eliminate",
                                            {prim::kPrimInsertGradientOf, prim::kPrimPrintShapeType,
                                             prim::kPrimGetRefKey, prim::kPrimMirror, prim::kPrimVirtualDiv});
diff --git a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h
index 0d48fc1463..ab191aab20 100644
--- a/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h
+++ b/mindspore/ccsrc/optimizer/irpass/arithmetic_simplify.h
@@ -228,82 +228,6 @@ class ConstantDuplicateMul : public AnfVisitor {
   CNodePtr cnode_;
 };
 
-// grad = AllReduce(grad) / worker_number
-// grad = grad + weight * decy
-// ->
-// grad = grad + weight * decy
-// grad = AllReduce(grad) / worker_number
-
-// {prim::kPrimAddN, {prim::kPrimMakeTuple, {prim::kPrimMul, {prim::kPrimAllReduce, X}, Y}, Z}} ->
-// {prim::kPrimMul, {prim::kPrimAllReduce, {prim::kPrimAddN,{prim::kPrimMakeTuple, Z, X}}}, Y}
-class AdjustAllReduceMulAdd : public AnfVisitor {
- public:
-  AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override {
-    Reset();
-    // {prim::kPrimAddN, Zs}
-    if (!IsPrimitiveCNode(node, prim::kPrimAddN)) {
-      return nullptr;
-    }
-    auto addn = node->cast<CNodePtr>();
-    if (addn->size() != 2) {
-      return nullptr;
-    }
-
-    AnfVisitor::Match(prim::kPrimMakeTuple, {IsNode, IsNode})(addn->input(1));
-    if (x_ == nullptr || y_ == nullptr || z_ == nullptr) {
-      return nullptr;
-    }
-
-    auto fg = node->func_graph();
-    AnfNodePtr tuple = NewCNode({NewValueNode(prim::kPrimMakeTuple), z_, x_}, fg);
-    AnfNodePtr add = NewCNode({NewValueNode(prim::kPrimAddN), tuple}, fg);
-    AnfNodePtr all_reduce = NewCNode({NewValueNode(prim::kPrimAllReduce), add}, fg);
-    return NewCNode({NewValueNode(prim::kPrimMul), all_reduce, y_}, fg);
-  }
-
-  void Visit(const AnfNodePtr &node) override {
-    if (level_ == 0) {
-      level_ = 1;
-      is_reduce_match_ = false;
-      // {prim::kPrimMul, {prim::kPrimAllReduce, X}, Y}
-      AnfVisitor::Match(prim::kPrimMul)(node);
-      level_ = 0;
-      if (is_reduce_match_) {
-        y_ = tmp_;
-      } else {
-        z_ = node;
-      }
-    }
-
-    if (level_ == 1) {
-      // {prim::kPrimAllReduce, X}
-      if (IsPrimitiveCNode(node, prim::kPrimAllReduce)) {
-        auto cnode = node->cast<CNodePtr>();
-        if (cnode->size() > 1) {
-          x_ = cnode->input(1);
-          is_reduce_match_ = true;
-        }
-      } else {
-        tmp_ = node;
-      }
-    }
-  }
-
-  void Reset() {
-    level_ = 0;
-    is_reduce_match_ = false;
-    x_ = nullptr;
-    y_ = nullptr;
-    z_ = nullptr;
-    tmp_ = nullptr;
-  }
-
- private:
-  int level_{0};
-  bool is_reduce_match_{false};
-  AnfNodePtr x_{nullptr}, y_{nullptr}, z_{nullptr}, tmp_{nullptr};
-};
-
 class ArithmeticSimplify {
  public:
   ArithmeticSimplify()
@@ -319,7 +243,6 @@ class ArithmeticSimplify {
     eliminaters_.emplace_back(identity_);
     eliminaters_.emplace_back(opt_update_zero_tensor_);
     eliminaters_.emplace_back(constant_duplicate_mul_);
-    eliminaters_.emplace_back(adjust_allreduce_mul_add_);
   }
   ~ArithmeticSimplify() = default;
 
@@ -341,7 +264,6 @@ class ArithmeticSimplify {
   PrimEliminater identity_;
   OptUpdateZeroTensor opt_update_zero_tensor_;
   ConstantDuplicateMul constant_duplicate_mul_;
-  AdjustAllReduceMulAdd adjust_allreduce_mul_add_;
   std::vector<TransformFuncType> eliminaters_{};
 };
 }  // namespace irpass
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index 6f51dd0a1c..38504990e8 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -1229,7 +1229,7 @@ class UnsortedSegmentSum(PrimitiveWithInfer):
         Tensor, the shape is :math:`(z, x_{N+1}, ..., x_R)`.
 
     Examples:
-        >>> input_x = Tensor([1, 2, 3, 4], mindspore.float32)
+        >>> input_x = Tensor([1, 2, 3, 4], mindspore.float32)
         >>> segment_ids = Tensor([0, 0, 1, 2], mindspore.int32)
         >>> num_segments = 4
         >>> P.UnsortedSegmentSum()(input_x, segment_ids, num_segments)
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index e574dd2566..56aa20b25b 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -1630,7 +1630,7 @@ class LayerNorm(Primitive):
     `Layer Normalization <https://arxiv.org/abs/1607.06450>`_.
 
     .. math::
-        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
+        y = \frac{x - mean}{\sqrt{variance + \epsilon}} * \gamma + \beta
 
     where :math:`\gamma` is scale, :math:`\beta` is bias, :math:`\epsilon` is epsilon.
 
diff --git a/tests/ut/cpp/optimizer/lib_test.cc b/tests/ut/cpp/optimizer/lib_test.cc
index 8e348c698a..2d4cf0e78e 100644
--- a/tests/ut/cpp/optimizer/lib_test.cc
+++ b/tests/ut/cpp/optimizer/lib_test.cc
@@ -556,24 +556,5 @@ TEST_F(TestOptLib, test_constant_duplicate_mul) {
   ASSERT_TRUE(CheckOpt(beforerl, after, patterns));
   ASSERT_TRUE(CheckOpt(beforerr, after, patterns));
 }
-
-TEST_F(TestOptLib, test_adjust_allreduce_mul_add) {
-  FuncGraphPtr beforell = getPyFun.CallAndParseRet("test_adjust_allreduce_mul_add", "beforell");
-  FuncGraphPtr beforelr = getPyFun.CallAndParseRet("test_adjust_allreduce_mul_add", "beforelr");
-  FuncGraphPtr beforerl = getPyFun.CallAndParseRet("test_adjust_allreduce_mul_add", "beforerl");
-  FuncGraphPtr beforerr = getPyFun.CallAndParseRet("test_adjust_allreduce_mul_add", "beforerr");
-  FuncGraphPtr after1 = getPyFun.CallAndParseRet("test_adjust_allreduce_mul_add", "after1");
-  FuncGraphPtr before2r = getPyFun.CallAndParseRet("test_adjust_allreduce_mul_add", "before2r");
-  FuncGraphPtr before2l = getPyFun.CallAndParseRet("test_adjust_allreduce_mul_add", "before2l");
-  FuncGraphPtr after2 = getPyFun.CallAndParseRet("test_adjust_allreduce_mul_add", "after2");
-  auto patterns = std::vector<SubstitutionPtr>({irpass.arithmetic_simplify_});
-  ASSERT_TRUE(CheckOpt(beforell, after1, patterns));
-  ASSERT_TRUE(CheckOpt(beforelr, after1, patterns));
-  ASSERT_TRUE(CheckOpt(beforerl, after1, patterns));
-  ASSERT_TRUE(CheckOpt(beforerr, after1, patterns));
-  ASSERT_TRUE(CheckOpt(before2l, after2, patterns));
-  ASSERT_TRUE(CheckOpt(before2r, after2, patterns));
-}
-
 }  // namespace opt
 }  // namespace mindspore
diff --git a/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py b/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py
index d74aa15952..d494ad27d3 100644
--- a/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py
+++ b/tests/ut/cpp/python_input/gtest_input/optimizer/opt_test.py
@@ -908,8 +908,8 @@ def test_print_tuple_wrapper(tag):
 
 def test_constant_duplicate_mul(tag):
     fns = FnDict()
-    Mul = Primitive('Mul')
-    Sqrt = Primitive('Sqrt')
+    Mul = Primitive('Mul');
+    Sqrt = Primitive('Sqrt');
 
     x = Tensor(np.array([[2, 2], [2, 3]]).astype('float32'))
     tensor1 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32'))
@@ -936,44 +936,3 @@ def test_constant_duplicate_mul(tag):
         return Mul(Sqrt(x), Mul(tensor1, tensor2))
 
     return fns[tag]
-
-
-def test_adjust_allreduce_mul_add(tag):
-    fns = FnDict()
-    Mul = Primitive('Mul')
-    AddN = Primitive('AddN')
-    AllReduce = Primitive('AllReduce')
-
-    @fns
-    def beforell(x, y, z):
-        return AddN((z, Mul(y, AllReduce(x))))
-
-    @fns
-    def beforelr(x, y, z):
-        return AddN((z, Mul(AllReduce(x), y)))
-
-    @fns
-    def beforerl(x, y, z):
-        return AddN((Mul(y, AllReduce(x)), z))
-
-    @fns
-    def beforerr(x, y, z):
-        return AddN((Mul(AllReduce(x), y), z))
-
-    @fns
-    def after1(x, y, z):
-        return Mul(AllReduce(AddN((z, x))), y)
-
-    @fns
-    def before2r(x, y, z):
-        return AddN((Mul(AllReduce(x), y), Mul(z, z)))
-
-    @fns
-    def before2l(x, y, z):
-        return AddN((Mul(z, z), Mul(AllReduce(x), y)))
-
-    @fns
-    def after2(x, y, z):
-        return Mul(AllReduce(AddN((Mul(z, z), x))), y)
-
-    return fns[tag]

From 5f2bfb5679d39be091053ae1d807bbd2954bc49a Mon Sep 17 00:00:00 2001
From: guohongzilong <2713219276@qq.com>
Date: Fri, 24 Apr 2020 14:35:12 +0800
Subject: [PATCH 064/242] trans const to variable in assign case

---
 mindspore/ccsrc/transform/convert.cc | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 mode change 100755 => 100644 mindspore/ccsrc/transform/convert.cc

diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc
old mode 100755
new mode 100644
index 2daa86b960..36faa5787a
--- a/mindspore/ccsrc/transform/convert.cc
+++ b/mindspore/ccsrc/transform/convert.cc
@@ -1154,6 +1154,9 @@ void DfGraphConvertor::SetOpControlInput(const AnfNodePtr node) {
   }
 }
 
+const std::vector<std::string> trans_var_list = {prim::kPrimAssign->name(), string(kNameAssignAdd),
+                                                 string(kNameAssignSub)};
+
 void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node) {
   OperatorPtr src = Convert(node);
   auto &inputs = node->inputs();
@@ -1166,6 +1169,26 @@ void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node
     if (IsValueNode<None>(pred)) {
       continue;
     }
+    // When not training, turn a "Const" op into a "Variable" op if it feeds an Assign/AssignAdd/AssignSub op.
+    std::string c_name = GetCNodeFuncName(node);
+    auto pos = std::find(trans_var_list.begin(), trans_var_list.end(), c_name);
+    if (!training_ && pos != trans_var_list.end() && pred->isa<Parameter>()) {
+      std::string name = std::static_pointer_cast<Parameter>(pred)->name();
+      auto op_itor = op_cache_.find(pred.get());
+      if (op_itor == op_cache_.end()) {
+        MS_LOG(EXCEPTION) << "Can not find op for node " << pred->ToString() << ".";
+      }
+      if (op_itor->second != nullptr &&
+          (op_itor->second->GetOpType() == "Constant" || op_itor->second->GetOpType() == "Const") &&
+          vars_.find(name) != vars_.end()) {
+        auto variable = std::make_shared<Variable>(name);
+        auto desc = vars_[name]->GetOutputDesc("y");
+        (void)variable->update_output_desc_y(desc);
+        MS_LOG(DEBUG) << "Trans to variable, var = " << variable->GetName() << ".";
+        op_itor->second = variable;  // replace parameter with variable
+        vars_[name] = variable;
+      }
+    }
     // find in out_hadnle_cache_ first
     auto it = out_handle_cache_.find(pred.get());
     if (it != out_handle_cache_.end()) {

From 7e8e44ef56ee806f48f56685563576d1a5465525 Mon Sep 17 00:00:00 2001
From: candanzg <zhangshucheng@huawei.com>
Date: Fri, 24 Apr 2020 16:52:21 +0800
Subject: [PATCH 065/242] fixed summary graph bug

Signed-off-by: candanzg <zhangshucheng@huawei.com>
---
 mindspore/train/summary/summary_record.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mindspore/train/summary/summary_record.py b/mindspore/train/summary/summary_record.py
index 3dbe31f0e4..4c60dce862 100644
--- a/mindspore/train/summary/summary_record.py
+++ b/mindspore/train/summary/summary_record.py
@@ -158,6 +158,9 @@ class SummaryRecord:
                     package_graph_event(graph_proto).SerializeToString())
                 self.event_writer.flush()
                 self.has_graph = True
+                data = _summary_tensor_cache.get("SummaryRecord")
+                if data is None:
+                    return True
 
         data = _summary_tensor_cache.get("SummaryRecord")
         if data is None:

From 3ff9e54734e09596adefcdfae63a47e37893018f Mon Sep 17 00:00:00 2001
From: Xiaoda Zhang <zhangxiaoda@huawei.com>
Date: Sun, 26 Apr 2020 09:07:37 +0800
Subject: [PATCH 066/242] add the resnet50 32k-8p testcase

---
 tests/ut/python/parallel/test_auto_parallel_resnet.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/ut/python/parallel/test_auto_parallel_resnet.py b/tests/ut/python/parallel/test_auto_parallel_resnet.py
index 726b3f356f..a563efb06d 100644
--- a/tests/ut/python/parallel/test_auto_parallel_resnet.py
+++ b/tests/ut/python/parallel/test_auto_parallel_resnet.py
@@ -273,7 +273,7 @@ class DatasetLenet():
         return 1
 
 
-def train_32k_8p(epoch_size=3, batch_size=32, num_classes=32768):
+def test_train_32k_8p(epoch_size=3, batch_size=32, num_classes=32768):
     dev_num = 8
     context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=dev_num)
     set_algo_parameters(elementwise_op_strategy_follow=True)
@@ -308,7 +308,7 @@ def train_32k_8p_fusion1(epoch_size=3, batch_size=32, num_classes=32768): #10485
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)
-    allreduce_fusion_dict = train_32k_8p(epoch_size, batch_size, num_classes)
+    allreduce_fusion_dict = test_train_32k_8p(epoch_size, batch_size, num_classes)
     expect_dict = {'end_point.bias': 2,
                    'end_point.weight': 2,
                    'layer4.2.bn3.beta': 2,
@@ -481,7 +481,7 @@ def train_32k_8p_fusion2(epoch_size=3, batch_size=32, num_classes=32768): #10485
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_inherent_time=0.05)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_allreduce_bandwidth=0.000001)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_computation_time_parameter=0.0000015)
-    allreduce_fusion_dict = train_32k_8p(epoch_size, batch_size, num_classes)
+    allreduce_fusion_dict = test_train_32k_8p(epoch_size, batch_size, num_classes)
     expect_dict = {'end_point.bias': 2,
                    'end_point.weight': 2,
                    'layer4.2.bn3.beta': 2,

From 52ab6f494833f473f1d6407bec419810a7f92bc3 Mon Sep 17 00:00:00 2001
From: lvliang <lvliang18@huawei.com>
Date: Wed, 22 Apr 2020 21:10:03 +0800
Subject: [PATCH 067/242] pynative-lamb-op-zeros-like-tensor-query-failed

---
 mindspore/_extends/builtin_operations.py                 | 2 +-
 .../pre_activate/ascend/ascend_backend_optimization.h    | 1 -
 mindspore/ccsrc/pynative/pynative_execute.cc             | 2 +-
 mindspore/ops/operations/debug_ops.py                    | 9 +++++++++
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/mindspore/_extends/builtin_operations.py b/mindspore/_extends/builtin_operations.py
index 6fea07425e..a423fe6395 100644
--- a/mindspore/_extends/builtin_operations.py
+++ b/mindspore/_extends/builtin_operations.py
@@ -86,7 +86,7 @@ def identity(x):
 def zeros_like_tensor(x):
     """Implement `zeros_like_tensor`."""
     x = x.asnumpy()
-    value = Tensor(np.zeros(x.shape))
+    value = Tensor(np.zeros(x.shape).astype(np.float32))
     return value
 
 
diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h
index fcd9c15c58..65e70def85 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.h
@@ -26,7 +26,6 @@ void AscendDataLayout(const std::shared_ptr<session::KernelGraph> &kernel_graph)
 void AscendMixPrecision(const std::shared_ptr<session::KernelGraph> &kernel_graph);
 void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph);
 void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph);
-void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph);
 }  // namespace opt
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pynative/pynative_execute.cc
index 0d18dfb577..e13e12fffc 100644
--- a/mindspore/ccsrc/pynative/pynative_execute.cc
+++ b/mindspore/ccsrc/pynative/pynative_execute.cc
@@ -39,7 +39,7 @@
 
 const char SINGLE_OP_GRAPH[] = "single_op_graph";
 // primitive unable to infer value for constant input in PyNative mode
-const std::unordered_set<std::string> vm_operators = {"partial", "depend", "make_ref"};
+const std::set<std::string> vm_operators = {"partial", "depend", "make_ref", "zeros_like_tensor"};
 
 namespace mindspore {
 namespace pynative {
diff --git a/mindspore/ops/operations/debug_ops.py b/mindspore/ops/operations/debug_ops.py
index 21c9c519b9..48ede57be4 100644
--- a/mindspore/ops/operations/debug_ops.py
+++ b/mindspore/ops/operations/debug_ops.py
@@ -45,6 +45,9 @@ class ScalarSummary(Primitive):
     def __init__(self):
         """init"""
 
+    def __call__(self, *args, **kwargs):
+        pass
+
 
 class ImageSummary(Primitive):
     """
@@ -70,6 +73,9 @@ class ImageSummary(Primitive):
     def __init__(self):
         """init"""
 
+    def __call__(self, *args, **kwargs):
+        pass
+
 
 class TensorSummary(Primitive):
     """
@@ -97,6 +103,9 @@ class TensorSummary(Primitive):
     def __init__(self):
         """init"""
 
+    def __call__(self, *args, **kwargs):
+        pass
+
 
 class HistogramSummary(Primitive):
     """

From 3f69b5b5183f4ce0919919cedea60278681aef0e Mon Sep 17 00:00:00 2001
From: caifubi <caifubi1@huawei.com>
Date: Fri, 24 Apr 2020 14:33:19 +0800
Subject: [PATCH 068/242] insert profiling iteration end point after last tbe
 kernel

---
 .../ascend/profiling/profiling_utils.cc       | 25 +++++++++++++------
 .../device/ascend/profiling/profiling_utils.h |  3 ++-
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
index c1478915b7..1f87bf7bfa 100644
--- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
+++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc
@@ -33,7 +33,7 @@ constexpr char kIterEndNode[] = "PROFILING_ITER_END";
 std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
 uint32_t ProfilingUtils::custom_node_index_ = 1;
 
-ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(const NotNull<session::KernelGraph *> graph_ptr) {
+ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr) {
   MS_LOG(INFO) << "get env start";
   custom_node_index_ = 1;
   auto &cnode_exec_order = graph_ptr->execution_order();
@@ -148,18 +148,29 @@ std::string ProfilingUtils::GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exe
   }
 
   if (bp_end_str.empty()) {
-    auto last_cnode = cnode_exec_order.back();
-    MS_EXCEPTION_IF_NULL(last_cnode);
-    bp_end_str = last_cnode->fullname_with_scope();
+    bp_end_str = GetGraphLastTbeKernelName(cnode_exec_order);
   }
   return bp_end_str;
 }
 
+std::string ProfilingUtils::GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order) {
+  std::string last_tbe_kernel_name = "";
+  // find last tbe_kernel
+  for (auto iter = cnode_exec_order.rbegin(); iter != cnode_exec_order.rend(); ++iter) {
+    if (AnfAlgo::GetKernelType(*iter) == TBE_KERNEL) {
+      last_tbe_kernel_name = (*iter)->fullname_with_scope();
+      break;
+    }
+  }
+  if (last_tbe_kernel_name.empty()) {
+    MS_LOG(WARNING) << "tbe kernel not found in graph";
+  }
+  return last_tbe_kernel_name;
+}
+
 std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) {
   const char *trace_netoutput = std::getenv(kIterEndNode);
-  auto &last_cnode = cnode_exec_order.back();
-  MS_EXCEPTION_IF_NULL(last_cnode);
-  return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput);
+  return trace_netoutput == nullptr ? GetGraphLastTbeKernelName(cnode_exec_order) : std::string(trace_netoutput);
 }
 
 NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content,
diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
index 99245b2c57..59909c1f2f 100644
--- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
+++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
@@ -94,7 +94,7 @@ class ProfilingUtils {
   // And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode'
   // GetNext, export PROFIFLING_CUSTOM_2='full name fo GetNext cnode'
   // The variable i in PROFILING_CUSTOM_i should start from 1 without interruption.
-  static ProfilingTraceInfo GetProfilingTraceFromEnv(const NotNull<session::KernelGraph *> graph_ptr);
+  static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr);
 
   // Insert two profiling trace points, one in front and one behind
   static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
@@ -114,6 +114,7 @@ class ProfilingUtils {
   static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order);
   static std::string GetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order);
   static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order);
+  static std::string GetGraphLastTbeKernelName(const std::vector<CNodePtr> &cnode_exec_order);
   static void GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order,
                            NotNull<ProfilingTraceInfo *> profiling_trace);
   static void GetCNodeOutputRealNode(const std::string &node_name, const std::vector<CNodePtr> &cnode_exec_order,

From aff6777ecc30d658f2d5bdd7d1a0d2409d8bb4fe Mon Sep 17 00:00:00 2001
From: buxue <yiren19920727@163.com>
Date: Fri, 24 Apr 2020 16:51:59 +0800
Subject: [PATCH 069/242] fix reviewbot and example of TruncatedNormal and add
 type mapping

---
 mindspore/ccsrc/debug/info.h                    |  2 +-
 mindspore/ccsrc/debug/trace.cc                  |  2 +-
 mindspore/ccsrc/debug/trace_info.h              |  2 +-
 mindspore/ccsrc/ir/dtype/type.cc                |  6 ++++++
 mindspore/ccsrc/optimizer/irpass.cc             |  1 -
 mindspore/ccsrc/optimizer/irpass.h              |  1 -
 .../ccsrc/optimizer/irpass/grad_var_prepare.cc  |  1 -
 .../ccsrc/optimizer/irpass/grad_var_prepare.h   |  1 -
 mindspore/ccsrc/pipeline/base.h                 |  2 --
 mindspore/ccsrc/pipeline/pipeline.cc            |  2 +-
 mindspore/ccsrc/pipeline/pipeline_ge.cc         | 12 +++---------
 mindspore/ccsrc/pipeline/pipeline_ge.h          |  2 --
 .../static_analysis/abstract_function.h         |  4 ++--
 .../ccsrc/pipeline/static_analysis/prim.cc      |  1 -
 mindspore/ops/operations/array_ops.py           | 17 ++++++++++-------
 tests/ut/python/ops/test_ops.py                 |  2 +-
 16 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/mindspore/ccsrc/debug/info.h b/mindspore/ccsrc/debug/info.h
index a34d6e3df5..e8d02827d8 100644
--- a/mindspore/ccsrc/debug/info.h
+++ b/mindspore/ccsrc/debug/info.h
@@ -134,7 +134,7 @@ class DebugInfo : public Base {
 
   explicit DebugInfo(const LocationPtr &loc);
 
-  virtual ~DebugInfo() = default;
+  ~DebugInfo() override = default;
   MS_DECLARE_PARENT(DebugInfo, Base);
   int64_t debug_id();
   int64_t unique_id() const { return unique_id_; }
diff --git a/mindspore/ccsrc/debug/trace.cc b/mindspore/ccsrc/debug/trace.cc
index 16ce77725e..a78d8446d8 100644
--- a/mindspore/ccsrc/debug/trace.cc
+++ b/mindspore/ccsrc/debug/trace.cc
@@ -231,10 +231,10 @@ std::string AnalyzedFuncGraphExporter::GetNodeType(const AnfNodePtr &node) {
   auto engine = node_cfg_->engine();
   auto cfg = engine->MakeConfig(node, ctx);
   auto abs = engine->cache().GetValue(cfg);
-
   if (abs == nullptr) {
     return "Undefined";
   }
+
   auto dtype = abs->BuildType();
   auto shape = abs->BuildShape();
   std::ostringstream oss;
diff --git a/mindspore/ccsrc/debug/trace_info.h b/mindspore/ccsrc/debug/trace_info.h
index e7a8c83dad..85eae0e958 100644
--- a/mindspore/ccsrc/debug/trace_info.h
+++ b/mindspore/ccsrc/debug/trace_info.h
@@ -321,7 +321,7 @@ class TraceTransform : public TraceInfo {
 
   std::string full_name() override { return full_name_ + transform_name_; }
   MS_DECLARE_PARENT(TraceTransform, TraceInfo);
-  virtual std::string symbol() {
+  std::string symbol() override {
     if (transform_name_.empty()) {
       return "";
     }
diff --git a/mindspore/ccsrc/ir/dtype/type.cc b/mindspore/ccsrc/ir/dtype/type.cc
index 30bf0c8e3f..56954495df 100644
--- a/mindspore/ccsrc/ir/dtype/type.cc
+++ b/mindspore/ccsrc/ir/dtype/type.cc
@@ -87,6 +87,12 @@ const char *MetaIdLabel(const TypeId &v) {
       return "kMetaTypeExternal";
     case kMetaTypeNone:
       return "kMetaTypeNone";
+    case kMetaTypeNull:
+      return "kMetaTypeNull";
+    case kMetaTypeEllipsis:
+      return "kMetaTypeEllipsis";
+    case kMetaTypeEnd:
+      return "kMetaTypeEnd";
     default:
       return "[Unknown Type Id]";
   }
diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/optimizer/irpass.cc
index 15df5a40bf..72298bec25 100644
--- a/mindspore/ccsrc/optimizer/irpass.cc
+++ b/mindspore/ccsrc/optimizer/irpass.cc
@@ -133,7 +133,6 @@ ResolveIRPassLib::ResolveIRPassLib() {
 InferenceOptPrepareLib::InferenceOptPrepareLib() {
   grad_var_prepare_ = MakeSubstitution(GradVarPrepare(), "grad_var_prepare", IsCNode);
 }
-
 }  // namespace irpass
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/optimizer/irpass.h b/mindspore/ccsrc/optimizer/irpass.h
index 00274bdcc8..0af22c5cd0 100644
--- a/mindspore/ccsrc/optimizer/irpass.h
+++ b/mindspore/ccsrc/optimizer/irpass.h
@@ -159,7 +159,6 @@ inline bool IsCNodeDup(const AnfNodePtr &node) {
   }
   return false;
 }
-
 }  // namespace irpass
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc b/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc
index 32a42bc16b..317d67e792 100644
--- a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc
+++ b/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.cc
@@ -31,7 +31,6 @@
 namespace mindspore {
 namespace opt {
 namespace irpass {
-
 static AnfNodePtr GenerateUnpackGraphNode(std::vector<AnfNodePtr> inputs_y, FuncGraphPtr func_graph,
                                           AnfNodePtr func_node, bool is_unpack, bool sens_param) {
   MS_EXCEPTION_IF_NULL(func_graph);
diff --git a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h b/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h
index 599d1dca17..9713017d12 100644
--- a/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h
+++ b/mindspore/ccsrc/optimizer/irpass/grad_var_prepare.h
@@ -33,7 +33,6 @@
 namespace mindspore {
 namespace opt {
 namespace irpass {
-
 // {{GradOperation, g, w}, Ys}
 // {UnPackCall, {GradOperation, g, w}, Ys}
 class GradVarPrepare : public AnfVisitor {
diff --git a/mindspore/ccsrc/pipeline/base.h b/mindspore/ccsrc/pipeline/base.h
index 8ca153f45b..57edea03a2 100644
--- a/mindspore/ccsrc/pipeline/base.h
+++ b/mindspore/ccsrc/pipeline/base.h
@@ -28,13 +28,11 @@
 
 namespace mindspore {
 namespace pipeline {
-
 struct ExecutorInfo {
   FuncGraphPtr func_graph;
   ResourcePtr resource;
   std::size_t arg_list_size;
 };
-
 using ExecutorInfoPtr = std::shared_ptr<ExecutorInfo>;
 
 inline std::string GetPhasePrefix(const std::string &phase) {
diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index 7524fb9d53..251a0c2d84 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -101,7 +101,7 @@ py::tuple GenerateKey(const std::string &name, const std::unordered_map<std::str
     MS_LOG(INFO) << "Start new args and compile key:" << key;
     g_args_cache[args_spec] = key++;
   }
-  py::tuple argSpec = py::tuple(2);
+  auto argSpec = py::tuple(2);
   argSpec[0] = name;
   argSpec[1] = g_args_cache[args_spec];
   return argSpec;
diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc
index 3d4b8b3e2a..4a7328d325 100644
--- a/mindspore/ccsrc/pipeline/pipeline_ge.cc
+++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc
@@ -52,11 +52,11 @@ void DoExecNonInputGraph(const std::string &phase) {
   transform::RunOptions run_options;
   run_options.name = phase;
   auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
-
   if (graph_runner == nullptr) {
     MS_LOG(ERROR) << "Can not found GraphRunner";
     return;
   }
+
   {
     // Release GIL before calling into (potentially long-running) C++ code
     py::gil_scoped_release release;
@@ -181,7 +181,6 @@ bool AddDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::di
   size_t pos = phase.find('.');
   std::string net_id = ((pos == std::string::npos || pos == phase.size() - 1) ? phase : phase.substr(pos + 1));
   std::string phase_prefix = phase.substr(0, pos);
-
   if (phase_prefix == "export") {
     MS_LOG(INFO) << "Set DfGraphConvertor training : false";
     convertor.set_training(false);
@@ -348,7 +347,7 @@ py::object ExtractGeneralCnodeRet(const AbstractBasePtr &cnode_data, const py::t
   auto data_tp = cnode_data->cast<AbstractTuplePtr>();
   auto elements = data_tp->elements();
   size_t size = data_tp->size();
-  py::tuple tp = py::tuple(size);
+  auto tp = py::tuple(size);
   for (size_t i = 0; i < size; i++) {
     tp[i] = ExtractGeneralCnodeRet(elements[i], data, count);
   }
@@ -379,7 +378,7 @@ py::object StructureOutput(const AnfNodePtr &output_node, const py::tuple &data,
   if (output_c->IsApply(prim::kPrimMakeTuple)) {
     auto input_list = output_c->inputs();
     size_t size = input_list.size();
-    py::tuple tp = py::tuple(size - 1);
+    auto tp = py::tuple(size - 1);
     for (size_t i = 1; i < size; i++) {
       tp[i - 1] = StructureOutput(input_list[i], data, count);
     }
@@ -401,11 +400,8 @@ std::shared_ptr<py::object> DoExecGraph(const FuncGraphPtr &graph, const std::ve
 
   std::vector<GeTensorPtr> ge_outputs;
   transform::RunOptions run_options;
-
   run_options.name = phase;
-
   auto graph_runner = DfGraphManager::GetInstance().GetGraphRunner();
-
   if (graph_runner == nullptr) {
     MS_LOG(EXCEPTION) << "Can not found GraphRunner.";
   }
@@ -478,7 +474,6 @@ void ProcessGeArg(const std::map<std::string, ExecutorInfoPtr> &info, const py::
 py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const py::tuple &args,
                        const std::string &phase) {
   std::string phase_prefix = GetPhasePrefix(phase);
-
   if (phase_prefix == "save") {
     DoExecNonInputGraph(phase);
     ConfigManager::GetInstance().ResetConfig();
@@ -488,7 +483,6 @@ py::object ExecDFGraph(const std::map<std::string, ExecutorInfoPtr> &info, const
   if (info.count(phase) == 0) {
     MS_LOG(EXCEPTION) << "There is no phase:" << phase;
   }
-
   FuncGraphPtr anf_graph = info.at(phase)->func_graph;
 
 #ifdef ENABLE_INFER
diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.h b/mindspore/ccsrc/pipeline/pipeline_ge.h
index 9dc1524682..f3a363dbe8 100644
--- a/mindspore/ccsrc/pipeline/pipeline_ge.h
+++ b/mindspore/ccsrc/pipeline/pipeline_ge.h
@@ -31,7 +31,6 @@
 
 namespace mindspore {
 namespace pipeline {
-
 namespace py = pybind11;
 
 void SetGeOption(const std::map<std::string, std::string> &options);
@@ -50,7 +49,6 @@ bool InitExecDatasetGe(const std::string &queue_name, int64_t size, int64_t batc
                        const std::vector<int64_t> &input_indexes, const std::string &phase);
 
 void ExportDFGraph(const std::string &file_name, const std::string &phase);
-
 }  // namespace pipeline
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h b/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h
index 133d5e99a9..513b290a9d 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h
+++ b/mindspore/ccsrc/pipeline/static_analysis/abstract_function.h
@@ -41,7 +41,7 @@ class AbstractFuncAtom : public AbstractFunction {
 
   AbstractFunctionPtr Join(const AbstractFunctionPtr &other) final;
   void Visit(std::function<void(const AbstractFuncAtomPtr &)>) const final;
-  bool operator==(const AbstractFunction &other) const;
+  bool operator==(const AbstractFunction &other) const override;
 
   std::size_t hash() const override { return tid(); }
 };
@@ -270,7 +270,7 @@ class TypedPrimitiveAbstractClosure : public AbstractFuncAtom {
 class DummyAbstractClosure : public AbstractFuncAtom {
  public:
   DummyAbstractClosure() = default;
-  ~DummyAbstractClosure() = default;
+  ~DummyAbstractClosure() override = default;
   MS_DECLARE_PARENT(DummyAbstractClosure, AbstractFuncAtom)
 
   EvaluatorPtr GetEvaluator(AnalysisEnginePtr) override { MS_LOG(EXCEPTION) << "A dummy function cannot eval."; }
diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
index 1115cd9978..d71ad8f710 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
@@ -295,7 +295,6 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) {
     dic["shape"] = shape;
     dic["dtype"] = arg_slice->BuildType();
     dic["value"] = BuildValue(arg_slice->BuildValue());
-
   } else if (abs_base->isa<AbstractTuple>()) {
     auto arg_tuple = dyn_cast<AbstractTuple>(abs_base);
     size_t len = arg_tuple->size();
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index 6f51dd0a1c..a97cf5e43a 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -639,9 +639,9 @@ class TruncatedNormal(PrimitiveWithInfer):
         Tensor, type of output tensor is same as attribute `dtype`.
 
     Examples:
-        >>> input_shape = Tensor(np.array([1, 2, 3]))
+        >>> shape = (1, 2, 3)
         >>> truncated_normal = P.TruncatedNormal()
-        >>> output = truncated_normal(input_shape)
+        >>> output = truncated_normal(shape)
     """
 
     @prim_attr_register
@@ -652,6 +652,8 @@ class TruncatedNormal(PrimitiveWithInfer):
 
     def __infer__(self, shape):
         shape_value = shape['value']
+        validator.check_const_input("shape", shape_value)
+        validator.check_type("shape", shape_value, [tuple])
         for i, value in enumerate(shape_value):
             validator.check_integer(f'{i}th value of shape', value, 0, Rel.GT)
         out = {'shape': shape_value,
@@ -1642,15 +1644,16 @@ class StridedSlice(PrimitiveWithInfer):
         validator.check_type('shrink_axis_mask', shrink_axis_mask, [int])
 
     def __infer__(self, x, begin, end, strides):
-        x_shape = x['shape']
-        x_shp_len = len(x_shape)
         begin_v, end_v, strides_v = begin['value'], end['value'], strides['value']
         validator.check_const_input("begin", begin_v)
         validator.check_const_input("end", end_v)
         validator.check_const_input("strides", strides_v)
-        validator.check_type("begin", begin['value'], [tuple])
-        validator.check_type("end", end['value'], [tuple])
-        validator.check_type("strides", strides['value'], [tuple])
+        validator.check_type("begin", begin_v, [tuple])
+        validator.check_type("end", end_v, [tuple])
+        validator.check_type("strides", strides_v, [tuple])
+
+        x_shape = x['shape']
+        x_shp_len = len(x_shape)
         if len(begin_v) != x_shp_len or len(end_v) != x_shp_len or len(strides_v) != x_shp_len:
             raise ValueError(f"The length of begin index{begin_v}, end index{end_v} and strides{strides_v} "
                              f"must be equal to the dims({x_shp_len}) of input.")
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index 22df3d1fd3..d6622e76f4 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -372,7 +372,7 @@ test_case_math_ops = [
         'desc_bprop': [[3]]}),
     ('TruncatedNormal', {
         'block': P.TruncatedNormal(),
-        'desc_const': [[1, 2, 3]],
+        'desc_const': [(1, 2, 3)],
         'desc_inputs': [],
         'skip': ['backward'],
         'add_fake_input': True}),

From fe9008f73c6d163f256dd9e05db976835f01d65e Mon Sep 17 00:00:00 2001
From: VectorSL <shiliang10@huawei.com>
Date: Sun, 26 Apr 2020 10:08:33 +0800
Subject: [PATCH 070/242] fix codex for gpu conv2d

---
 .../ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h   | 36 ++++++++++---------
 .../gpu/nn/conv2d_grad_filter_gpu_kernel.h    | 30 ++++++++--------
 .../gpu/nn/conv2d_grad_input_gpu_kernel.h     | 29 ++++++++-------
 3 files changed, 51 insertions(+), 44 deletions(-)

diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
index 75b2a97cf8..d7649815c4 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
@@ -114,23 +114,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     pad_height_ = GetAttr<int>(kernel_node, "pad");
     pad_width_ = pad_height_;
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
-      MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!";
-    }
-    if (stride_ori[0] != 1 || stride_ori[1] != 1) {
-      MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!";
-    }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "conv2d only support equal dilation, and dilation must be 4d!";
-    }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
-      MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!";
-    }
-    stride_ = stride_ori[2];
-    dilation_ = dilation_ori[2];
-
+    SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t input_descriptor_real = nullptr;
     if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
       SetPad(in_shape, kernel_node);
@@ -277,6 +261,24 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       conv_algorithm_ = perf_results.algo;
     }
   }
+  void SetStrideAndDilation(const CNodePtr &kernel_node) {
+    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
+      MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!";
+    }
+    if (stride_ori[0] != 1 || stride_ori[1] != 1) {
+      MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!";
+    }
+    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
+      MS_LOG(EXCEPTION) << "conv2d only support equal dilation, and dilation must be 4d!";
+    }
+    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
+      MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!";
+    }
+    stride_ = stride_ori[2];
+    dilation_ = dilation_ori[2];
+  }
   cudnnHandle_t cudnn_handle_;
   cudnnTensorDescriptor_t input_desc_;
   cudnnTensorDescriptor_t output_desc_;
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
index e481fd448e..40e2413f6a 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
@@ -117,19 +117,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     pad_height_ = GetAttr<int>(kernel_node, "pad");
     pad_width_ = pad_height_;
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal stride, and stride must be 2d!";
-    }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal dilation, and dilation must be 4d!";
-    }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!";
-    }
-    stride_ = stride_ori[0];
-    dilation_ = dilation_ori[2];
+    SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t x_desc_real = nullptr;
     if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
       SetPad(in_shape, kernel_node);
@@ -281,7 +269,21 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
                                  SizeToInt(in_shape[1]), SizeToInt(in_shape[2]), SizeToInt(in_shape[3])),
       "SetTensor4dDescriptor failed");
   }
-
+  void SetStrideAndDilation(const CNodePtr &kernel_node) {
+    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
+      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal stride, and stride must be 2d!";
+    }
+    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
+      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal dilation, and dilation must be 4d!";
+    }
+    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
+      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!";
+    }
+    stride_ = stride_ori[0];
+    dilation_ = dilation_ori[2];
+  }
   cudnnHandle_t cudnn_handle_;
   cudnnFilterDescriptor_t dw_desc_;
   cudnnConvolutionDescriptor_t conv_desc_;
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
index 008abcc658..da09b73792 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
@@ -118,19 +118,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     pad_height_ = GetAttr<int>(kernel_node, "pad");
     pad_width_ = pad_height_;
     pad_mode_ = GetAttr<std::string>(kernel_node, "pad_mode");
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal stride, and stride must be 2d!";
-    }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal dilation, and dilation must be 4d!";
-    }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!";
-    }
-    stride_ = stride_ori[0];
-    dilation_ = dilation_ori[2];
+    SetStrideAndDilation(kernel_node);
     cudnnTensorDescriptor_t dx_desc_real = nullptr;
     if (pad_mode_ == kSamePadModeUpperCase || pad_mode_ == kSamePadModeLowerCase) {
       SetPad(input_shape, kernel_node);
@@ -279,6 +267,21 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
                                  input_shape[2], input_shape[3]),
       "SetTensor4dDescriptor failed");
   }
+  void SetStrideAndDilation(const CNodePtr &kernel_node) {
+    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
+      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal stride, and stride must be 2d!";
+    }
+    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
+      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal dilation, and dilation must be 4d!";
+    }
+    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
+      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!";
+    }
+    stride_ = stride_ori[0];
+    dilation_ = dilation_ori[2];
+  }
   cudnnHandle_t cudnn_handle_;
   cudnnFilterDescriptor_t w_desc_;
   cudnnConvolutionDescriptor_t conv_desc_;

From 9e29224af9defe334e487dc0d2f85bfa186916eb Mon Sep 17 00:00:00 2001
From: huanghui <huanghui44@huawei.com>
Date: Sun, 26 Apr 2020 10:48:44 +0800
Subject: [PATCH 071/242] move add_memcpy_async pass and transdata_split pass

---
 .../pre_activate/ascend/ascend_backend_optimization.cc      | 4 ++--
 .../ascend/{ir_fission => enhancer}/add_memcpy_async.cc     | 2 +-
 .../ascend/{ir_fission => enhancer}/add_memcpy_async.h      | 6 +++---
 .../ascend/{ir_fusion => ir_fission}/transdata_split.cc     | 2 +-
 .../ascend/{ir_fusion => ir_fission}/transdata_split.h      | 6 +++---
 .../{ir_fission => enhancer}/add_memcpy_async_test.cc       | 2 +-
 .../{ir_fusion => ir_fission}/transdata_split_test.cc       | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)
 rename mindspore/ccsrc/pre_activate/ascend/{ir_fission => enhancer}/add_memcpy_async.cc (97%)
 rename mindspore/ccsrc/pre_activate/ascend/{ir_fission => enhancer}/add_memcpy_async.h (82%)
 rename mindspore/ccsrc/pre_activate/ascend/{ir_fusion => ir_fission}/transdata_split.cc (98%)
 rename mindspore/ccsrc/pre_activate/ascend/{ir_fusion => ir_fission}/transdata_split.h (83%)
 rename tests/ut/cpp/pre_activate/ascend/{ir_fission => enhancer}/add_memcpy_async_test.cc (97%)
 rename tests/ut/cpp/pre_activate/ascend/{ir_fusion => ir_fission}/transdata_split_test.cc (99%)

diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index ead48a7059..f213611a4d 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -39,7 +39,7 @@
 #include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h"
 #include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h"
 #include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h"
-#include "pre_activate/ascend/ir_fusion/transdata_split.h"
+#include "pre_activate/ascend/ir_fission/transdata_split.h"
 #include "pre_activate/ascend/ir_fission/topk_split.h"
 #include "pre_activate/ascend/ir_fusion/momentum_lossscale_fusion.h"
 #include "pre_activate/ascend/ir_fusion/mul_add_fusion.h"
@@ -59,7 +59,7 @@
 #include "pre_activate/ascend/format_type/check_consistency.h"
 #include "pre_activate/ascend/buffer_fusion/buffer_fusion.h"
 #include "pre_activate/ascend/format_type/deal_ref_trans_and_cast.h"
-#include "pre_activate/ascend/ir_fission/add_memcpy_async.h"
+#include "pre_activate/ascend/enhancer/add_memcpy_async.h"
 #include "pre_activate/ascend/format_type/insert_cast_for_runop.h"
 #include "pre_activate/ascend/format_type/insert_transdata_for_runop.h"
 #include "pre_activate/ascend/enhancer/getnext_memcpy_elimination.h"
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc
similarity index 97%
rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc
rename to mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc
index bbea944750..bb708e02a2 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.cc
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "pre_activate/ascend/ir_fission/add_memcpy_async.h"
+#include "pre_activate/ascend/enhancer/add_memcpy_async.h"
 #include <vector>
 #include "utils/utils.h"
 #include "session/anf_runtime_algorithm.h"
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.h b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h
similarity index 82%
rename from mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.h
rename to mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h
index 227fc74fed..900b0fb46a 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/add_memcpy_async.h
+++ b/mindspore/ccsrc/pre_activate/ascend/enhancer/add_memcpy_async.h
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADD_MEMCPY_ASYNC_H_
-#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADD_MEMCPY_ASYNC_H_
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_
 
 #include <memory>
 #include "pre_activate/common/optimizer.h"
@@ -28,4 +28,4 @@ class AddMemcpyAsync : public PatternProcessPass {
 };
 }  // namespace opt
 }  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_ADD_MEMCPY_ASYNC_H_
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_ENHANCER_ADD_MEMCPY_ASYNC_H_
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc
similarity index 98%
rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.cc
rename to mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc
index d3990fe898..2c77794b14 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.cc
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "pre_activate/ascend/ir_fusion/transdata_split.h"
+#include "pre_activate/ascend/ir_fission/transdata_split.h"
 #include <set>
 #include "pre_activate/ascend/ascend_helper.h"
 #include "session/anf_runtime_algorithm.h"
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h
similarity index 83%
rename from mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.h
rename to mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h
index 0e84c23256..f450897db1 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/transdata_split.h
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/transdata_split.h
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_TRANSDATA_SPLIT_H_
-#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_TRANSDATA_SPLIT_H_
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TRANSDATA_SPLIT_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TRANSDATA_SPLIT_H_
 #include <vector>
 #include <string>
 #include <utility>
@@ -42,4 +42,4 @@ class TransDataSplit : public Pass {
 };
 }  // namespace opt
 }  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_TRANSDATA_SPLIT_H_
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TRANSDATA_SPLIT_H_
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/add_memcpy_async_test.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/add_memcpy_async_test.cc
similarity index 97%
rename from tests/ut/cpp/pre_activate/ascend/ir_fission/add_memcpy_async_test.cc
rename to tests/ut/cpp/pre_activate/ascend/enhancer/add_memcpy_async_test.cc
index 516bcb89f0..367ab25054 100644
--- a/tests/ut/cpp/pre_activate/ascend/ir_fission/add_memcpy_async_test.cc
+++ b/tests/ut/cpp/pre_activate/ascend/enhancer/add_memcpy_async_test.cc
@@ -22,7 +22,7 @@
 #include "utils/utils.h"
 #include "kernel/kernel_build_info.h"
 #include "pre_activate/common/optimizer.h"
-#include "pre_activate/ascend/ir_fission/add_memcpy_async.h"
+#include "pre_activate/ascend/enhancer/add_memcpy_async.h"
 
 namespace mindspore {
 namespace opt {
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transdata_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc
similarity index 99%
rename from tests/ut/cpp/pre_activate/ascend/ir_fusion/transdata_split_test.cc
rename to tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc
index 19215d2f1c..b358b002a4 100644
--- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/transdata_split_test.cc
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/transdata_split_test.cc
@@ -23,7 +23,7 @@
 #define private public
 #define protected public
 #include "pre_activate/ascend/format_type/insert_trans_op.h"
-#include "pre_activate/ascend/ir_fusion/transdata_split.h"
+#include "pre_activate/ascend/ir_fission/transdata_split.h"
 #undef private
 #undef protected
 

From e0c3ec6c796472bd201bd2f51ce343858f4c6203 Mon Sep 17 00:00:00 2001
From: zhoufeng <zhoufeng54@huawei.com>
Date: Sun, 26 Apr 2020 11:02:20 +0800
Subject: [PATCH 072/242] check gcc version, check patch is found or not

---
 CMakeLists.txt      | 16 +++++++++++++++-
 cmake/package.cmake |  5 ++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7dceca7ad7..dc07ccae8b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,10 @@
 cmake_minimum_required(VERSION 3.14)
 project (MindSpore)
+
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0)
+    message(FATAL_ERROR "GCC version ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0")
+endif ()
+
 include(${CMAKE_SOURCE_DIR}/cmake/options.cmake)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/modules/")
 
@@ -18,7 +23,16 @@ set(PYBIND11_CPP_STANDARD -std=c++17)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPTION_CXX_FLAGS}")
 
 find_package(Threads)
-find_package(Patch)
+if (DEFINED ENV{MS_PATCH_PATH})
+    find_program(Patch_EXECUTABLE patch PATHS $ENV{MS_PATCH_PATH})
+    set(Patch_FOUND ${Patch_EXECUTABLE})
+else ()
+    find_package(Patch)
+endif ()
+if (NOT Patch_FOUND)
+    message(FATAL_ERROR "Patch not found, please set env variable MS_PATCH_PATH, "
+            "usually locate in GIT_PATH/usr/bin in windows")
+endif ()
 message(PATCH_EXECUTABLE = ${Patch_EXECUTABLE})
 
 include(${CMAKE_SOURCE_DIR}/cmake/mind_expression.cmake)
diff --git a/cmake/package.cmake b/cmake/package.cmake
index f0a080e776..08919eb0e7 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -153,7 +153,10 @@ endif ()
 if (CMAKE_SYSTEM_NAME MATCHES "Windows")
     get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH)
     file(GLOB CXX_LIB_LIST ${CXX_DIR}/*.dll)
-    file(GLOB VC_LIB_LIST $ENV{SystemRoot}/System32/msvcp140.dll $ENV{SystemRoot}/System32/vcomp140.dll)
+
+    string(REPLACE "\\" "/" SystemRoot $ENV{SystemRoot})
+    file(GLOB VC_LIB_LIST ${SystemRoot}/System32/msvcp140.dll ${SystemRoot}/System32/vcomp140.dll)
+
     file(GLOB JPEG_LIB_LIST ${jpeg_turbo_LIBPATH}/*.dll)
     file(GLOB SQLITE_LIB_LIST ${sqlite_LIBPATH}/*.dll)
     install(

From 5e877a7715adb10b4955e8a1f6a4d6f1c87488c9 Mon Sep 17 00:00:00 2001
From: liuxiao <liuxiao93@huawei.com>
Date: Fri, 24 Apr 2020 17:24:23 +0800
Subject: [PATCH 073/242] modify api and add example

---
 mindspore/ccsrc/transform/op_declare.cc       |  4 +--
 mindspore/nn/layer/normalization.py           |  5 ++-
 mindspore/nn/optim/adam.py                    |  3 +-
 mindspore/ops/operations/nn_ops.py            | 36 +++++++++++++++++--
 .../test_tbe_ops/test_relu_v2_grad.py         |  0
 tests/ut/python/ops/test_ops.py               |  6 ++--
 6 files changed, 42 insertions(+), 12 deletions(-)
 rename tests/st/ops/{davinci => ascend}/test_tbe_ops/test_relu_v2_grad.py (100%)

diff --git a/mindspore/ccsrc/transform/op_declare.cc b/mindspore/ccsrc/transform/op_declare.cc
index f39d7e4223..477c915b15 100644
--- a/mindspore/ccsrc/transform/op_declare.cc
+++ b/mindspore/ccsrc/transform/op_declare.cc
@@ -893,8 +893,8 @@ ATTR_MAP(TransposeD) = EMPTY_ATTR_MAP;
 
 // DropOutGenMask
 INPUT_MAP(DropOutGenMask) = {{1, INPUT_DESC(shape)}, {2, INPUT_DESC(prob)}};
-ATTR_MAP(DropOutGenMask) = {{"seed", ATTR_DESC(seed, AnyTraits<int64_t>())},
-                            {"seed2", ATTR_DESC(seed2, AnyTraits<int64_t>())}};
+ATTR_MAP(DropOutGenMask) = {{"Seed0", ATTR_DESC(seed, AnyTraits<int64_t>())},
+                            {"Seed1", ATTR_DESC(seed2, AnyTraits<int64_t>())}};
 OUTPUT_MAP(DropOutGenMask) = {{0, OUTPUT_DESC(y)}};
 
 // Pack
diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 3ef2381ba1..2203451260 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -397,9 +397,8 @@ class LayerNorm(Cell):
         y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
 
     Args:
-        normalized_shape (Union(tuple[int], list[int]): The normalization is performed over axes
-            `begin_norm_axis ... R - 1` and centering and scaling parameters are calculated over
-            `begin_params_axis ... R - 1`.
+        normalized_shape (Union[tuple[int], list[int]]): The normalization is performed over axes
+            `begin_norm_axis ... R - 1`.
         begin_norm_axis (int): It first normalization dimension: normalization will be performed along dimensions
             `begin_norm_axis: rank(inputs)`, the value should be in [-1, rank(input)). Default: -1.
         begin_params_axis (int): The first parameter(beta, gamma)dimension: scale and centering parameters
diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 4e88c3ef93..9ae1431247 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -126,7 +126,8 @@ class Adam(Optimizer):
     Args:
         params (list[Parameter]): A list of parameter, which will be updated. The element in `params`
                                   should be class mindspore.Parameter.
-        learning_rate (float): The Learning rate.
+        learning_rate (Union[float, Tensor, Iterable]): The Learning rate.
+            Iterable type is used for the dynamic learning rate.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0).
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0).
         eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index c03aa45490..fcb06d749f 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -490,6 +490,15 @@ class FusedBatchNorm(Primitive):
         - **updated_bias** (Tensor) - Tensor of shape :math:`(C,)`.
         - **updated_moving_mean** (Tensor) - Tensor of shape :math:`(C,)`.
         - **updated_moving_variance** (Tensor) - Tensor of shape :math:`(C,)`.
+
+    Examples:
+        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
+        >>> scale = Tensor(np.ones([64]), mindspore.float32)
+        >>> bias = Tensor(np.ones([64]), mindspore.float32)
+        >>> mean = Tensor(np.ones([64]), mindspore.float32)
+        >>> variance = Tensor(np.ones([64]), mindspore.float32)
+        >>> op = P.FusedBatchNorm()
+        >>> output = op(input_x, scale, bias, mean, variance)
     """
 
     @prim_attr_register
@@ -733,10 +742,17 @@ class DepthwiseConv2dNative(PrimitiveWithInfer):
     Inputs:
         - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
         - **weight** (Tensor) - Set size of kernel is :math:`(K_1, K_2)`, then the shape is
-          :math:`(\text{channel_multiplier}, C_{in}, K_1, K_2)`.
+          :math:`(K, C_{in}, K_1, K_2)`, `K` must be 1.
 
     Outputs:
         Tensor of shape :math:`(N, C_{in} * \text{channel_multiplier}, H_{out}, W_{out})`.
+
+    Examples:
+        >>> input = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([1, 32, 3, 3]), mindspore.float32)
+        >>> depthwise_conv2d = P.DepthwiseConv2dNative(channel_multiplier = 3, kernel_size = (3, 3))
+        >>> output = depthwise_conv2d(input, weight)
+        >>> assert output.shape() == (10, 96, 30, 30)
     """
 
     @prim_attr_register
@@ -1655,6 +1671,15 @@ class LayerNorm(Primitive):
           The shape is :math:`(N, C)`.
         - **updated_gamma** (Tensor) - Tensor of shape :math:`(C,)`.
         - **updated_beta** (Tensor) - Tensor of shape :math:`(C,)`.
+
+    Examples:
+        >>> input_x = Tensor(np.array([[1, 2, 3], [1, 2, 3]]), mindspore.float32)
+        >>> gamma = Tensor(np.ones([3]), mindspore.float32)
+        >>> beta = Tensor(np.ones([3]), mindspore.float32)
+        >>> layer_norm = P.LayerNorm()
+        >>> output = layer_norm(input_x, gamma, beta)
+        ([[-0.22474492, 1., 2.2247488], [-0.22474492, 1., 2.2247488]],
+         [[2.], [2.]], [[0.6666667], [0.6666667]])
     """
 
     @prim_attr_register
@@ -2312,11 +2337,13 @@ class Adam(PrimitiveWithInfer):
 
     Inputs:
         - **var** (Tensor) - Weights to be updated.
-        - **m** (Tensor) - The 1st moment vector in the updating formula.
+        - **m** (Tensor) - The 1st moment vector in the updating formula. Has the same type as `var`.
         - **v** (Tensor) - the 2nd moment vector in the updating formula.
+          Mean square gradients, has the same type as `var`.
         - **beta1_power** (float) - :math:`beta_1^t` in the updating formula.
         - **beta2_power** (float) - :math:`beta_2^t` in the updating formula.
-        - **lr** (float) - :math:`l` in the updating formula.
+        - **lr** (Union[float, Tensor, Iterable]) - :math:`l` in the updating formula.
+          Iterable type is used for the dynamic learning rate.
         - **beta1** (float) - The exponential decay rate for the 1st moment estimates.
         - **beta2** (float) - The exponential decay rate for the 2nd moment estimates.
         - **epsilon** (float) - Term added to the denominator to improve numerical stability.
@@ -2328,6 +2355,9 @@ class Adam(PrimitiveWithInfer):
         - **var** (Tensor) - The same shape and data type as `var`.
         - **m** (Tensor) - The same shape and data type as `m`.
         - **v** (Tensor) - The same shape and data type as `v`.
+
+    Examples:
+        Please refer to the usage in nn.Adam.
     """
 
     @prim_attr_register
diff --git a/tests/st/ops/davinci/test_tbe_ops/test_relu_v2_grad.py b/tests/st/ops/ascend/test_tbe_ops/test_relu_v2_grad.py
similarity index 100%
rename from tests/st/ops/davinci/test_tbe_ops/test_relu_v2_grad.py
rename to tests/st/ops/ascend/test_tbe_ops/test_relu_v2_grad.py
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index d6622e76f4..72173a4393 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -793,12 +793,12 @@ test_case_nn_ops = [
         'desc_bprop': [[5, 5]]}),
     ('DepthwiseConv2dNative_1', {
         'block': P.DepthwiseConv2dNative(3, (3, 3), pad_mode="pad", pad=1, stride=2),
-        'desc_inputs': [[10, 32, 32, 32], [3, 32, 3, 3]],
-        'desc_bprop': [[10, 30, 16, 16]]}),
+        'desc_inputs': [[10, 32, 32, 32], [1, 32, 3, 3]],
+        'desc_bprop': [[10, 32, 16, 16]]}),
     ('DepthwiseConv2dNative_2', {
         'block': P.DepthwiseConv2dNative(1, (3, 3), pad_mode="same", pad=0, stride=1),
         'desc_inputs': [[2592, 2048, 4, 4], [1, 2048, 3, 3]],
-        'desc_bprop': [[2592, 2048, 2, 2]]}),
+        'desc_bprop': [[2592, 2048, 4, 4]]}),
     ('SigmoidCrossEntropyWithLogits', {
         'block': P.SigmoidCrossEntropyWithLogits(),
         'desc_inputs': [[128, 10], [128, 10]],

From ba3a1f4ffef9746eaa5d5bae06424fc2ff93c739 Mon Sep 17 00:00:00 2001
From: lichenever <lichentrue@163.com>
Date: Fri, 24 Apr 2020 15:39:38 +0800
Subject: [PATCH 074/242] change get_group to internal interface

---
 mindspore/communication/__init__.py   |  4 ++--
 mindspore/communication/management.py | 12 ++++++------
 mindspore/ops/operations/comm_ops.py  | 28 +++++++++++++--------------
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/mindspore/communication/__init__.py b/mindspore/communication/__init__.py
index 65078f6820..26acc53d91 100644
--- a/mindspore/communication/__init__.py
+++ b/mindspore/communication/__init__.py
@@ -17,12 +17,12 @@ Collective communication interface.
 """
 
 from .management import GlobalComm, init, release, get_rank, get_group_size, get_world_rank_from_group_rank, \
-    get_group_rank_from_world_rank, create_group, HCCL_WORLD_COMM_GROUP, NCCL_WORLD_COMM_GROUP, get_group, \
+    get_group_rank_from_world_rank, create_group, HCCL_WORLD_COMM_GROUP, NCCL_WORLD_COMM_GROUP, \
     get_local_rank, get_local_rank_size, destroy_group
 
 
 __all__ = [
     "GlobalComm", "init", "release", "get_rank", "get_group_size", "get_world_rank_from_group_rank",
-    "get_group_rank_from_world_rank", "create_group", "HCCL_WORLD_COMM_GROUP", "NCCL_WORLD_COMM_GROUP", "get_group",
+    "get_group_rank_from_world_rank", "create_group", "HCCL_WORLD_COMM_GROUP", "NCCL_WORLD_COMM_GROUP",
     "get_local_rank", "get_local_rank_size", "destroy_group"
 ]
diff --git a/mindspore/communication/management.py b/mindspore/communication/management.py
index 7208538a07..1cd60fe2e5 100755
--- a/mindspore/communication/management.py
+++ b/mindspore/communication/management.py
@@ -21,7 +21,7 @@ from ._comm_helper import Backend, _get_rank_helper, _get_size_helper, \
 from .._c_expression import init_hccl, finalize_hccl, init_gpu_collective
 
 
-__all__ = ["init", "release", "get_rank", "get_local_rank", "get_group_size", "get_group",
+__all__ = ["init", "release", "get_rank", "get_local_rank", "get_group_size",
            "get_local_rank_size", "get_world_rank_from_group_rank",
            "get_group_rank_from_world_rank", "create_group", "destroy_group",
            "HCCL_WORLD_COMM_GROUP", "NCCL_WORLD_COMM_GROUP"]
@@ -30,7 +30,7 @@ DEFAULT_WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP
 DEFAULT_BACKEND = Backend("hccl")
 
 
-def get_group(group):
+def _get_group(group):
     """Get the global world group if the group is default world comm group."""
     if group == DEFAULT_WORLD_COMM_GROUP:
         return GlobalComm.WORLD_COMM_GROUP
@@ -100,7 +100,7 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
         ValueError: If backend is invalid.
         RuntimeError: If hccl/nccl is not available or nccl not supports.
     """
-    return _get_rank_helper(group=get_group(group), backend=GlobalComm.BACKEND)
+    return _get_rank_helper(group=_get_group(group), backend=GlobalComm.BACKEND)
 
 
 def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
@@ -121,7 +121,7 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
         ValueError: If backend is invalid.
         RuntimeError: If hccl/nccl is not available or nccl not supports.
     """
-    return _get_local_rank_helper(group=get_group(group), backend=GlobalComm.BACKEND)
+    return _get_local_rank_helper(group=_get_group(group), backend=GlobalComm.BACKEND)
 
 
 def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
@@ -139,7 +139,7 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
         ValueError: If backend is invalid.
         RuntimeError: If hccl/nccl is not available or nccl not supports.
     """
-    return _get_size_helper(group=get_group(group), backend=GlobalComm.BACKEND)
+    return _get_size_helper(group=_get_group(group), backend=GlobalComm.BACKEND)
 
 
 def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
@@ -160,7 +160,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
         ValueError: If backend is invalid.
         RuntimeError: If hccl/nccl is not available or nccl not supports.
     """
-    return _get_local_size_helper(group=get_group(group), backend=GlobalComm.BACKEND)
+    return _get_local_size_helper(group=_get_group(group), backend=GlobalComm.BACKEND)
 
 
 def get_world_rank_from_group_rank(group, group_rank_id):
diff --git a/mindspore/ops/operations/comm_ops.py b/mindspore/ops/operations/comm_ops.py
index fbad5b49d3..969091de97 100644
--- a/mindspore/ops/operations/comm_ops.py
+++ b/mindspore/ops/operations/comm_ops.py
@@ -17,7 +17,7 @@
 
 from ..._checkparam import Validator as validator
 from ..._checkparam import Rel
-from ...communication.management import get_rank, get_group_size, GlobalComm, get_group
+from ...communication.management import get_rank, get_group_size, GlobalComm, _get_group
 from ...common import dtype as mstype
 from ..primitive import PrimitiveWithInfer, prim_attr_register
 
@@ -88,10 +88,10 @@ class AllReduce(PrimitiveWithInfer):
             raise TypeError("The operation of AllReduce should be str.")
         if op == ReduceOp.PROD:
             raise RuntimeError("The operation of AllReduce 'prod' is not supported yet.")
-        if not isinstance(get_group(group), str):
+        if not isinstance(_get_group(group), str):
             raise TypeError("The group of AllReduce should be str.")
         self.op = op
-        self.add_prim_attr('group', get_group(group))
+        self.add_prim_attr('group', _get_group(group))
         self.add_prim_attr('fusion', 0)
 
     def vm_impl(self, x):
@@ -149,12 +149,12 @@ class AllGather(PrimitiveWithInfer):
 
     @prim_attr_register
     def __init__(self, group=GlobalComm.WORLD_COMM_GROUP):
-        validator.check_value_type('group', get_group(group), (str,), self.name)
-        self.rank = get_rank(get_group(group))
-        self.rank_size = get_group_size(get_group(group))
+        validator.check_value_type('group', _get_group(group), (str,), self.name)
+        self.rank = get_rank(_get_group(group))
+        self.rank_size = get_group_size(_get_group(group))
         validator.check('rank', self.rank, 'rank_size', self.rank_size, Rel.LT, self.name)
         self.add_prim_attr('rank_size', self.rank_size)
-        self.add_prim_attr('group', get_group(group))
+        self.add_prim_attr('group', _get_group(group))
 
     def infer_shape(self, x_shape):
         x_shape[0] = x_shape[0] * self.rank_size
@@ -205,11 +205,11 @@ class ReduceScatter(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, op=ReduceOp.SUM, group=GlobalComm.WORLD_COMM_GROUP):
         validator.check_value_type('op', op, (type(ReduceOp.SUM),), self.name)
-        validator.check_value_type('group', get_group(group), (str,), self.name)
+        validator.check_value_type('group', _get_group(group), (str,), self.name)
         self.op = op
-        self.rank_size = get_group_size(get_group(group))
+        self.rank_size = get_group_size(_get_group(group))
         self.add_prim_attr('rank_size', self.rank_size)
-        self.add_prim_attr('group', get_group(group))
+        self.add_prim_attr('group', _get_group(group))
 
     def infer_shape(self, x_shape):
         if x_shape[0] % self.rank_size != 0:
@@ -268,8 +268,8 @@ class Broadcast(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, root_rank, group=GlobalComm.WORLD_COMM_GROUP):
         validator.check_value_type('root_rank', root_rank, (int,), self.name)
-        validator.check_value_type('group', get_group(group), (str,), self.name)
-        self.add_prim_attr('group', get_group(group))
+        validator.check_value_type('group', _get_group(group), (str,), self.name)
+        self.add_prim_attr('group', _get_group(group))
 
     def infer_shape(self, x_shape):
         return x_shape
@@ -306,11 +306,11 @@ class _AlltoAll(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, split_count, split_dim, concat_dim, group=GlobalComm.WORLD_COMM_GROUP):
         """init AlltoAll"""
-        validator.check_value_type('group', get_group(group), (str,), self.name)
+        validator.check_value_type('group', _get_group(group), (str,), self.name)
         self.split_count = split_count
         self.split_dim = split_dim
         self.concat_dim = concat_dim
-        self.add_prim_attr('group', get_group(group))
+        self.add_prim_attr('group', _get_group(group))
 
     def infer_shape(self, x_shape):
         x_shape[self.concat_dim] = x_shape[self.concat_dim] * self.split_count

From 9b19cd8312a696d64a56bff6cb60973ae829b8db Mon Sep 17 00:00:00 2001
From: guohongzilong <2713219276@qq.com>
Date: Fri, 24 Apr 2020 13:28:25 +0800
Subject: [PATCH 075/242] add comment to save_dump_path for created real path

---
 mindspore/context.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/mindspore/context.py b/mindspore/context.py
index 159522a87a..f181dbe839 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -487,7 +487,7 @@ def reset_auto_parallel_context():
                  variable_memory_max_size=str)
 def set_context(**kwargs):
     """
-    Set context for running environment.
+    Sets context for running environment.
 
     Context should be configured before running your program. If there is no configuration,
     the "Ascend" device target will be used by default. GRAPH_MODE or
@@ -524,10 +524,12 @@ def set_context(**kwargs):
         reserve_class_name_in_scope (bool) : Whether to save the network class name in the scope. Default: True.
         enable_reduce_precision (bool): Whether to enable precision reduction. Default: True.
         enable_dump (bool): Whether to enable dump. Default: False.
-        save_dump_path (str): Set path to dump data. Default: ".".
+        save_dump_path (str): When the program is executed on Ascend, operators can dump data here.
+            The root dump path is configured in /home/HwHiAiUser/ide_daemon/ide_daemon.cfg.
+            So the real dump path is "{configured root dump path}/{`save_dump_path`}". Default: ".".
         enable_dynamic_memory (bool): Whether to enable dynamic memory. Default: False.
-        graph_memory_max_size (str): Set graph memory max size. Default: "26GB".
-        variable_memory_max_size (str): Set variable memory max size. Default: "5GB".
+        graph_memory_max_size (str): Sets graph memory max size. Default: "26GB".
+        variable_memory_max_size (str): Sets variable memory max size. Default: "5GB".
 
     Raises:
         ValueError: If input key is not an attribute in context.

From b4df04b74ccb0d3679e0a592c1bfe3aa7f138b92 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Sun, 26 Apr 2020 02:24:00 -0400
Subject: [PATCH 076/242] fix groupnorm bug and change globalbn parameter name

---
 mindspore/nn/layer/normalization.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 3ef2381ba1..09a0b4bb27 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -41,7 +41,7 @@ class _BatchNorm(Cell):
                  moving_mean_init='zeros',
                  moving_var_init='ones',
                  use_batch_statistics=True,
-                 group=1):
+                 device_num_each_group=1):
         super(_BatchNorm, self).__init__()
         if num_features < 1:
             raise ValueError("num_features must be at least 1")
@@ -60,7 +60,7 @@ class _BatchNorm(Cell):
             gamma_init, num_features), name="gamma", requires_grad=affine)
         self.beta = Parameter(initializer(
             beta_init, num_features), name="beta", requires_grad=affine)
-        self.group = check_int_positive(group)
+        self.group = check_int_positive(device_num_each_group)
         if self.group != 1:
             self.rank_id = get_rank()
             self.rank_size = get_group_size()
@@ -324,7 +324,7 @@ class GlobalBatchNorm(_BatchNorm):
 
     Args:
         num_features (int): `C` from an expected input of size (N, C, H, W).
-        group (int): The number of device in each group.
+        device_num_each_group (int): The number of device in each group.
         eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
         momentum (float): A floating hyperparameter of the momentum for the
             running_mean and running_var computation. Default: 0.9.
@@ -364,7 +364,7 @@ class GlobalBatchNorm(_BatchNorm):
                  moving_mean_init='zeros',
                  moving_var_init='ones',
                  use_batch_statistics=True,
-                 group=1):
+                 device_num_each_group=1):
         super(GlobalBatchNorm, self).__init__(num_features,
                                               eps,
                                               momentum,
@@ -374,8 +374,8 @@ class GlobalBatchNorm(_BatchNorm):
                                               moving_mean_init,
                                               moving_var_init,
                                               use_batch_statistics,
-                                              group)
-        self.group = check_int_positive(group)
+                                              device_num_each_group)
+        self.group = check_int_positive(device_num_each_group)
         if self.group <= 1:
             raise ValueError("the number of group must be greater than 1.")
     def _check_data_dim(self, x):
@@ -482,17 +482,17 @@ class GroupNorm(Cell):
         >>> x = Tensor(np.ones([1, 64, 256, 256], np.float32))
         >>> goup_norm_op(x)
     """
-    def __init__(self, num_groups, num_channels, eps=1e-05, affine=True):
+    def __init__(self, num_groups, num_channels, eps=1e-05, affine=True, gamma_init='ones', beta_init='zeros'):
         super(GroupNorm, self).__init__()
         self.num_groups = check_int_positive(num_groups)
         self.num_channels = check_int_positive(num_channels)
         if num_channels % num_groups != 0:
             raise ValueError("num_channels should be divided by num_groups")
-        self.eps = Tensor(check_typename('eps', eps, (float,)), mstype.float32)
+        self.eps = check_typename('eps', eps, (float,))
         self.affine = check_bool(affine)
 
-        gamma = initializer('ones', [num_channels, 1, 1], mstype.float32)
-        beta = initializer('zeros', [num_channels, 1, 1], mstype.float32)
+        gamma = initializer(gamma_init, [num_channels, 1, 1])
+        beta = initializer(beta_init, [num_channels, 1, 1])
         if self.affine:
             self.gamma = Parameter(gamma, name='gamma')
             self.beta = Parameter(beta, name='beta')

From 36a62576e84ab7218bd9650f1da2498e353e8e40 Mon Sep 17 00:00:00 2001
From: yangzhenzhang <285824651@qq.com>
Date: Sun, 26 Apr 2020 11:14:12 +0800
Subject: [PATCH 077/242] support forward graph

---
 mindspore/ccsrc/parallel/step_parallel.cc     | 173 +++++++++---------
 mindspore/ccsrc/parallel/step_parallel.h      |   3 +-
 .../ut/python/parallel/test_forward_graph.py  |  82 +++++++++
 3 files changed, 168 insertions(+), 90 deletions(-)
 create mode 100644 tests/ut/python/parallel/test_forward_graph.py

diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index 91d1461803..17a6228552 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -345,7 +345,6 @@ bool FindCommunicationOp(const std::vector<AnfNodePtr> &all_nodes) {
       continue;
     }
     auto cnode = node->cast<CNodePtr>();
-    MS_EXCEPTION_IF_NULL(cnode);
     if (!IsValueNode<Primitive>(cnode->input(0))) {
       continue;
     }
@@ -903,9 +902,15 @@ void InsertMirrorOps(const MirrorOps &mirror_ops, const CNodePtr &node) {
   }
 }
 
-void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNodePtr &node, bool is_loss_node) {
+void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNodePtr &node,
+                           const std::vector<std::pair<CNodePtr, CNodePtr>> &sens_loss_pairs) {
   MS_EXCEPTION_IF_NULL(distribute_operator);
   MS_EXCEPTION_IF_NULL(node);
+
+  bool is_loss_cnode =
+    std::any_of(sens_loss_pairs.begin(), sens_loss_pairs.end(),
+                [node](const std::pair<CNodePtr, CNodePtr> &element) { return element.second == node; });
+
   MirrorOps mirror_ops = distribute_operator->mirror_ops();
   VirtualDivOp virtual_div_op = distribute_operator->virtual_div_op();
   // insert mirror op
@@ -914,7 +919,7 @@ void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNo
     InsertMirrorOps(mirror_ops, node);
   }
   // insert virtual div op
-  if (!virtual_div_op.empty() && is_loss_node) {
+  if (!virtual_div_op.empty() && is_loss_cnode) {
     MS_LOG(INFO) << "insert virtual div op for " << distribute_operator->name();
     InsertVirtualDivOp(virtual_div_op, node);
   }
@@ -986,10 +991,6 @@ StrategyPtr ExtractStrategy(std::unordered_map<std::string, ValuePtr> attrs) {
       Dimensions dim;
       if (elements[index]->isa<ValueSequeue>()) {
         ValueTuplePtr value_tuple = elements[index]->cast<ValueTuplePtr>();
-        if (value_tuple == nullptr) {
-          MS_LOG(EXCEPTION) << "Failure:value_tuple is nullptr";
-        }
-
         std::vector<ValuePtr> value_vector = value_tuple->value();
         (void)std::transform(value_vector.begin(), value_vector.end(), std::back_inserter(dim),
                              [](const ValuePtr &value) { return static_cast<int32_t>(GetValue<int>(value)); });
@@ -1013,7 +1014,6 @@ Shapes GetNodeShape(const AnfNodePtr &node) {
   BaseShapePtr base_shape_ptr = node->Shape();
   if (node->isa<CNode>()) {
     auto cnode = node->cast<CNodePtr>();
-    MS_EXCEPTION_IF_NULL(cnode);
     if (IsValueNode<Primitive>(cnode->input(0))) {
       PrimitivePtr prim = GetValueNode<PrimitivePtr>(cnode->input(0));
       MS_EXCEPTION_IF_NULL(prim);
@@ -1190,7 +1190,7 @@ std::pair<AnfNodePtr, int> FindSubGraph(const FuncGraphPtr &graph, const AnfNode
         continue;
       }
       CNodePtr graph_cnode_inp0 = graph_cnode->input(0)->cast<CNodePtr>();
-      if ((graph_cnode_inp0 == nullptr) || !IsValueNode<FuncGraph>(graph_cnode_inp0->input(1))) {
+      if (!IsValueNode<FuncGraph>(graph_cnode_inp0->input(1))) {
         continue;
       }
       FuncGraphPtr graph_sub = GetValueNode<FuncGraphPtr>(graph_cnode_inp0->input(1));
@@ -1692,14 +1692,8 @@ CNodePtr FindLossCNode(const FuncGraphPtr &func_graph) {
   return pre_cnode;
 }
 
-TensorLayouts GetLossNodeGradOutputLayout(const CNodePtr &cnode) {
-  MS_EXCEPTION_IF_NULL(cnode);
+TensorLayouts GetLossNodeGradOutputLayout(const CNodePtr &loss_cnode) {
   TensorLayouts ret;
-  if (!IsValueNode<FuncGraph>(cnode->input(1))) {
-    MS_LOG(EXCEPTION) << "Sens can't find the corresponding graph.";
-  }
-  auto func_graph = GetValueNode<FuncGraphPtr>(cnode->input(1));
-  auto loss_cnode = FindLossCNode(func_graph);
   MS_EXCEPTION_IF_NULL(loss_cnode);
   AnfNodePtr node = loss_cnode->cast<AnfNodePtr>();
   MS_EXCEPTION_IF_NULL(node);
@@ -1735,16 +1729,16 @@ TensorLayouts GetLossNodeGradOutputLayout(const CNodePtr &cnode) {
   return ret;
 }
 
-void SplitSens(const AnfNodePtr &grad_sens_node, const TensorLayout &loss_grad_layout) {
+void SplitSens(const CNodePtr &grad_sens_node, const TensorLayout &loss_grad_layout) {
   MS_EXCEPTION_IF_NULL(grad_sens_node);
-
-  auto cnode = grad_sens_node->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(cnode);
-  AnfNodePtr sens_tensor_node = cnode->input(1);
+  if (grad_sens_node->size() <= 1) {
+    MS_LOG(EXCEPTION) << "The size of grad sens node is smaller than 2";
+  }
+  AnfNodePtr sens_tensor_node = grad_sens_node->input(1);
   MS_EXCEPTION_IF_NULL(sens_tensor_node);
   Shapes sens_shapes = GetNodeShape(sens_tensor_node);
   if (sens_shapes.size() != 1) {
-    MS_LOG(EXCEPTION) << "SplitSens: GetNodeShape for sens_tensor_node, output size is not 1";
+    MS_LOG(EXCEPTION) << "GetNodeShape for sens_tensor_node, output size is not 1";
   }
   // If the shape of sens tensor is [] or [1], no need to split it.
   Shape sens_shape = sens_shapes[0];
@@ -1780,14 +1774,14 @@ void SplitSens(const AnfNodePtr &grad_sens_node, const TensorLayout &loss_grad_l
       sens_tensor_param->set_tensor_layout(std::make_shared<TensorLayout>(loss_grad_layout));
       return;
     }
-    MS_LOG(EXCEPTION) << "SplitSens: the type of sens node is not Tensor or Parameter, it is unsupported now.";
+    MS_LOG(EXCEPTION) << "The type of sens node is not Tensor or Parameter, it is unsupported now.";
   }
 
   // Use _GetTensorSlice operator to split the sens tensor
-  FuncGraphPtr func_graph = cnode->func_graph();  // only cnode can get the graph
+  FuncGraphPtr func_graph = grad_sens_node->func_graph();  // only cnode can get the graph
   MS_EXCEPTION_IF_NULL(func_graph);
   Operator op = CreateGetTensorSliceOp(loss_grad_layout);
-  InsertGetTensorSliceOp(op, cnode, func_graph, 1, SPLIT_SENS);
+  InsertGetTensorSliceOp(op, grad_sens_node, func_graph, 1, SPLIT_SENS);
 }
 
 void InsertForwardOps(const OperatorInfoPtr &distribute_operator, const CNodePtr &cnode) {
@@ -1853,7 +1847,6 @@ std::set<FuncGraphPtr> FindForwardGraphByRootNodes(const AnfNodeSet &root_all_no
     }
 
     auto cnode = node->cast<CNodePtr>();
-    MS_EXCEPTION_IF_NULL(cnode);
     if ((cnode->size() < 2) || !IsValueNode<Primitive>(cnode->input(0))) {
       continue;
     }
@@ -1870,55 +1863,12 @@ std::set<FuncGraphPtr> FindForwardGraphByRootNodes(const AnfNodeSet &root_all_no
   return graph_set;
 }
 
-// Sens node satisfies the following conditions: cnode(sens)-->cnode(tuple_getitem)-->cnode-->cnode(J)
-void StepSplitSens(const AnfNodePtr &node) {
-  if (!node->isa<CNode>()) {
-    return;
-  }
-
-  // cnode(sens)-->cnode(tuple_getitem)
-  auto cnode = node->cast<CNodePtr>();
-  AnfNodePtr expect_tuple_getitem = cnode->input(0);
-  MS_EXCEPTION_IF_NULL(expect_tuple_getitem);
-  if (!expect_tuple_getitem->isa<CNode>()) {
-    return;
-  }
-  auto expect_tuple_getitem_cnode = expect_tuple_getitem->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(expect_tuple_getitem_cnode);
-  if (!IsValueNode<Primitive>(expect_tuple_getitem_cnode->input(0))) {
-    return;
-  }
-  auto expect_tuple_getitem_prim = GetValueNode<PrimitivePtr>(expect_tuple_getitem_cnode->input(0));
-  if (expect_tuple_getitem_prim->name() != TUPLE_GETITEM) {
-    return;
-  }
-
-  // cnode(sens)-->cnode(tuple_getitem)-->cnode
-  AnfNodePtr expect_anonymous = expect_tuple_getitem_cnode->input(1);
-  MS_EXCEPTION_IF_NULL(expect_anonymous);
-  if (!expect_anonymous->isa<CNode>()) {
-    return;
-  }
-
-  // cnode(sens)-->cnode(tuple_getitem)-->cnode-->cnode(J)
-  auto expect_anonymous_cnode = expect_anonymous->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(expect_anonymous_cnode);
-  AnfNodePtr expect_j = expect_anonymous_cnode->input(0);
-  MS_EXCEPTION_IF_NULL(expect_j);
-  if (!expect_j->isa<CNode>()) {
-    return;
-  }
-  auto expect_j_cnode = expect_j->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(expect_j_cnode);
-  if (!IsValueNode<Primitive>(expect_j_cnode->input(0))) {
-    return;
-  }
-  auto expect_j_prim = GetValueNode<PrimitivePtr>(expect_j_cnode->input(0));
-  if (expect_j_prim->name() == J) {
-    auto loss_grad_layout = GetLossNodeGradOutputLayout(expect_j_cnode);
-    if (!loss_grad_layout.empty()) {
-      SplitSens(node, loss_grad_layout[0]);
-    }
+void StepSplitSens(const std::pair<CNodePtr, CNodePtr> &sens_loss_pair) {
+  CNodePtr sens_node = sens_loss_pair.first;  // cnode whose input(1) is the sens tensor (see SplitSens)
+  CNodePtr loss_node = sens_loss_pair.second;  // loss cnode paired with it by GetSensLossPairs
+  auto loss_grad_layout = GetLossNodeGradOutputLayout(loss_node);  // throws if loss_node is null
+  if (!loss_grad_layout.empty()) {
+    SplitSens(sens_node, loss_grad_layout[0]);  // only the first layout is used for the split
   }
 }
 
@@ -1937,26 +1887,77 @@ std::vector<CNodePtr> FindLossCNodeFromRoot(const FuncGraphPtr &root) {
   return loss_node;
 }
 
+// Sens node satisfies the following conditions: cnode(sens)-->cnode(tuple_getitem)-->cnode-->cnode(J)
+std::vector<std::pair<CNodePtr, CNodePtr>> GetSensLossPairs(const FuncGraphPtr &root) {
+  MS_EXCEPTION_IF_NULL(root);
+  std::vector<std::pair<CNodePtr, CNodePtr>> sens_loss_pairs;  // (sens cnode, loss cnode) per match
+  for (auto &node : root->nodes()) {
+    if (!node->isa<CNode>()) {
+      continue;
+    }
+
+    // cnode(sens)-->cnode(tuple_getitem)
+    auto sens_cnode = node->cast<CNodePtr>();  // safe: isa<CNode>() was checked above
+    AnfNodePtr expect_tuple_getitem = sens_cnode->input(0);
+    MS_EXCEPTION_IF_NULL(expect_tuple_getitem);
+    if (!expect_tuple_getitem->isa<CNode>()) {
+      continue;
+    }
+
+    auto expect_tuple_getitem_cnode = expect_tuple_getitem->cast<CNodePtr>();
+    if (!IsSomePrimitive(expect_tuple_getitem_cnode, TUPLE_GETITEM)) {
+      continue;
+    }
+
+    // cnode(sens)-->cnode(tuple_getitem)-->cnode
+    AnfNodePtr expect_anonymous = expect_tuple_getitem_cnode->input(1);
+    MS_EXCEPTION_IF_NULL(expect_anonymous);
+    if (!expect_anonymous->isa<CNode>()) {
+      continue;
+    }
+
+    // cnode(sens)-->cnode(tuple_getitem)-->cnode-->cnode(J)
+    auto expect_anonymous_cnode = expect_anonymous->cast<CNodePtr>();
+    AnfNodePtr expect_j = expect_anonymous_cnode->input(0);
+    MS_EXCEPTION_IF_NULL(expect_j);
+    if (!expect_j->isa<CNode>()) {
+      continue;
+    }
+    auto expect_j_cnode = expect_j->cast<CNodePtr>();
+    if (!IsSomePrimitive(expect_j_cnode, J)) {
+      continue;
+    }
+
+    if (!IsValueNode<FuncGraph>(expect_j_cnode->input(1))) {  // input(1) of J must hold a func graph
+      MS_LOG(EXCEPTION) << "Sens can't find the corresponding graph.";
+    }
+    auto func_graph = GetValueNode<FuncGraphPtr>(expect_j_cnode->input(1));
+    auto loss_cnode = FindLossCNode(func_graph);  // not null-checked here; GetLossNodeGradOutputLayout checks it
+    std::pair<CNodePtr, CNodePtr> sens_loss_pair = std::make_pair(sens_cnode, loss_cnode);
+    sens_loss_pairs.push_back(sens_loss_pair);
+  }
+  return sens_loss_pairs;  // empty when no sens node matched; ParallelCommunication then skips backward ops
+}
+
 void ParallelCommunication(const FuncGraphPtr &root, const std::vector<AnfNodePtr> &all_nodes,
                            const FuncGraphManagerPtr &manager) {
   MS_EXCEPTION_IF_NULL(root);
   MS_EXCEPTION_IF_NULL(manager);
   TensorRedistribution tensor_redistribution;
-  AnfNodePtr grad_sens_node = nullptr;
 
-  std::vector<CNodePtr> loss_cnode = FindLossCNodeFromRoot(root);
+  std::vector<std::pair<CNodePtr, CNodePtr>> sens_loss_pairs = GetSensLossPairs(root);
+  bool has_backward = !sens_loss_pairs.empty();
   // split sens must before inserting the operators.
-  for (auto &node : all_nodes) {
+  for (auto &pair : sens_loss_pairs) {
     // If the shape of grad-sens tensor is not [] or [1], use get tensor slice to handel it.
     // If the type of sens node is not Tensor, it is unsupported now, do nothing default.
-    StepSplitSens(node);
+    StepSplitSens(pair);
   }
 
   for (auto &node : all_nodes) {
     MS_EXCEPTION_IF_NULL(node);
     if (node->isa<CNode>()) {
       auto cnode = node->cast<CNodePtr>();
-      MS_EXCEPTION_IF_NULL(cnode);
       if (!IsValueNode<Primitive>(cnode->input(0))) {
         continue;
       }
@@ -1965,11 +1966,6 @@ void ParallelCommunication(const FuncGraphPtr &root, const std::vector<AnfNodePt
         continue;
       }
 
-      bool is_loss_cnode = false;
-      auto iter = std::find(loss_cnode.begin(), loss_cnode.end(), cnode);
-      if (iter != loss_cnode.end()) {
-        is_loss_cnode = true;
-      }
       // insert forward ops
       InsertForwardOps(distribute_operator, cnode);
 
@@ -1977,7 +1973,9 @@ void ParallelCommunication(const FuncGraphPtr &root, const std::vector<AnfNodePt
       StepRedistribution(cnode, distribute_operator, cnode, tensor_redistribution, cnode);
 
       // insert backward ops
-      BackwardCommunication(distribute_operator, cnode, is_loss_cnode);
+      if (has_backward) {
+        BackwardCommunication(distribute_operator, cnode, sens_loss_pairs);
+      }
 
       // StepReplace
       StepReplace(distribute_operator, cnode);
@@ -2099,7 +2097,6 @@ void SetForwardFlag(const std::vector<AnfNodePtr> &all_nodes) {
       continue;
     }
     auto cnode = node->cast<CNodePtr>();
-    MS_EXCEPTION_IF_NULL(cnode);
     if (!IsValueNode<Primitive>(cnode->input(0))) {
       continue;
     }
@@ -2117,7 +2114,6 @@ void SetForwardFlag(const AnfNodeSet &all_nodes) {
       continue;
     }
     auto cnode = node->cast<CNodePtr>();
-    MS_EXCEPTION_IF_NULL(cnode);
     if (!IsValueNode<Primitive>(cnode->input(0))) {
       continue;
     }
@@ -2146,7 +2142,6 @@ std::vector<AnfNodePtr> FindRootForwardCNode(const FuncGraphPtr &graph, const An
       continue;
     }
     auto cnode = node->cast<CNodePtr>();
-    MS_EXCEPTION_IF_NULL(cnode);
     auto root_node_id = node->UniqueIdThroughCopy();
     if (loss_cnode_id == root_node_id) {
       root_forward_nodes = DeepLinkedGraphSearch(cnode);
diff --git a/mindspore/ccsrc/parallel/step_parallel.h b/mindspore/ccsrc/parallel/step_parallel.h
index b0d128f515..745794912b 100644
--- a/mindspore/ccsrc/parallel/step_parallel.h
+++ b/mindspore/ccsrc/parallel/step_parallel.h
@@ -82,7 +82,8 @@ std::pair<bool, CNodePtr> FindCNode(const AnfNodePtr &anode, const std::string &
 
 void InsertMirrorOps(const MirrorOps &mirror_ops, const CNodePtr &node);
 
-void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNodePtr &node, bool is_loss_node);
+void BackwardCommunication(const OperatorInfoPtr &distribute_operator, const CNodePtr &node,
+                           const std::vector<std::pair<CNodePtr, CNodePtr>> &sens_loss_pairs);
 
 // Generate and init parallel operator
 OperatorInfoPtr OperatorInstance(const PrimitivePtr &prim, const PrimitiveAttrs &attrs,
diff --git a/tests/ut/python/parallel/test_forward_graph.py b/tests/ut/python/parallel/test_forward_graph.py
new file mode 100644
index 0000000000..76cd5b4178
--- /dev/null
+++ b/tests/ut/python/parallel/test_forward_graph.py
@@ -0,0 +1,82 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import numpy as np
+import mindspore as ms
+from mindspore import context, Tensor, Parameter
+from mindspore.nn import Cell
+from mindspore.ops import operations as P
+from mindspore.common.api import _executor
+
+
+class Net(Cell):  # forward-only cell: out = Neg(Mul(x, w1)); it defines no loss
+    def __init__(self, mul_weight, strategy1=None, strategy2=None):
+        super().__init__()
+        self.mul = P.Mul().set_strategy(strategy1)  # strategy1 stays None in the auto_parallel test
+        self.neg = P.Neg().set_strategy(strategy2)  # strategy2 stays None in the auto_parallel test
+        self.mul_weight = Parameter(mul_weight, "w1")
+
+    def construct(self, x, b):
+        out = self.mul(x, self.mul_weight)
+        out = self.neg(out)
+        return out, b  # b is returned unchanged as a second output
+
+
+_x = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
+_w1 = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
+_b = Tensor(np.ones([128, 64, 32]), dtype=ms.float32)
+
+
+def compile(net):
+    _executor.compile(net, _x,  _b)
+    context.reset_auto_parallel_context()
+
+
+def test_forward_graph_data_parallel():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((16, 1, 1), (16, 1, 1))
+    strategy2 = ((16, 1, 1), )
+    net = Net(_w1, strategy1, strategy2)
+    compile(net)
+
+
+def test_forward_graph_model_parallel():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((1, 1, 16), (1, 1, 16))
+    strategy2 = ((1, 1, 16), )
+    net = Net(_w1, strategy1, strategy2)
+    compile(net)
+
+
+def test_forward_graph_hybrid_parallel():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((2, 2, 4), (2, 2, 4))
+    strategy2 = ((2, 2, 4), )
+    net = Net(_w1, strategy1, strategy2)
+    compile(net)
+
+
+def test_forward_graph_auto_parallel():
+    context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
+    net = Net(_w1)
+    compile(net)
+
+
+def test_forward_graph_repeat_calc():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((2, 2, 4), (2, 2, 4))
+    strategy2 = ((1, 2, 2), )
+    net = Net(_w1, strategy1, strategy2)
+    compile(net)
+

From 664f2628e5576a5a104a5b2493eb362bb4550b8e Mon Sep 17 00:00:00 2001
From: limingqi107 <limingqi@huawei.com>
Date: Sun, 26 Apr 2020 14:55:44 +0800
Subject: [PATCH 078/242] optimize gpu allReduce alloc memory performance

---
 .../ccsrc/device/gpu/gpu_kernel_runtime.cc    | 43 ++++++++++++++-----
 .../ccsrc/device/gpu/gpu_kernel_runtime.h     |  3 ++
 mindspore/ccsrc/device/memory_manager.cc      |  2 +-
 .../ccsrc/session/anf_runtime_algorithm.cc    |  8 ----
 4 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
index b3b364b00c..7eea5501d5 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
@@ -261,8 +261,7 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph
   auto &kernels = graph->execution_order();
   for (auto &kernel : kernels) {
     MS_EXCEPTION_IF_NULL(kernel);
-    auto kernel_name = AnfAlgo::GetCNodeName(kernel);
-    if (kernel_name == kAllReduceOpName) {
+    if (AnfAlgo::IsCommunicationOp(kernel)) {
       AllocCommunicationOpInputDynamicRes(kernel);
       AllocCommunicationOpOutputDynamicRes(kernel);
     }
@@ -272,27 +271,31 @@ void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph
 void GPUKernelRuntime::AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel) {
   MS_EXCEPTION_IF_NULL(kernel);
   MS_EXCEPTION_IF_NULL(mem_manager_);
+  bool is_need_alloc_memory = false;  // set when at least one input address has a null ptr_
+  bool is_need_free_memory = false;  // set when at least one input address has a non-null ptr_
   size_t total_size = 0;
   std::vector<size_t> size_list;
   DeviceAddressPtrList addr_list;
   for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(kernel); ++i) {
     auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i);
     MS_EXCEPTION_IF_NULL(device_address);
-    // The inputs of communication kernel are not released.
-    if (device_address->ptr_ != nullptr) {
-      MS_LOG(INFO) << "The inputs of communication kernel are not released.";
-      mem_manager_->FreeMemFromMemPool(device_address);
+    if (device_address->ptr_ == nullptr) {
+      is_need_alloc_memory = true;
+    } else {
+      is_need_free_memory = true;
     }
     total_size += device_address->size_;
     size_list.emplace_back(device_address->size_);
     addr_list.emplace_back(device_address);
   }
-  mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
+  AllocCommunicationOpMemory(is_need_alloc_memory, is_need_free_memory, addr_list, total_size, size_list);
 }
 
 void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel) {
   MS_EXCEPTION_IF_NULL(kernel);
   MS_EXCEPTION_IF_NULL(mem_manager_);
+  bool is_need_alloc_memory = false;
+  bool is_need_free_memory = false;
   size_t total_size = 0;
   std::vector<size_t> size_list;
   DeviceAddressPtrList addr_list;
@@ -302,15 +305,33 @@ void GPUKernelRuntime::AllocCommunicationOpOutputDynamicRes(const mindspore::Anf
   for (size_t i = 0; i < output_sizes.size(); ++i) {
     auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i);
     MS_EXCEPTION_IF_NULL(device_address);
-    // The outputs of communication kernel are not released.
-    if (device_address->ptr_ != nullptr) {
-      MS_LOG(INFO) << "The outputs of communication kernel are not released.";
-      mem_manager_->FreeMemFromMemPool(device_address);
+    if (device_address->ptr_ == nullptr) {
+      is_need_alloc_memory = true;
+    } else {
+      is_need_free_memory = true;
     }
     total_size += output_sizes[i];
     size_list.emplace_back(output_sizes[i]);
     addr_list.emplace_back(device_address);
   }
+  AllocCommunicationOpMemory(is_need_alloc_memory, is_need_free_memory, addr_list, total_size, size_list);
+}
+
+void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, bool is_need_free_memory,
+                                                  const DeviceAddressPtrList addr_list, size_t total_size,
+                                                  std::vector<size_t> size_list) {  // NOTE: lists passed by value
+  if (!is_need_alloc_memory) {  // callers set this only when some address has a null ptr_
+    return;  // every address already has a non-null ptr_, so nothing is freed or allocated
+  }
+  if (is_need_free_memory) {  // mixed case: some addresses still have a non-null ptr_
+    for (const auto &iter : addr_list) {
+      MS_EXCEPTION_IF_NULL(iter);
+      // Free the inputs/outputs of communication kernel which are not released.
+      if (iter->ptr_ != nullptr) {
+        mem_manager_->FreeMemFromMemPool(iter);
+      }
+    }
+  }
   mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
 }
 
diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
index 33d4b4be70..6f0eefc27a 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.h
@@ -58,6 +58,9 @@ class GPUKernelRuntime : public KernelRuntime {
   void AllocCommunicationOpDynamicRes(const session::KernelGraph *graph);
   void AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel);
   void AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel);
+  void AllocCommunicationOpMemory(bool is_need_alloc_memory, bool is_need_free_memory,
+                                  const DeviceAddressPtrList addr_list, size_t total_size,
+                                  std::vector<size_t> size_list);
   void FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, const AddressPtrList &kernel_workspaces,
                             uint32_t graph_id);
   std::unordered_map<uint32_t, MemReuseUtilPtr> mem_reuse_util_map_;
diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc
index dce54495b0..8dd8dfb5e0 100644
--- a/mindspore/ccsrc/device/memory_manager.cc
+++ b/mindspore/ccsrc/device/memory_manager.cc
@@ -172,7 +172,7 @@ void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList ad
                                                    std::vector<size_t> size_list) {
   auto device_ptr_list = MallocContinuousMemFromMemPool(total_size, size_list);
   if (addr_list.size() != device_ptr_list.size()) {
-    MS_LOG(EXCEPTION) << "The size of device list is not equal  to the size of address list.";
+    MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list.";
   }
   for (size_t i = 0; i < addr_list.size(); i++) {
     MS_EXCEPTION_IF_NULL(device_ptr_list[i]);
diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
index 3d5be5298a..e1a18d95da 100644
--- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
@@ -514,10 +514,6 @@ const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node,
       MS_LOG(EXCEPTION) << node->DebugString() << "Invalid nop node";
     }
   }
-  if (output_idx > GetOutputTensorNum(node)) {
-    MS_LOG(EXCEPTION) << "The index [" << output_idx << "] is out of range of the node's output size [ "
-                      << GetOutputTensorNum(node) << "#node:[ " << node->DebugString() << "]";
-  }
   auto kernel_info = node->kernel_info();
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto addr = kernel_info->GetOutputAddr(output_idx);
@@ -539,10 +535,6 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &nod
       MS_LOG(EXCEPTION) << node->DebugString() << "Invalid nop node.";
     }
   }
-  if (output_idx > GetOutputTensorNum(node)) {
-    MS_LOG(EXCEPTION) << "The index [" << output_idx << "] is out of range of the node's output size [ "
-                      << GetOutputTensorNum(node) << "#node:[ " << node->DebugString() << "]";
-  }
   auto kernel_info = node->kernel_info();
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto addr = kernel_info->GetMutableOutputAddr(output_idx);

From 207822943607df38591a6bada09685742c732709 Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Tue, 14 Apr 2020 01:29:08 +0800
Subject: [PATCH 079/242] Add prim name to error message for array_ops

---
 mindspore/_checkparam.py                    | 244 +-----------
 mindspore/ccsrc/optimizer/ad/dfunctor.cc    |   2 +-
 mindspore/nn/layer/pooling.py               |  11 +-
 mindspore/ops/operations/array_ops.py       | 398 +++++++++-----------
 tests/ut/python/ops/test_array_ops_check.py | 159 ++++++++
 tests/ut/python/ops/test_tensor_slice.py    |   4 +-
 tests/vm_impl/vm_me.py                      |  12 +-
 7 files changed, 358 insertions(+), 472 deletions(-)
 create mode 100755 tests/ut/python/ops/test_array_ops_check.py

diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py
index 707ca748b4..78288ad090 100644
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@@ -210,7 +210,7 @@ class Validator:
                 type_names = []
                 for t in valid_values:
                     type_names.append(str(t))
-                types_info = '[' + ", ".join(type_names) + ']'
+                types_info = '[' + ', '.join(type_names) + ']'
                 raise TypeError(f'For \'{prim_name}\' type of `{arg_key}` should be in {types_info},'
                                 f' but got {elem_type}.')
             return (arg_key, elem_type)
@@ -320,224 +320,6 @@ class Validator:
         raise TypeError(f"{msg_prefix} `{arg_name}` must be float.")
 
 
-class ParamValidator:
-    """Parameter validator. NOTICE: this class will be replaced by `class Validator`"""
-
-    @staticmethod
-    def equal(arg_name, arg_value, cond_str, cond):
-        """Judging valid value."""
-        if not cond:
-            raise ValueError(f'The `{arg_name}` must be {cond_str}, but got {arg_value}.')
-
-    @staticmethod
-    def check(arg_name, arg_value, value_name, value, rel=Rel.EQ):
-        """This method is only used for check int values, since when compare float values,
-        we need consider float error."""
-        rel_fn = Rel.get_fns(rel)
-        if not rel_fn(arg_value, value):
-            rel_str = Rel.get_strs(rel).format(f'{value_name}: {value}')
-            raise ValueError(f'The `{arg_name}` should be {rel_str}, but got {arg_value}.')
-
-    @staticmethod
-    def check_integer(arg_name, arg_value, value, rel):
-        """Integer value judgment."""
-        rel_fn = Rel.get_fns(rel)
-        type_mismatch = not isinstance(arg_value, int) or isinstance(arg_value, bool)
-        if type_mismatch or not rel_fn(arg_value, value):
-            rel_str = Rel.get_strs(rel).format(value)
-            raise ValueError(f'The `{arg_name}` should be an int and must {rel_str}, but got {arg_value}.')
-        return arg_value
-
-    @staticmethod
-    def check_shape_length(arg_name, arg_value, value, rel):
-        """Shape length judgment."""
-        rel_fn = Rel.get_fns(rel)
-        type_mismatch = not isinstance(arg_value, int)
-        if type_mismatch or not rel_fn(arg_value, value):
-            rel_str = Rel.get_strs(rel).format(value)
-            raise ValueError(f'The length of `{arg_name}` should be an int and must {rel_str}, but got {arg_value}')
-        return arg_value
-
-    @staticmethod
-    def check_int_range(arg_name, arg_value, lower_limit, upper_limit, rel):
-        """This method is only used for check int values,
-        since when compare float values, we need consider float error."""
-        rel_fn = Rel.get_fns(rel)
-        type_mismatch = not isinstance(arg_value, int)
-        if type_mismatch or not rel_fn(arg_value, lower_limit, upper_limit):
-            rel_str = Rel.get_strs(rel).format(lower_limit, upper_limit)
-            raise ValueError(f'The `{arg_name}` should be an int in range {rel_str}, but got {arg_value}.')
-        return arg_value
-
-    @staticmethod
-    def check_isinstance(arg_name, arg_value, classes):
-        """Check arg isinstance of classes"""
-        if not isinstance(arg_value, classes):
-            raise ValueError(f'The `{arg_name}` should be isinstance of {classes}, but got {arg_value}.')
-        return arg_value
-
-    @staticmethod
-    def check_number_range(arg_name, arg_value, lower_limit, upper_limit, rel):
-        """Is it necessary to consider error when comparing float values."""
-        rel_fn = Rel.get_fns(rel)
-        if not rel_fn(arg_value, lower_limit, upper_limit):
-            rel_str = Rel.get_strs(rel).format(lower_limit, upper_limit)
-            raise ValueError(f'The `{arg_name}` should be in range {rel_str}, but got {arg_value}.')
-        return arg_value
-
-    @staticmethod
-    def check_subclass(arg_name, type_, template_type, with_type_of=True):
-        """Check whether some type is subclass of another type"""
-        if not isinstance(template_type, Iterable):
-            template_type = (template_type,)
-        if not any([mstype.issubclass_(type_, x) for x in template_type]):
-            type_str = (type(type_).__name__ if isinstance(type_, (tuple, list)) else "") + str(type_)
-            raise TypeError(f'The {"type of" if with_type_of else ""} `{arg_name}` should be subclass'
-                            f' of {",".join((str(x) for x in template_type))}, but got {type_str}.')
-
-    @staticmethod
-    def check_args_tensor(args):
-        """Check whether args are all tensor."""
-        if not isinstance(args, dict):
-            raise TypeError("The args should be a dict.")
-        for arg, value in args.items():
-            ParamValidator.check_subclass(arg, value, mstype.tensor)
-
-    @staticmethod
-    def check_bool(arg_name, arg_value):
-        """Check arg isinstance of bool"""
-        if not isinstance(arg_value, bool):
-            raise ValueError(f'The `{arg_name}` should be isinstance of bool, but got {arg_value}.')
-        return arg_value
-
-    @staticmethod
-    def check_type(arg_name, arg_value, valid_types):
-        """Type checking."""
-        def raise_error_msg():
-            """func for raising error message when check failed"""
-            type_names = [t.__name__ for t in valid_types]
-            num_types = len(valid_types)
-            raise TypeError(f'The type of `{arg_name}` should be {"one of " if num_types > 1 else ""}'
-                            f'{type_names if num_types > 1 else type_names[0]}, but got {type(arg_value).__name__}.')
-
-        if isinstance(arg_value, type(mstype.tensor)):
-            arg_value = arg_value.element_type()
-        # Notice: bool is subclass of int, so `check_type('x', True, [int])` will check fail, and
-        #         `check_type('x', True, [bool, int])` will check pass
-        if isinstance(arg_value, bool) and bool not in tuple(valid_types):
-            raise_error_msg()
-        if isinstance(arg_value, tuple(valid_types)):
-            return arg_value
-        raise_error_msg()
-
-    @staticmethod
-    def check_typename(arg_name, arg_type, valid_types):
-        """Does it contain the _name_ attribute."""
-
-        def get_typename(t):
-            return t.__name__ if hasattr(t, '__name__') else str(t)
-
-        if isinstance(arg_type, type(mstype.tensor)):
-            arg_type = arg_type.element_type()
-
-        if arg_type in valid_types:
-            return arg_type
-        type_names = [get_typename(t) for t in valid_types]
-        if len(valid_types) == 1:
-            raise ValueError(f'The type of `{arg_name}` should be {type_names[0]},'
-                             f' but got {get_typename(arg_type)}.')
-        raise ValueError(f'The type of `{arg_name}` should be one of {type_names},'
-                         f' but got {get_typename(arg_type)}.')
-
-    @staticmethod
-    def check_string(arg_name, arg_value, valid_values):
-        """String type judgment."""
-        if isinstance(arg_value, str) and arg_value in valid_values:
-            return arg_value
-        if len(valid_values) == 1:
-            raise ValueError(f'The `{arg_name}` should be str and must be {valid_values[0]},'
-                             f' but got {arg_value}.')
-        raise ValueError(f'The `{arg_name}` should be str and must be one of {valid_values},'
-                         f' but got {arg_value}.')
-
-    @staticmethod
-    def check_type_same(args, valid_values):
-        """Determine whether the types are the same."""
-        name = list(args.keys())[0]
-        value = list(args.values())[0]
-        if isinstance(value, type(mstype.tensor)):
-            value = value.element_type()
-        for arg_name, arg_value in args.items():
-            if isinstance(arg_value, type(mstype.tensor)):
-                arg_value = arg_value.element_type()
-
-            if arg_value not in valid_values:
-                raise TypeError(f'The `{arg_name}` should be in {valid_values},'
-                                f' but `{arg_name}` is {arg_value}.')
-            if arg_value != value:
-                raise TypeError(f'`{arg_name}` should be same as `{name}`,'
-                                f' but `{arg_name}` is {arg_value}, `{name}` is {value}.')
-
-    @staticmethod
-    def check_two_types_same(arg1_name, arg1_type, arg2_name, arg2_type):
-        """Determine whether the types of two variables are the same."""
-        if arg1_type != arg2_type:
-            raise TypeError(f'The type of `{arg1_name}` and `{arg2_name}` should be same.')
-
-    @staticmethod
-    def check_value_on_integer(arg_name, arg_value, value, rel):
-        """Judging integer type."""
-        rel_fn = Rel.get_fns(rel)
-        type_match = isinstance(arg_value, int)
-        if type_match and (not rel_fn(arg_value, value)):
-            rel_str = Rel.get_strs(rel).format(value)
-            raise ValueError(f'The `{arg_name}` should be an int and must {rel_str}, but got {arg_value}.')
-        return arg_value
-
-    @staticmethod
-    def check_param_equal(param1_name, param1_value, param2_name, param2_value):
-        """Judging the equality of parameters."""
-        if param1_value != param2_value:
-            raise ValueError(f"`{param1_name}` must equal `{param2_name}`,"
-                             f" but got `{param1_name}` = {param1_value},"
-                             f" `{param2_name}` = {param2_value}.")
-
-    @staticmethod
-    def check_const_input(arg_name, arg_value):
-        """Check valid value."""
-        if arg_value is None:
-            raise ValueError(f'The `{arg_name}` must be a const input, but got {arg_value}.')
-
-    @staticmethod
-    def check_float_positive(arg_name, arg_value):
-        """Float type judgment."""
-        if isinstance(arg_value, float):
-            if arg_value > 0:
-                return arg_value
-            raise ValueError(f"The `{arg_name}` must be positive, but got {arg_value}.")
-
-        raise TypeError(f"`{arg_name}` must be float!")
-
-    @staticmethod
-    def check_pad_value_by_mode(op_name, pad_mode, padding):
-        """Validate value of padding according to pad_mode"""
-        if pad_mode != 'pad' and padding != 0:
-            raise ValueError(f"For op '{op_name}', padding must be zero when pad_mode is '{pad_mode}'.")
-        return padding
-
-    @staticmethod
-    def check_empty_shape_input(arg_name, arg_value):
-        """Check zeros value."""
-        if 0 in arg_value:
-            raise ValueError(f"Input `{arg_name}` cannot be empty.")
-
-    @staticmethod
-    def check_scalar_shape_input(arg_name, arg_value):
-        """Check scalar shape input."""
-        if arg_value != []:
-            raise ValueError(f"Input `{arg_name}` shape should be (). got {arg_value}")
-
-
 def check_int(input_param):
     """Int type judgment."""
     if isinstance(input_param, int) and not isinstance(input_param, bool):
@@ -653,30 +435,6 @@ def check_output_data(data):
         raise RuntimeError('Executor return data ' + str(data) + ', please check your net or input data.')
 
 
-def check_axis_type_int(axis):
-    """Check axis type."""
-    if not isinstance(axis, int):
-        raise TypeError('Wrong type for axis, should be int.')
-
-
-def check_axis_range(axis, rank):
-    """Check axis range."""
-    if not -rank <= axis < rank:
-        raise ValueError('The axis should be in range [{}, {}),'' but got {}.'.format(-rank, rank, axis))
-
-
-def check_attr_int(attr_name, attr):
-    """Check int type."""
-    if not isinstance(attr, int):
-        raise TypeError("The attr {} should be int, but got {}.".format(attr_name, type(attr)))
-
-
-def check_t_in_range(t):
-    """Check input range."""
-    if t not in (mstype.float16, mstype.float32, mstype.float64, mstype.int32, mstype.int64):
-        raise ValueError("The param T should be (float16, float32, float64, int32, int64).")
-
-
 once = _expand_tuple(1)
 twice = _expand_tuple(2)
 triple = _expand_tuple(3)
diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/optimizer/ad/dfunctor.cc
index 3e1aa6e555..33f919e2ac 100644
--- a/mindspore/ccsrc/optimizer/ad/dfunctor.cc
+++ b/mindspore/ccsrc/optimizer/ad/dfunctor.cc
@@ -175,7 +175,7 @@ AdjointPtr DFunctor::MapMorphism(const AnfNodePtr &morph) {
   UpdateAdjoint(node_adjoint);
   anfnode_to_adjoin_[morph] = node_adjoint;
   if (cnode_morph->stop_gradient()) {
-    MS_LOG(WARNING) << "MapMorphism node " << morph->ToString() << " is stopped.";
+    MS_LOG(DEBUG) << "MapMorphism node " << morph->ToString() << " is stopped.";
     return node_adjoint;
   }
 
diff --git a/mindspore/nn/layer/pooling.py b/mindspore/nn/layer/pooling.py
index 6cf06de029..0569a8ada6 100644
--- a/mindspore/nn/layer/pooling.py
+++ b/mindspore/nn/layer/pooling.py
@@ -19,7 +19,6 @@ from mindspore._checkparam import Validator as validator
 from ... import context
 from ..cell import Cell
 from ..._checkparam import Rel
-from ..._checkparam import ParamValidator
 
 
 class _PoolNd(Cell):
@@ -265,11 +264,11 @@ class AvgPool1d(_PoolNd):
                  stride=1,
                  pad_mode="valid"):
         super(AvgPool1d, self).__init__(kernel_size, stride, pad_mode)
-        ParamValidator.check_type('kernel_size', kernel_size, [int,])
-        ParamValidator.check_type('stride', stride, [int,])
-        self.pad_mode = ParamValidator.check_string('pad_mode', pad_mode.upper(), ['VALID', 'SAME'])
-        ParamValidator.check_integer("kernel_size", kernel_size, 1, Rel.GE)
-        ParamValidator.check_integer("stride", stride, 1, Rel.GE)
+        validator.check_value_type('kernel_size', kernel_size, [int], self.cls_name)
+        validator.check_value_type('stride', stride, [int], self.cls_name)
+        self.pad_mode = validator.check_string('pad_mode', pad_mode.upper(), ['VALID', 'SAME'], self.cls_name)
+        validator.check_integer("kernel_size", kernel_size, 1, Rel.GE, self.cls_name)
+        validator.check_integer("stride", stride, 1, Rel.GE, self.cls_name)
         self.kernel_size = (1, kernel_size)
         self.stride = (1, stride)
         self.avg_pool = P.AvgPool(ksize=self.kernel_size,
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index 67f86bf87b..2638658357 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -24,7 +24,7 @@ import itertools
 import numbers
 import numpy as np
 
-from ..._checkparam import ParamValidator as validator
+from ..._checkparam import Validator as validator
 from ..._checkparam import Rel
 from ...common import dtype as mstype
 from ...common.tensor import Tensor
@@ -32,12 +32,12 @@ from ..operations.math_ops import _infer_shape_reduce
 from .._utils import _get_concat_offset
 from ..primitive import Primitive, PrimitiveWithInfer, prim_attr_register
 
-def _check_infer_attr_reduce(axis, keep_dims):
-    validator.check_type('keep_dims', keep_dims, [bool])
-    validator.check_type('axis', axis, [int, tuple])
+def _check_infer_attr_reduce(axis, keep_dims, prim_name):
+    validator.check_value_type('keep_dims', keep_dims, [bool], prim_name)
+    validator.check_value_type('axis', axis, [int, tuple], prim_name)
     if isinstance(axis, tuple):
         for index, value in enumerate(axis):
-            validator.check_type('axis[%d]' % index, value, [int])
+            validator.check_value_type('axis[%d]' % index, value, [int], prim_name)
 
 
 class ExpandDims(PrimitiveWithInfer):
@@ -74,13 +74,11 @@ class ExpandDims(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['x', 'axis'], outputs=['output'])
 
     def __infer__(self, x, axis):
-        validator.check_subclass("input_x", x['dtype'], mstype.tensor)
+        validator.check_subclass("input_x", x['dtype'], mstype.tensor, self.name)
         x_shape = list(x['shape'])
         axis_v = axis['value']
         rank = len(x_shape)
-        validator.check_const_input('axis', axis_v)
-        validator.check_type("axis", axis_v, [int])
-        validator.check_int_range('axis', axis_v, -rank - 1, rank, Rel.INC_BOTH)
+        validator.check_int_range('axis', axis_v, -rank - 1, rank, Rel.INC_BOTH, self.name)
         if axis_v < 0:
             axis_v = rank + 1 + axis_v
         x_shape.insert(axis_v, 1)
@@ -110,7 +108,7 @@ class DType(PrimitiveWithInfer):
         """init DType"""
 
     def __infer__(self, x):
-        validator.check_subclass("input_x", x['dtype'], mstype.tensor)
+        validator.check_subclass("input_x", x['dtype'], mstype.tensor, self.name)
         out = {'shape': (),
                'dtype': mstype.type_type,
                'value': x['dtype'].element_type()}
@@ -144,19 +142,17 @@ class SameTypeShape(PrimitiveWithInfer):
 
     def __call__(self, x, y):
         """run in PyNative mode"""
-        if x.dtype() != y.dtype():
-            raise TypeError(f"The {x} and {y} should be same dtype.")
-        if x.shape() != y.shape():
-            raise TypeError(f"The {x} and {y} should have same shape.")
+        validator.check_subclass('x', x.dtype(), mstype.tensor, self.name)
+        validator.check_subclass('y', y.dtype(), mstype.tensor, self.name)
+        validator.check('x dtype', x.dtype(), 'y dtype', y.dtype(), Rel.EQ, self.name, TypeError)
+        validator.check('x shape', x.shape(), 'y shape', y.shape(), Rel.EQ, self.name)
         return x
 
     def __infer__(self, x, y):
-        if x['dtype'] != y['dtype']:
-            raise TypeError(f"The {x} and {y} should be same dtype,"
-                            f" but got {x['dtype']} {y['dtype']}.")
-        if x['shape'] != y['shape']:
-            raise ValueError(f"The {x} and {y} should be same shape,"
-                             f" but got {x['shape']} {y['shape']}.")
+        validator.check_subclass('x', x['dtype'], mstype.tensor, self.name)
+        validator.check_subclass('y', y['dtype'], mstype.tensor, self.name)
+        validator.check('x dtype', x['dtype'], 'y dtype', y['dtype'], Rel.EQ, self.name, TypeError)
+        validator.check('x shape', x['shape'], 'y shape', y['shape'], Rel.EQ, self.name)
         return x
 
 
@@ -191,8 +187,8 @@ class Cast(PrimitiveWithInfer):
         src_type = x['dtype']
         dst_type = t['value']
 
-        validator.check_subclass("input_x", src_type, [mstype.tensor, mstype.number])
-        validator.check_subclass("type", dst_type, mstype.number, with_type_of=False)
+        validator.check_subclass("input_x", src_type, [mstype.tensor, mstype.number], self.name)
+        validator.check_subclass("type", dst_type, mstype.number, self.name)
 
         if isinstance(src_type, type(mstype.tensor)):
             src_type = x['dtype'].element_type()
@@ -238,8 +234,8 @@ class IsSubClass(PrimitiveWithInfer):
         sub_type_t = sub_type['value']
         type_v = type_['value']
 
-        validator.check_type("sub_type", sub_type_t, [mstype.Type])
-        validator.check_type("type_", type_v, [mstype.Type])
+        validator.check_value_type("sub_type", sub_type_t, [mstype.Type], self.name)
+        validator.check_value_type("type_", type_v, [mstype.Type], self.name)
 
         value = mstype.issubclass_(sub_type_t, type_v)
 
@@ -273,8 +269,8 @@ class IsInstance(PrimitiveWithInfer):
         sub_type_t = inst['dtype']
         type_v = type_['value']
 
-        validator.check_const_input("inst", inst['value'])
-        validator.check_type("type_", type_v, [mstype.Type])
+        validator.check_const_input("inst", inst['value'], self.name)
+        validator.check_value_type("type_", type_v, [mstype.Type], self.name)
 
         value = mstype.issubclass_(sub_type_t, type_v)
 
@@ -316,14 +312,13 @@ class Reshape(PrimitiveWithInfer):
     def __infer__(self, x, shape):
         shape_v = shape['value']
         x_shp = x['shape']
-        validator.check_subclass("x", x['dtype'], mstype.tensor)
-        validator.check_const_input("shape", shape_v)
-        validator.check_type("shape", shape_v, [tuple])
+        validator.check_subclass("x", x['dtype'], mstype.tensor, self.name)
+        validator.check_value_type("shape", shape_v, [tuple], self.name)
         shape_v = list(shape_v)
         neg_index = -1
         dim_prod = 1
         for i, shp_i in enumerate(shape_v):
-            validator.check_type("shape[%d]" % i, shp_i, [int])
+            validator.check_value_type("shape[%d]" % i, shp_i, [int], self.name)
             if shp_i == -1:
                 if neg_index != -1:
                     raise ValueError(f'The shape can only has one -1 at most, but {shape_v}.')
@@ -332,7 +327,7 @@ class Reshape(PrimitiveWithInfer):
                 dim_prod *= shp_i
         arr_prod = np.prod(x_shp)
         if dim_prod <= 0 or arr_prod % dim_prod != 0:
-            raise ValueError(f'The product of shape should > 0 and'
+            raise ValueError(f'For \'{self.name}\' the product of shape should > 0 and'
                              f' can be divided by prod of input {arr_prod},'
                              f' but shape {shape}, product of shape {dim_prod}.')
 
@@ -340,7 +335,7 @@ class Reshape(PrimitiveWithInfer):
             shape_v[neg_index] = int(arr_prod / dim_prod)
             dim_prod *= shape_v[neg_index]
         if dim_prod != arr_prod:
-            raise ValueError(f'The shape arg for reshape must match array''s size'
+            raise ValueError(f'For \'{self.name}\' The shape arg for reshape must match array\'s size'
                              f' input shape {arr_prod}, shape {dim_prod}.')
 
         value = None
@@ -406,10 +401,10 @@ class Squeeze(PrimitiveWithInfer):
     def __init__(self, axis=()):
         """init Squeeze"""
         self.init_prim_io_names(inputs=['x'], outputs=['output'])
-        validator.check_type('axis', axis, [int, tuple])
+        validator.check_value_type('axis', axis, [int, tuple], self.name)
         if isinstance(axis, tuple):
-            for item in axis:
-                validator.check_type("item", item, [int])
+            for idx, item in enumerate(axis):
+                validator.check_value_type("axis[%d]" % idx, item, [int], self.name)
         else:
             self.axis = (axis,)
             self.add_prim_attr("axis", (axis,))
@@ -422,14 +417,14 @@ class Squeeze(PrimitiveWithInfer):
             ret = [d for d in x_shape if d != 1]
         else:
             for a in axis:
-                validator.check_int_range('axis or its elements', a, -ndim, ndim - 1, Rel.INC_BOTH)
+                validator.check_int_range('axis or its elements', a, -ndim, ndim - 1, Rel.INC_BOTH, self.name)
                 if x_shape[a] != 1:
                     raise ValueError('Cannot select an axis to squeeze out which has size not equal to one.')
             ret = [x_shape[i] for i in range(ndim) if not (i in axis or (i - ndim) in axis)]
         return ret
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("x", x_dtype, mstype.tensor)
+        validator.check_subclass("x", x_dtype, mstype.tensor, self.name)
         return x_dtype
 
 
@@ -467,14 +462,13 @@ class Transpose(PrimitiveWithInfer):
         if len(x_shape) != len(p_value):
             raise ValueError('The dimension of x and perm must be equal.')
 
-        validator.check_const_input("perm", p_value)
-        validator.check_type("p_value", p_value, [tuple])
-        validator.check_subclass("x_type", x_type, mstype.tensor)
+        validator.check_value_type("p_value", p_value, [tuple], self.name)
+        validator.check_subclass("x_type", x_type, mstype.tensor, self.name)
 
         tmp = list(p_value)
         for i, dim in enumerate(p_value):
-            validator.check_integer("perm[%d]" % i, dim, 0, Rel.GE)
-            validator.check_integer("perm[%d]" % i, dim, len(p_value), Rel.LT)
+            validator.check_integer("perm[%d]" % i, dim, 0, Rel.GE, self.name)
+            validator.check_integer("perm[%d]" % i, dim, len(p_value), Rel.LT, self.name)
             tmp.remove(dim)
             if dim in tmp:
                 raise ValueError('The value of perm is wrong.')
@@ -517,15 +511,13 @@ class GatherV2(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['params', 'indices', 'axis'], outputs=['output'])
 
     def __infer__(self, params, indices, axis):
-        validator.check_subclass("params", params['dtype'], mstype.tensor)
-        validator.check_subclass("indices", indices['dtype'], mstype.tensor)
-        validator.check_subclass("axis", axis['dtype'], mstype.int_)
-        validator.check_typename("element of indices", indices['dtype'], mstype.int_type)
-        validator.check_const_input("axis", axis['value'])
+        validator.check_subclass("params", params['dtype'], mstype.tensor, self.name)
+        validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name)
+        validator.check_subclass("axis", axis['dtype'], mstype.int_, self.name)
         axis_v = axis['value']
         params_shp = params['shape']
         rank = len(params_shp)
-        validator.check_int_range("axis", axis_v, -rank, rank, Rel.INC_LEFT)
+        validator.check_int_range("axis", axis_v, -rank, rank, Rel.INC_LEFT, self.name)
         if axis_v < 0:
             axis_v += rank
         out_shape = params_shp[:axis_v] + indices['shape'] + params_shp[axis_v + 1:]
@@ -564,19 +556,20 @@ class Split(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, axis=0, output_num=1):
         """init Split"""
-        validator.check_type("axis", axis, [int])
-        validator.check_type("output_num", output_num, [int])
+        validator.check_value_type("axis", axis, [int], self.name)
+        validator.check_value_type("output_num", output_num, [int], self.name)
         self.axis = axis
         self.output_num = output_num
 
     def __infer__(self, x):
-        validator.check_subclass("x", x['dtype'], mstype.tensor)
+        validator.check_subclass("x", x['dtype'], mstype.tensor, self.name)
         x_shape = list(x['shape'])
         dim = len(x_shape)
-        validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT)
-        validator.check_integer("output_num", self.output_num, 0, Rel.GT)
+        validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT, self.name)
+        validator.check_integer("output_num", self.output_num, 0, Rel.GT, self.name)
         output_valid_check = x_shape[self.axis] % self.output_num
-        validator.check_integer("the dimension which to split divides output_num", output_valid_check, 0, Rel.EQ)
+        validator.check_integer("the dimension which to split divides output_num", output_valid_check, 0, Rel.EQ,
+                                self.name)
         x_shape[self.axis] = int(x_shape[self.axis] / self.output_num)
         out_shapes = []
         out_dtypes = []
@@ -615,7 +608,7 @@ class Rank(PrimitiveWithInfer):
         """init Rank"""
 
     def __infer__(self, x):
-        validator.check_subclass("x", x['dtype'], mstype.tensor)
+        validator.check_subclass("x", x['dtype'], mstype.tensor, self.name)
         out = {'shape': None,
                'dtype': None,
                'value': len(x['shape'])}
@@ -647,15 +640,14 @@ class TruncatedNormal(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, seed=0, dtype=mstype.float32):
         """init TruncatedNormal"""
-        validator.check_type('seed', seed, [int])
-        validator.check_typename('dtype', dtype, mstype.number_type)
+        validator.check_value_type('seed', seed, [int], self.name)
+        validator.check_type_same({'dtype': dtype}, mstype.number_type, self.name)
 
     def __infer__(self, shape):
         shape_value = shape['value']
-        validator.check_const_input("shape", shape_value)
-        validator.check_type("shape", shape_value, [tuple])
+        validator.check_value_type("shape", shape_value, [tuple], self.name)
         for i, value in enumerate(shape_value):
-            validator.check_integer(f'{i}th value of shape', value, 0, Rel.GT)
+            validator.check_integer(f'{i}th value of shape', value, 0, Rel.GT, self.name)
         out = {'shape': shape_value,
                'dtype': mstype.tensor_type(self.dtype),
                'value': None}
@@ -687,7 +679,7 @@ class Size(PrimitiveWithInfer):
 
     def __infer__(self, x):
         size = 1
-        validator.check_subclass("x", x['dtype'], mstype.tensor)
+        validator.check_subclass("x", x['dtype'], mstype.tensor, self.name)
         shp = x['shape']
         if not shp:
             size = 0
@@ -723,25 +715,20 @@ class Fill(PrimitiveWithInfer):
         """init Fill"""
 
     def __infer__(self, dtype, dims, x):
-        validator.check_const_input("type", dtype['value'])
-        validator.check_const_input("shape", dims['value'])
-        validator.check_const_input("value", x['value'])
-        validator.check_type("shape", dims['value'], [tuple])
-        validator.check_type("value", x['value'], [numbers.Number, bool])
-        for item in dims['value']:
-            validator.check_type("item", item, [int])
-            validator.check_integer("item", item, 0, Rel.GT)
-        x_dtype = dtype['value']
+        validator.check_value_type("shape", dims['value'], [tuple], self.name)
+        validator.check_value_type("value", x['value'], [numbers.Number, bool], self.name)
+        for idx, item in enumerate(dims['value']):
+            validator.check_integer("dims[%d]" % idx, item, 0, Rel.GT, self.name)
         valid_types = [mstype.bool_, mstype.int8, mstype.int32, mstype.int64,
                        mstype.uint8, mstype.uint32, mstype.uint64,
                        mstype.float16, mstype.float32, mstype.float64]
-        validator.check_typename("value", x_dtype, valid_types)
-        x_nptype = mstype.dtype_to_nptype(x_dtype)
+        validator.check_type_same({"value": dtype['value']}, valid_types, self.name)
+        x_nptype = mstype.dtype_to_nptype(dtype['value'])
         ret = np.full(dims['value'], x['value'], x_nptype)
         out = {
             'value': Tensor(ret),
             'shape': dims['value'],
-            'dtype': x_dtype,
+            'dtype': x['dtype'],
         }
         return out
 
@@ -772,8 +759,7 @@ class OnesLike(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("x", x_dtype, mstype.tensor)
-        validator.check_typename('x_dtype', x_dtype, mstype.number_type + (mstype.bool_,))
+        validator.check_tensor_type_same({'x': x_dtype}, mstype.number_type + (mstype.bool_,), self.name)
         return x_dtype
 
 
@@ -804,8 +790,7 @@ class ZerosLike(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("x", x_dtype, mstype.tensor)
-        validator.check_typename('x_dtype', x_dtype, mstype.number_type + (mstype.bool_,))
+        validator.check_tensor_type_same({'x': x_dtype}, mstype.number_type + (mstype.bool_,), self.name)
         return x_dtype
 
 
@@ -830,14 +815,13 @@ class TupleToArray(PrimitiveWithInfer):
         """init TupleToArray"""
 
     def infer_value(self, x):
-        validator.check_const_input("x", x)
-        validator.check_type("x", x, [tuple])
-        validator.check("size of x", len(x), '', 0, Rel.GT)
+        validator.check_value_type("x", x, [tuple], self.name)
+        validator.check("size of x", len(x), '', 0, Rel.GT, self.name)
         dtype = type(x[0])
         for i, item in enumerate(x):
-            validator.check_type(f"x[{i}]", item, [numbers.Number])
+            validator.check_value_type(f"x[{i}]", item, [numbers.Number], self.name)
         if not all(isinstance(item, dtype) for item in x):
-            raise TypeError("All elements of input x must be have same type.")
+            raise TypeError(f"For \'{self.name}\' all elements of input x must be have same type.")
         if isinstance(x[0], int):
             ret = np.array(x, np.int32)
         else:
@@ -867,8 +851,7 @@ class ScalarToArray(PrimitiveWithInfer):
         pass
 
     def infer_value(self, x):
-        validator.check_const_input("x", x)
-        validator.check_type("x", x, [int, float])
+        validator.check_value_type("x", x, [int, float], self.name)
         if isinstance(x, int):
             ret = np.array(x, np.int32)
         else:
@@ -899,9 +882,8 @@ class ScalarToTensor(PrimitiveWithInfer):
         pass
 
     def infer_value(self, x, dtype=mstype.float32):
-        validator.check_const_input("x", x)
-        validator.check_type("x", x, [int, float])
-        validator.check_subclass("dtype", dtype, mstype.number, with_type_of=False)
+        validator.check_value_type("x", x, [int, float], self.name)
+        validator.check_subclass("dtype", dtype, mstype.number, self.name)
         data_type = mstype.dtype_to_nptype(dtype)
         return Tensor(np.array(x, data_type))
 
@@ -943,15 +925,14 @@ class InvertPermutation(PrimitiveWithInfer):
     def __infer__(self, x):
         x_shp = x['shape']
         x_value = x['value']
-        validator.check_const_input("shape", x_shp)
-        validator.check_type("shape", x_shp, [tuple])
+        validator.check_value_type("shape", x_shp, [tuple], self.name)
         z = [x_value[i] for i in range(len(x_value))]
         z.sort()
 
         y = [None]*len(x_value)
         for i, value in enumerate(x_value):
-            validator.check_type("input[%d]" % i, value, [int])
-            validator.check(f'value', z[i], f'index', i)
+            validator.check_value_type("input[%d]" % i, value, [int], self.name)
+            validator.check(f'value', z[i], f'index', i, Rel.EQ, self.name)
             y[value] = i
             z.append(value)
         return {'shape': x_shp,
@@ -986,8 +967,8 @@ class Argmax(PrimitiveWithInfer):
     def __init__(self, axis=-1, output_type=mstype.int64):
         """init Argmax"""
         self.init_prim_io_names(inputs=['x'], outputs=['output'])
-        validator.check_type("axis", axis, [int])
-        validator.check_typename('output_type', output_type, [mstype.int32, mstype.int64])
+        validator.check_value_type("axis", axis, [int], self.name)
+        validator.check_type_same({'output': output_type}, [mstype.int32, mstype.int64], self.name)
         self.axis = axis
         self.add_prim_attr('output_type', output_type)
 
@@ -996,14 +977,13 @@ class Argmax(PrimitiveWithInfer):
         if axis is None:
             axis = 0
         x_rank = len(x_shape)
-        validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT)
+        validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT, self.name)
         axis = axis + x_rank if axis < 0 else axis
         ouput_shape = [x_shape[i] for i in range(x_rank) if i != axis]
         return ouput_shape
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("input_x", x_dtype, mstype.tensor)
-        validator.check_typename('input_x', x_dtype, [mstype.float32, mstype.float16])
+        validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name)
         return mstype.tensor_type(self.output_type)
 
 
@@ -1035,7 +1015,7 @@ class Argmin(PrimitiveWithInfer):
     def __init__(self, axis=-1, output_type=mstype.int64):
         """init Argmin"""
         self.init_prim_io_names(inputs=['x'], outputs=['output'])
-        validator.check_type("axis", axis, [int])
+        validator.check_value_type("axis", axis, [int], self.name)
         self.axis = axis
         self.add_prim_attr('output_type', output_type)
 
@@ -1044,13 +1024,13 @@ class Argmin(PrimitiveWithInfer):
         if axis is None:
             axis = 0
         x_rank = len(x_shape)
-        validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT)
+        validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT, self.name)
         axis = axis + x_rank if axis < 0 else axis
         ouput_shape = [x_shape[i] for i in range(x_rank) if i != axis]
         return ouput_shape
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("input_x", x_dtype, mstype.tensor)
+        validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name)
         return mstype.tensor_type(self.output_type)
 
 
@@ -1087,17 +1067,17 @@ class ArgMaxWithValue(PrimitiveWithInfer):
         """init ArgMaxWithValue"""
         self.axis = axis
         self.keep_dims = keep_dims
-        _check_infer_attr_reduce(axis, keep_dims)
+        _check_infer_attr_reduce(axis, keep_dims, self.name)
 
     def infer_shape(self, x_shape):
         axis = self.axis
         x_rank = len(x_shape)
-        validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT)
+        validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT, self.name)
         ouput_shape = _infer_shape_reduce(x_shape, self.axis, self.keep_dims, self.name)
         return ouput_shape, ouput_shape
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("input_x", x_dtype, mstype.tensor)
+        validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name)
         return mstype.tensor_type(mstype.int32), x_dtype
 
 
@@ -1133,17 +1113,17 @@ class ArgMinWithValue(PrimitiveWithInfer):
         """init ArgMinWithValue"""
         self.axis = axis
         self.keep_dims = keep_dims
-        _check_infer_attr_reduce(axis, keep_dims)
+        _check_infer_attr_reduce(axis, keep_dims, self.name)
 
     def infer_shape(self, x_shape):
         axis = self.axis
         x_rank = len(x_shape)
-        validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT)
+        validator.check_int_range("axis", axis, -x_rank, x_rank, Rel.INC_LEFT, self.name)
         ouput_shape = _infer_shape_reduce(x_shape, self.axis, self.keep_dims, self.name)
         return ouput_shape, ouput_shape
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("input_x", x_dtype, mstype.tensor)
+        validator.check_subclass("input_x", x_dtype, mstype.tensor, self.name)
         return mstype.tensor_type(mstype.int32), x_dtype
 
 
@@ -1183,13 +1163,11 @@ class Tile(PrimitiveWithInfer):
     def __infer__(self, x, multiples):
         multiples_v = multiples['value']
         x_shp = x['shape']
-        validator.check_const_input("shape", multiples_v)
-        validator.check_type("shape", multiples_v, [tuple])
+        validator.check_value_type("shape", multiples_v, [tuple], self.name)
         for i, multiple in enumerate(multiples_v):
-            validator.check_type("multiples[%d]" % i, multiple, [int])
-        validator.check_typename('x', x['dtype'],
-                                 [mstype.int16, mstype.int32, mstype.bool_,
-                                  mstype.float16, mstype.float32])
+            validator.check_value_type("multiples[%d]" % i, multiple, [int], self.name)
+        valid_types = [mstype.int16, mstype.int32, mstype.bool_, mstype.float16, mstype.float32]
+        validator.check_tensor_type_same({'x': x['dtype']}, valid_types, self.name)
         len_sub = len(multiples_v) - len(x_shp)
         multiples_w = None
         if len_sub == 0:
@@ -1199,7 +1177,8 @@ class Tile(PrimitiveWithInfer):
                 x_shp.insert(0, 1)
             multiples_w = multiples_v
         elif len_sub < 0:
-            raise ValueError("The length of multiples can not be smaller than the length of dimension in input_x.")
+            raise ValueError(f'For \'{self.name}\' the length of multiples can not be smaller than '
+                             f'the length of dimension in input_x.')
         for i, a in enumerate(multiples_w):
             x_shp[i] *= a
         value = None
@@ -1246,23 +1225,23 @@ class UnsortedSegmentSum(PrimitiveWithInfer):
     def __infer__(self, x, segment_ids, num_segments):
         x_type = x['dtype']
         x_shp = x['shape']
-        validator.check_subclass("input_x", x_type, mstype.tensor)
-        validator.check_type("x_shape", x_shp, [list])
+        validator.check_subclass("input_x", x_type, mstype.tensor, self.name)
+        validator.check_value_type("x_shape", x_shp, [list], self.name)
         x_shp_len = len(x_shp)
-        validator.check_integer("rank of input_x", x_shp_len, 0, Rel.GT)
+        validator.check_integer("rank of input_x", x_shp_len, 0, Rel.GT, self.name)
         segment_ids_shp = segment_ids['shape']
         segment_ids_type = segment_ids['dtype']
-        validator.check_subclass("segment_ids", segment_ids_type, mstype.tensor)
-        validator.check_type("segment_ids", segment_ids_shp, [list])
+        validator.check_subclass("segment_ids", segment_ids_type, mstype.tensor, self.name)
+        validator.check_value_type("segment_ids", segment_ids_shp, [list], self.name)
         segment_ids_shp_len = len(segment_ids_shp)
-        validator.check_integer("rank of segment_ids", segment_ids_shp_len, 0, Rel.GT)
+        validator.check_integer("rank of segment_ids", segment_ids_shp_len, 0, Rel.GT, self.name)
         validator.check(f'rank of input_x', len(x_shp),
-                        'rank of segments_id', len(segment_ids_shp), Rel.GE)
+                        'rank of segments_id', len(segment_ids_shp), Rel.GE, self.name)
         for i, value in enumerate(segment_ids_shp):
-            validator.check("ids[%d]" % i, value, 'input[%d]' % i, x_shp[i])
+            validator.check("ids[%d]" % i, value, 'input[%d]' % i, x_shp[i], Rel.EQ, self.name)
         num_segments_v = num_segments['value']
-        validator.check_type('num_segments', num_segments_v, [int])
-        validator.check_integer("num_segments", num_segments_v, 0, Rel.GT)
+        validator.check_value_type('num_segments', num_segments_v, [int], self.name)
+        validator.check_integer("num_segments", num_segments_v, 0, Rel.GT, self.name)
         shp = [num_segments_v]
         shp += x_shp[segment_ids_shp_len:]
         out = {'shape': shp,
@@ -1306,7 +1285,7 @@ class Concat(PrimitiveWithInfer):
     def __init__(self, axis=0):
         """init Tile"""
         self.__setattr_flag__ = True
-        validator.check_type("axis", axis, [int])
+        validator.check_value_type("axis", axis, [int], self.name)
 
     def __infer__(self, input_x):
         axis = self.axis
@@ -1323,25 +1302,25 @@ class Concat(PrimitiveWithInfer):
         return out
 
 
-def _get_pack_shape(x_shape, x_type, axis):
+def _get_pack_shape(x_shape, x_type, axis, prim_name):
     """for pack output shape"""
-    validator.check_type("shape", x_shape, [tuple, list])
-    validator.check_integer("len of input_x shape", len(x_shape), 0, Rel.GT)
-    validator.check_subclass("shape0", x_type[0], mstype.tensor)
-    validator.check_integer("len of input_x0 shape", len(x_shape[0]), 0, Rel.GT)
+    validator.check_value_type("shape", x_shape, [tuple, list], prim_name)
+    validator.check_integer("len of input_x shape", len(x_shape), 0, Rel.GT, prim_name)
+    validator.check_subclass("shape0", x_type[0], mstype.tensor, prim_name)
+    validator.check_integer("len of input_x0 shape", len(x_shape[0]), 0, Rel.GT, prim_name)
     rank_base = len(x_shape[0])
     N = len(x_shape)
     out_shape = x_shape[0]
-    validator.check_int_range('axis', axis, -rank_base - 1, rank_base, Rel.INC_BOTH)
+    validator.check_int_range('axis', axis, -rank_base - 1, rank_base, Rel.INC_BOTH, prim_name)
     if axis < 0:
         axis = axis + rank_base + 1
     for i in range(1, N):
         v = x_shape[i]
-        validator.check('len of x_shape[%d]' % i, len(v), 'len of rank_base', rank_base)
-        validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0])
+        validator.check('len of x_shape[%d]' % i, len(v), 'len of rank_base', rank_base, Rel.EQ, prim_name)
+        validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0], Rel.EQ, prim_name)
         for j in range(rank_base):
             if v[j] != x_shape[0][j]:
-                raise ValueError("Pack evaluator element %d shape in input can not pack with first element" % i)
+                raise ValueError(f"For \'{prim_name}\' element {i} shape in input can not pack with first element")
     out_shape.insert(axis, N)
     return out_shape
 
@@ -1376,14 +1355,14 @@ class Pack(PrimitiveWithInfer):
     def __init__(self, axis=0):
         """init Pack"""
         self.__setattr_flag__ = True
-        validator.check_type("axis", axis, [int])
+        validator.check_value_type("axis", axis, [int], self.name)
         self.axis = axis
 
     def __infer__(self, value):
         x_shape = value['shape']
         x_type = value['dtype']
         self.add_prim_attr('num', len(x_shape))
-        all_shape = _get_pack_shape(x_shape, x_type, self.axis)
+        all_shape = _get_pack_shape(x_shape, x_type, self.axis, self.name)
         out = {'shape': all_shape,
                'dtype': x_type[0],
                'value': None}
@@ -1429,22 +1408,23 @@ class Unpack(PrimitiveWithInfer):
     def __init__(self, axis=0):
         """init Unpack"""
         self.__setattr_flag__ = True
-        validator.check_type("axis", axis, [int])
+        validator.check_value_type("axis", axis, [int], self.name)
         self.axis = axis
 
     def __infer__(self, x):
-        validator.check_subclass("x", x['dtype'], mstype.tensor)
+        validator.check_subclass("x", x['dtype'], mstype.tensor, self.name)
         x_shape = list(x['shape'])
         dim = len(x_shape)
-        validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT)
+        validator.check_int_range('axis value', self.axis, -dim, dim, Rel.INC_LEFT, self.name)
         if self.axis < 0:
             self.axis = self.axis + dim
         output_num = x_shape[self.axis]
-        validator.check_type("num", output_num, [int])
-        validator.check_integer("output_num", output_num, 0, Rel.GT)
+        validator.check_value_type("num", output_num, [int], self.name)
+        validator.check_integer("output_num", output_num, 0, Rel.GT, self.name)
         self.add_prim_attr('num', output_num)
         output_valid_check = x_shape[self.axis] - output_num
-        validator.check_integer("The dimension which to unpack divides output_num", output_valid_check, 0, Rel.EQ)
+        validator.check_integer("The dimension which to unpack divides output_num", output_valid_check, 0, Rel.EQ,
+                                self.name)
         out_shapes = []
         out_dtypes = []
         out_shape = x_shape[:self.axis] + x_shape[self.axis + 1:]
@@ -1486,8 +1466,8 @@ class Slice(PrimitiveWithInfer):
     def __infer__(self, x, begin, size):
         x_shape = x['shape']
         x_shp_len = len(x_shape)
-        validator.check_const_input('begin', begin['value'])
-        validator.check_const_input('size', size['value'])
+        validator.check_const_input('begin', begin['value'], self.name)
+        validator.check_const_input('size', size['value'], self.name)
         begin_v, size_v = begin['value'], size['value']
         if begin_v is None or size_v is None:
             return {'shape': None,
@@ -1499,7 +1479,8 @@ class Slice(PrimitiveWithInfer):
         for i in range(x_shp_len):
             if x_shape[i] < begin_v[i] + size_v[i]:
                 y = begin_v[i] + size_v[i]
-                raise ValueError("Slice shape can not bigger than orign shape %d, %d." % (x_shape[i], y))
+                raise ValueError("For '%s' slice shape can not bigger than orign shape %d, %d." %
+                                 (self.name, x_shape[i], y))
         return {'shape': size_v,
                 'dtype': x['dtype'],
                 'value': None}
@@ -1565,11 +1546,11 @@ class Select(PrimitiveWithInfer):
 
     def infer_dtype(self, cond_type, x_type, y_type):
         self.add_prim_attr('T', x_type)
-        validator.check_subclass("x_type", x_type, mstype.tensor)
-        validator.check_subclass("y_type", y_type, mstype.tensor)
-        validator.check_typename("cond_type", cond_type, [mstype.bool_])
+        validator.check_subclass("x_type", x_type, mstype.tensor, self.name)
+        validator.check_subclass("y_type", y_type, mstype.tensor, self.name)
+        validator.check_tensor_type_same({"cond": cond_type}, [mstype.bool_], self.name)
         if x_type != y_type:
-            raise TypeError('The x_type %s must be the same as y_type %s.' % (x_type, y_type))
+            raise TypeError('\'%s\' the x_type %s must be the same as y_type %s.' % (self.name, x_type, y_type))
         return x_type
 
 
@@ -1637,26 +1618,23 @@ class StridedSlice(PrimitiveWithInfer):
                  shrink_axis_mask=0):
         """init StrideSlice"""
         self.init_prim_io_names(inputs=['x', 'begin', 'end', 'strides'], outputs=['output'])
-        validator.check_type('begin_mask', begin_mask, [int])
-        validator.check_type('end_mask', end_mask, [int])
-        validator.check_type('ellipsis_mask', ellipsis_mask, [int])
-        validator.check_type('new_axis_mask', new_axis_mask, [int])
-        validator.check_type('shrink_axis_mask', shrink_axis_mask, [int])
+        validator.check_value_type('begin_mask', begin_mask, [int], self.name)
+        validator.check_value_type('end_mask', end_mask, [int], self.name)
+        validator.check_value_type('ellipsis_mask', ellipsis_mask, [int], self.name)
+        validator.check_value_type('new_axis_mask', new_axis_mask, [int], self.name)
+        validator.check_value_type('shrink_axis_mask', shrink_axis_mask, [int], self.name)
 
     def __infer__(self, x, begin, end, strides):
         begin_v, end_v, strides_v = begin['value'], end['value'], strides['value']
-        validator.check_const_input("begin", begin_v)
-        validator.check_const_input("end", end_v)
-        validator.check_const_input("strides", strides_v)
-        validator.check_type("begin", begin_v, [tuple])
-        validator.check_type("end", end_v, [tuple])
-        validator.check_type("strides", strides_v, [tuple])
+        validator.check_value_type("begin", begin_v, [tuple], self.name)
+        validator.check_value_type("end", end_v, [tuple], self.name)
+        validator.check_value_type("strides", strides_v, [tuple], self.name)
 
         x_shape = x['shape']
         x_shp_len = len(x_shape)
         if len(begin_v) != x_shp_len or len(end_v) != x_shp_len or len(strides_v) != x_shp_len:
-            raise ValueError(f"The length of begin index{begin_v}, end index{end_v} and strides{strides_v} "
-                             f"must be equal to the dims({x_shp_len}) of input.")
+            raise ValueError(f"For \'{self.name}\' the length of begin index{begin_v}, end index{end_v} and "
+                             f"strides{strides_v} must be equal to the dims({x_shp_len}) of input.")
 
         ret_shape = []
         append_dimensions = []
@@ -1669,8 +1647,8 @@ class StridedSlice(PrimitiveWithInfer):
                 append_dimensions.append(x_shape[x_shp_len - 1 - len(append_dimensions)])
                 continue
             if i < (len(shrink_pos) - 2) and shrink_pos[i] == '1':
-                validator.check_integer(f'begin[{i}]', begin_v[i], -x_shape[i], Rel.GE)
-                validator.check_integer(f'begin[{i}]', begin_v[i], x_shape[i], Rel.LT)
+                validator.check_integer(f'begin[{i}]', begin_v[i], -x_shape[i], Rel.GE, self.name)
+                validator.check_integer(f'begin[{i}]', begin_v[i], x_shape[i], Rel.LT, self.name)
                 continue
 
             begin_idx = begin_v[i]
@@ -1680,9 +1658,9 @@ class StridedSlice(PrimitiveWithInfer):
                 begin_idx = 0
             if self.end_mask:
                 end_idx = x_shape[i]
-            validator.check_integer(f'begin[{i}]', begin_idx, x_shape[i], Rel.LE)
-            validator.check_integer(f'end[{i}]', end_idx, x_shape[i], Rel.LE)
-            validator.check_integer(f'strides[{i}]', strides_idx, 0, Rel.NE)
+            validator.check_integer(f'begin[{i}]', begin_idx, x_shape[i], Rel.LE, self.name)
+            validator.check_integer(f'end[{i}]', end_idx, x_shape[i], Rel.LE, self.name)
+            validator.check_integer(f'strides[{i}]', strides_idx, 0, Rel.NE, self.name)
             if strides_idx > 0:
                 # If sliced forward , end_idx >= begin_idx
                 validator.check(f'begin[{i}]', begin_idx, f'end[{i}]', end_idx, Rel.LE)
@@ -1736,7 +1714,7 @@ class Diag(PrimitiveWithInfer):
         """init Diag"""
 
     def infer_dtype(self, x_type):
-        validator.check_subclass('input_x', x_type, mstype.tensor)
+        validator.check_subclass('input_x', x_type, mstype.tensor, self.name)
         return x_type
 
     def infer_shape(self, x_shape):
@@ -1748,7 +1726,7 @@ class Diag(PrimitiveWithInfer):
     def infer_value(self, x):
         if x is None:
             return None
-        validator.check("input x rank", len(x.shape()), "", 1)
+        validator.check_integer("input x rank", len(x.shape()), 1, Rel.EQ, self.name)
         ret = np.diag(x.asnumpy())
         return Tensor(ret)
 
@@ -1783,13 +1761,13 @@ class DiagPart(PrimitiveWithInfer):
         """init DiagPart"""
 
     def infer_dtype(self, x_type):
-        validator.check_subclass('input_x', x_type, mstype.tensor)
+        validator.check_subclass('input_x', x_type, mstype.tensor, self.name)
         return x_type
 
     def infer_shape(self, x_shape):
         if len(x_shape)%2 != 0 or \
                 not x_shape:
-            raise ValueError(f"DiagPart input rank must be non-zero and even, but got rank {len(x_shape)}, "
+            raise ValueError(f"For \'{self.name}\' input rank must be non-zero and even, but got rank {len(x_shape)}, "
                              f"with shapes {x_shape}")
         length = len(x_shape) // 2
         ret_shape = x_shape[0:length]
@@ -1798,7 +1776,7 @@ class DiagPart(PrimitiveWithInfer):
     def infer_value(self, x):
         if x is None:
             return None
-        validator.check("x rank", len(x.shape()), "", 2)
+        validator.check("x rank", len(x.shape()), "", 2, Rel.EQ, self.name)
         ret = np.diag(x.asnumpy())
         return Tensor(ret)
 
@@ -1826,12 +1804,10 @@ class Eye(PrimitiveWithInfer):
         """init Eye"""
 
     def infer_value(self, n, m, t):
-        validator.check_type("n", n, [int])
-        validator.check_integer("n", n, 0, Rel.GT)
-        validator.check_type("m", m, [int])
-        validator.check_integer("m", m, 0, Rel.GT)
+        validator.check_integer("n", n, 0, Rel.GT, self.name)
+        validator.check_integer("m", m, 0, Rel.GT, self.name)
         args = {"dtype": t}
-        validator.check_type_same(args, mstype.number_type + (mstype.bool_,))
+        validator.check_type_same(args, mstype.number_type + (mstype.bool_,), self.name)
         np_type = mstype.dtype_to_nptype(t)
         ret = np.eye(n, m, dtype=np_type)
         return Tensor(ret)
@@ -1866,16 +1842,15 @@ class ScatterNd(PrimitiveWithInfer):
 
     def __infer__(self, indices, update, shape):
         shp = shape['value']
-        validator.check_subclass("indices_dtype", indices['dtype'], mstype.tensor)
-        validator.check_subclass("update_dtype", update['dtype'], mstype.tensor)
-        validator.check_typename("indices_dtype", indices['dtype'], mstype.int_type)
-        validator.check_type("shape", shp, [tuple])
+        validator.check_subclass("update_dtype", update['dtype'], mstype.tensor, self.name)
+        validator.check_tensor_type_same({"indices": indices['dtype']}, mstype.int_type, self.name)
+        validator.check_value_type("shape", shp, [tuple], self.name)
         for i, x in enumerate(shp):
-            validator.check_integer("shape[%d]" % i, x, 0, Rel.GT)
+            validator.check_integer("shape[%d]" % i, x, 0, Rel.GT, self.name)
 
         indices_shape, update_shape = indices["shape"], update["shape"]
         if indices_shape[0] != update_shape[0]:
-            raise ValueError('The indices_shape[0] and update_shape[0] must be equal.')
+            raise ValueError(f'For \'{self.name}\' The indices_shape[0] and update_shape[0] must be equal.')
 
         return {'shape': shp,
                 'dtype': update['dtype'],
@@ -1913,7 +1888,7 @@ class ResizeNearestNeighbor(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['image_in'], outputs=['image_out'])
 
     def infer_shape(self, x):
-        validator.check('the dimension of input_x', len(x), '', 2, Rel.GE)
+        validator.check('the dimension of input_x', len(x), '', 2, Rel.GE, self.name)
         return tuple(x)[:-2] + tuple(self.size)
 
     def infer_dtype(self, x):
@@ -1947,13 +1922,12 @@ class GatherNd(PrimitiveWithInfer):
 
     def infer_shape(self, x_shape, indices_shape):
         validator.check('the dimension of x', len(x_shape),
-                        'the dimension of indices', indices_shape[-1], Rel.GE)
+                        'the dimension of indices', indices_shape[-1], Rel.GE, self.name)
         return indices_shape[:-1] + x_shape[indices_shape[-1]:]
 
     def infer_dtype(self, x_dtype, indices_dtype):
-        validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
-        validator.check_subclass("indices_dtype", indices_dtype, mstype.tensor)
-        validator.check_typename("indices_dtype", indices_dtype, mstype.int_type)
+        validator.check_subclass("x_dtype", x_dtype, mstype.tensor, self.name)
+        validator.check_tensor_type_same({"indices": indices_dtype}, mstype.int_type, self.name)
         return x_dtype
 
 
@@ -1995,12 +1969,9 @@ class ScatterNdUpdate(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, x_dtype, indices_dtype, value_dtype):
-        validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
-        validator.check_subclass("indices_dtype", indices_dtype, mstype.tensor)
-        validator.check_subclass("value_dtype", value_dtype, mstype.tensor)
-        validator.check_typename('indices_dtype', indices_dtype, mstype.int_type)
-        args = {"x_dtype": x_dtype, "value_dtype": value_dtype}
-        validator.check_type_same(args, (mstype.bool_,) + mstype.number_type)
+        validator.check_tensor_type_same({'indices': indices_dtype}, mstype.int_type, self.name)
+        args = {"x": x_dtype, "value": value_dtype}
+        validator.check_tensor_type_same(args, (mstype.bool_,) + mstype.number_type, self.name)
         return x_dtype
 
 
@@ -2038,7 +2009,7 @@ class SpaceToDepth(PrimitiveWithInfer):
     def __init__(self, block_size):
         """Init SpaceToDepth"""
         self.init_prim_io_names(inputs=['x'], outputs=['y'])
-        validator.check_type('block_size', block_size, [int])
+        validator.check_value_type('block_size', block_size, [int], self.name)
         validator.check('block_size', block_size, '', 2, Rel.GE)
         self.block_size = block_size
         self.add_prim_attr("data_format", "NCHW")
@@ -2048,7 +2019,7 @@ class SpaceToDepth(PrimitiveWithInfer):
         out_shape = copy.deepcopy(x_shape)
         for i in range(2):
             if out_shape[i+2] % self.block_size != 0:
-                raise ValueError(f'SpaceToDepth input shape[{i+2}] {out_shape[i+2]} should be '
+                raise ValueError(f'For \'{self.name}\' input shape[{i+2}] {out_shape[i+2]} should be '
                                  f'fully divided by block_size {self.block_size}')
             out_shape[i+2] //= self.block_size
 
@@ -2056,7 +2027,7 @@ class SpaceToDepth(PrimitiveWithInfer):
         return out_shape
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
+        validator.check_subclass("x_dtype", x_dtype, mstype.tensor, self.name)
         return x_dtype
 
 
@@ -2096,8 +2067,8 @@ class DepthToSpace(PrimitiveWithInfer):
     def __init__(self, block_size):
         """Init DepthToSpace"""
         self.init_prim_io_names(inputs=['x'], outputs=['y'])
-        validator.check_type('block_size', block_size, [int])
-        validator.check('block_size', block_size, '', 2, Rel.GE)
+        validator.check_value_type('block_size', block_size, [int], self.name)
+        validator.check('block_size', block_size, '', 2, Rel.GE, self.name)
         self.block_size = block_size
         self.add_prim_attr("data_format", "NCHW")
 
@@ -2107,12 +2078,13 @@ class DepthToSpace(PrimitiveWithInfer):
         for i in range(2):
             out_shape[i+2] *= self.block_size
 
-        validator.check('x_shape[1] % (block_size*block_size)', x_shape[1] % (self.block_size*self.block_size), '', 0)
+        validator.check_integer('x_shape[1] % (block_size*block_size)', x_shape[1] % (self.block_size*self.block_size),
+                                0, Rel.EQ, self.name)
         out_shape[1] //= self.block_size * self.block_size
         return out_shape
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
+        validator.check_subclass("x_dtype", x_dtype, mstype.tensor, self.name)
         return x_dtype
 
 
@@ -2159,27 +2131,26 @@ class SpaceToBatch(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, block_size, paddings):
         """Init SpaceToBatch"""
-        validator.check_type('block_size', block_size, [int])
-        validator.check('block_size', block_size, '', 1, Rel.GT)
+        validator.check_value_type('block_size', block_size, [int], self.name)
+        validator.check('block_size', block_size, '', 1, Rel.GT, self.name)
         self.block_size = block_size
-        validator.check('paddings shape', np.array(paddings).shape, '', (2, 2))
+        validator.check('paddings shape', np.array(paddings).shape, '', (2, 2), Rel.EQ, self.name)
         for elem in itertools.chain(*paddings):
-            validator.check_type('paddings element', elem, [int])
+            validator.check_value_type('paddings element', elem, [int], self.name)
         self.paddings = paddings
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("input_x", x_dtype, mstype.tensor)
-        validator.check_typename('input_x', x_dtype, mstype.number_type)
+        validator.check_tensor_type_same({'input_x': x_dtype}, mstype.number_type, self.name)
         return x_dtype
 
     def infer_shape(self, x_shape):
-        validator.check('rank of input_x', len(x_shape), '', 4)
+        validator.check_integer('rank of input_x', len(x_shape), 4, Rel.EQ, self.name)
         out_shape = copy.deepcopy(x_shape)
         for i in range(2):
             padded = out_shape[i+2] + self.paddings[i][0] + \
                      self.paddings[i][1]
             if padded % self.block_size != 0:
-                raise ValueError(f'padded[{i}] {padded} should be divisible by '
+                raise ValueError(f'For \'{self.name}\' padded[{i}] {padded} should be divisible by '
                                  f'block_size {self.block_size}')
             out_shape[i+2] = padded // self.block_size
         out_shape[0] *= self.block_size * self.block_size
@@ -2227,17 +2198,16 @@ class BatchToSpace(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, block_size, crops):
         """Init BatchToSpace"""
-        validator.check_type('block_size', block_size, [int])
-        validator.check('block_size', block_size, '', 1, Rel.GT)
+        validator.check_value_type('block_size', block_size, [int], self.name)
+        validator.check('block_size', block_size, '', 1, Rel.GT, self.name)
         self.block_size = block_size
         validator.check('crops shape', np.array(crops).shape, '', (2, 2))
         for elem in itertools.chain(*crops):
-            validator.check_type('crops element', elem, [int])
+            validator.check_value_type('crops element', elem, [int], self.name)
         self.crops = crops
 
     def infer_dtype(self, x_dtype):
-        validator.check_subclass("input_x", x_dtype, mstype.tensor)
-        validator.check_typename('input_x', x_dtype, mstype.number_type)
+        validator.check_tensor_type_same({'input_x': x_dtype}, mstype.number_type, self.name)
         return x_dtype
 
     def infer_shape(self, x_shape):
@@ -2246,11 +2216,11 @@ class BatchToSpace(PrimitiveWithInfer):
         for i in range(2):
             x_block_prod = out_shape[i+2] * self.block_size
             crops_sum = self.crops[i][0] + self.crops[i][1]
-            validator.check("x block shape prod", x_block_prod, 'crops sum', crops_sum, Rel.GT)
+            validator.check("x block shape prod", x_block_prod, 'crops sum', crops_sum, Rel.GT, self.name)
             out_shape[i+2] = x_block_prod - crops_sum
         block_size_prod = self.block_size * self.block_size
         if out_shape[0] % block_size_prod != 0:
-            raise ValueError(f'input_x dimension 0 {out_shape[0]}  should be divisible by '
+            raise ValueError(f'For \'{self.name}\' input_x dimension 0 {out_shape[0]}  should be divisible by '
                              f'block_size_prod {block_size_prod}')
         out_shape[0] = out_shape[0] // block_size_prod
         return out_shape
diff --git a/tests/ut/python/ops/test_array_ops_check.py b/tests/ut/python/ops/test_array_ops_check.py
new file mode 100755
index 0000000000..f7b77bbb5b
--- /dev/null
+++ b/tests/ut/python/ops/test_array_ops_check.py
@@ -0,0 +1,159 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+""" test ops """
+import functools
+import numpy as np
+from mindspore import ops
+from mindspore.ops import functional as F
+from mindspore.ops import operations as P
+from mindspore.ops.operations import _grad_ops as G
+import mindspore.ops.composite as C
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.common.parameter import Parameter
+from ..ut_filter import non_graph_engine
+from mindspore.common.api import _executor
+
+from ....mindspore_test_framework.mindspore_test import mindspore_test
+from ....mindspore_test_framework.pipeline.forward.compile_forward\
+    import (pipeline_for_compile_forward_ge_graph_for_case_by_case_config,
+            pipeline_for_compile_forward_ge_graph_for_case_by_case_config_exception)
+from ....mindspore_test_framework.pipeline.gradient.compile_gradient\
+    import pipeline_for_compile_grad_ge_graph_for_case_by_case_config
+
+
+class ExpandDimsNet(nn.Cell):
+    def __init__(self, axis):
+        super(ExpandDimsNet, self).__init__()
+        self.axis = axis
+        self.op = P.ExpandDims()
+
+    def construct(self, x):
+        return self.op(x, self.axis)
+
+
+class IsInstanceNet(nn.Cell):
+    def __init__(self, inst):
+        super(IsInstanceNet, self).__init__()
+        self.inst = inst
+        self.op = P.IsInstance()
+
+    def construct(self, t):
+        return self.op(self.inst, t)
+
+
+class ReshapeNet(nn.Cell):
+    def __init__(self, shape):
+        super(ReshapeNet, self).__init__()
+        self.shape = shape
+        self.op = P.Reshape()
+
+    def construct(self, x):
+        return self.op(x, self.shape)
+
+
+raise_set = [
+    # input is a scalar, not a Tensor
+    ('ExpandDims0', {
+        'block': (P.ExpandDims(), {'exception': TypeError, 'error_keywords': ['ExpandDims']}),
+        'desc_inputs': [5.0, 1],
+        'skip': ['backward']}),
+    # axis is passed as a parameter
+    ('ExpandDims1', {
+        'block': (P.ExpandDims(), {'exception': TypeError, 'error_keywords': ['ExpandDims']}),
+        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), 1],
+        'skip': ['backward']}),
+    # axis as an attribute, but less than the lower limit
+    ('ExpandDims2', {
+        'block': (ExpandDimsNet(-4), {'exception': ValueError, 'error_keywords': ['ExpandDims']}),
+        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32))],
+        'skip': ['backward']}),
+    # axis as an attribute, but greater than the upper limit
+    ('ExpandDims3', {
+        'block': (ExpandDimsNet(3), {'exception': ValueError, 'error_keywords': ['ExpandDims']}),
+        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32))],
+        'skip': ['backward']}),
+
+    # input is a scalar, not a Tensor
+    ('DType0', {
+        'block': (P.DType(), {'exception': TypeError, 'error_keywords': ['DType']}),
+        'desc_inputs': [5.0],
+        'skip': ['backward']}),
+
+    # input x is a scalar, not a Tensor
+    ('SameTypeShape0', {
+        'block': (P.SameTypeShape(), {'exception': TypeError, 'error_keywords': ['SameTypeShape']}),
+        'desc_inputs': [5.0, Tensor(np.ones([3, 4]).astype(np.float32))],
+        'skip': ['backward']}),
+    # input y is a scalar, not a Tensor
+    ('SameTypeShape1', {
+        'block': (P.SameTypeShape(), {'exception': TypeError, 'error_keywords': ['SameTypeShape']}),
+        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), 5.0],
+        'skip': ['backward']}),
+    # type of x and y not match
+    ('SameTypeShape2', {
+        'block': (P.SameTypeShape(), {'exception': TypeError, 'error_keywords': ['SameTypeShape']}),
+        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), Tensor(np.ones([3, 4]).astype(np.int32))],
+        'skip': ['backward']}),
+    # shape of x and y not match
+    ('SameTypeShape3', {
+        'block': (P.SameTypeShape(), {'exception': ValueError, 'error_keywords': ['SameTypeShape']}),
+        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), Tensor(np.ones([3, 3]).astype(np.float32))],
+        'skip': ['backward']}),
+
+    # sub_type is None
+    ('IsSubClass0', {
+        'block': (P.IsSubClass(), {'exception': TypeError, 'error_keywords': ['IsSubClass']}),
+        'desc_inputs': [None, mstype.number],
+        'skip': ['backward']}),
+    # type_ is None
+    ('IsSubClass1', {
+        'block': (P.IsSubClass(), {'exception': TypeError, 'error_keywords': ['IsSubClass']}),
+        'desc_inputs': [mstype.number, None],
+        'skip': ['backward']}),
+
+    # inst is var
+    ('IsInstance0', {
+        'block': (P.IsInstance(), {'exception': ValueError, 'error_keywords': ['IsInstance']}),
+        'desc_inputs': [5.0, mstype.number],
+        'skip': ['backward']}),
+    # t is not mstype.Type
+    ('IsInstance1', {
+        'block': (IsInstanceNet(5.0), {'exception': TypeError, 'error_keywords': ['IsInstance']}),
+        'desc_inputs': [None],
+        'skip': ['backward']}),
+
+    # input x is scalar, not Tensor
+    ('Reshape0', {
+        'block': (P.Reshape(), {'exception': TypeError, 'error_keywords': ['Reshape']}),
+        'desc_inputs': [5.0, (1, 2)],
+        'skip': ['backward']}),
+    # input shape is var
+    ('Reshape1', {
+        'block': (P.Reshape(), {'exception': TypeError, 'error_keywords': ['Reshape']}),
+        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32)), (2, 3, 2)],
+        'skip': ['backward']}),
+    # element of shape is not int
+    ('Reshape3', {
+        'block': (ReshapeNet((2, 3.0, 2)), {'exception': TypeError, 'error_keywords': ['Reshape']}),
+        'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.float32))],
+        'skip': ['backward']}),
+]
+
+
+@mindspore_test(pipeline_for_compile_forward_ge_graph_for_case_by_case_config_exception)
+def test_check_exception():
+    return raise_set
diff --git a/tests/ut/python/ops/test_tensor_slice.py b/tests/ut/python/ops/test_tensor_slice.py
index 58af9bc273..f713b1ea0c 100644
--- a/tests/ut/python/ops/test_tensor_slice.py
+++ b/tests/ut/python/ops/test_tensor_slice.py
@@ -383,7 +383,7 @@ def test_tensor_slice_reduce_out_of_bounds_neg():
     net = NetWork()
     with pytest.raises(ValueError) as ex:
         net(input_tensor)
-    assert "The `begin[0]` should be an int and must greater or equal to -6, but got -7" in str(ex.value)
+    assert "For 'StridedSlice' the `begin[0]` should be an int and must greater or equal to -6, but got `-7`" in str(ex.value)
 
 
 def test_tensor_slice_reduce_out_of_bounds_positive():
@@ -400,4 +400,4 @@ def test_tensor_slice_reduce_out_of_bounds_positive():
     net = NetWork()
     with pytest.raises(ValueError) as ex:
         net(input_tensor)
-    assert "The `begin[0]` should be an int and must less than 6, but got 6" in str(ex.value)
+    assert "For 'StridedSlice' the `begin[0]` should be an int and must less than 6, but got `6`" in str(ex.value)
diff --git a/tests/vm_impl/vm_me.py b/tests/vm_impl/vm_me.py
index da7fc1ecbe..82b0324fb5 100644
--- a/tests/vm_impl/vm_me.py
+++ b/tests/vm_impl/vm_me.py
@@ -16,7 +16,7 @@
 
 import numpy as np
 from mindspore._checkparam import Rel
-from mindspore._checkparam import ParamValidator as validator
+from mindspore._checkparam import Validator as validator
 
 
 def avg_pooling(x, pool_h, pool_w, stride):
@@ -32,7 +32,7 @@ def avg_pooling(x, pool_h, pool_w, stride):
     Returns:
         numpy.ndarray, an output array after applying average pooling on input array.
     """
-    validator.check_integer("stride", stride, 0, Rel.GT)
+    validator.check_integer("stride", stride, 0, Rel.GT, None)
     num, channel, height, width = x.shape
     out_h = (height - pool_h)//stride + 1
     out_w = (width - pool_w)//stride + 1
@@ -217,7 +217,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0,
            dilation=1, groups=1, padding_mode='zeros'):
     """Convolution 2D."""
     # pylint: disable=unused-argument
-    validator.check_type('stride', stride, (int, tuple))
+    validator.check_value_type('stride', stride, (int, tuple), None)
     if isinstance(stride, int):
         stride = (stride, stride)
     elif len(stride) == 4:
@@ -229,7 +229,7 @@ def conv2d(x, weight, bias=None, stride=1, pad=0,
                          f"a tuple of two positive int numbers, but got {stride}")
     stride_h = stride[0]
     stride_w = stride[1]
-    validator.check_type('dilation', dilation, (int, tuple))
+    validator.check_value_type('dilation', dilation, (int, tuple), None)
     if isinstance(dilation, int):
         dilation = (dilation, dilation)
     elif len(dilation) == 4:
@@ -384,7 +384,7 @@ def matmul(x, w, b=None):
 
 def max_pooling(x, pool_h, pool_w, stride):
     """Max pooling."""
-    validator.check_integer("stride", stride, 0, Rel.GT)
+    validator.check_integer("stride", stride, 0, Rel.GT, None)
     num, channel, height, width = x.shape
     out_h = (height - pool_h)//stride + 1
     out_w = (width - pool_w)//stride + 1
@@ -427,7 +427,7 @@ def max_pool_grad_with_argmax(x, dout, arg_max, pool_h, pool_w, stride):
 
 def max_pool_with_argmax(x, pool_h, pool_w, stride):
     """Max pooling with argmax."""
-    validator.check_integer("stride", stride, 0, Rel.GT)
+    validator.check_integer("stride", stride, 0, Rel.GT, None)
     num, channel, height, width = x.shape
     out_h = (height - pool_h)//stride + 1
     out_w = (width - pool_w)//stride + 1

From f1cec60dc85f63bb2745f221888940f7745f38b2 Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Sun, 26 Apr 2020 15:35:11 +0800
Subject: [PATCH 080/242] upload resnet101 scripts

---
 example/resnet101_imagenet/README.md          | 139 +++++++++++++
 example/resnet101_imagenet/config.py          |  42 ++++
 example/resnet101_imagenet/crossentropy.py    |  36 ++++
 example/resnet101_imagenet/dataset.py         |  89 +++++++++
 example/resnet101_imagenet/eval.py            |  84 ++++++++
 example/resnet101_imagenet/lr_generator.py    | 113 +++++++++++
 .../run_distribute_train.sh                   |  54 ++++++
 example/resnet101_imagenet/run_infer.sh       |  52 +++++
 .../run_standalone_train.sh                   |  46 +++++
 example/resnet101_imagenet/train.py           | 113 +++++++++++
 example/resnet101_imagenet/var_init.py        | 183 ++++++++++++++++++
 mindspore/model_zoo/resnet.py                 |  21 ++
 12 files changed, 972 insertions(+)
 create mode 100644 example/resnet101_imagenet/README.md
 create mode 100755 example/resnet101_imagenet/config.py
 create mode 100755 example/resnet101_imagenet/crossentropy.py
 create mode 100755 example/resnet101_imagenet/dataset.py
 create mode 100755 example/resnet101_imagenet/eval.py
 create mode 100755 example/resnet101_imagenet/lr_generator.py
 create mode 100755 example/resnet101_imagenet/run_distribute_train.sh
 create mode 100755 example/resnet101_imagenet/run_infer.sh
 create mode 100755 example/resnet101_imagenet/run_standalone_train.sh
 create mode 100755 example/resnet101_imagenet/train.py
 create mode 100755 example/resnet101_imagenet/var_init.py

diff --git a/example/resnet101_imagenet/README.md b/example/resnet101_imagenet/README.md
new file mode 100644
index 0000000000..bc653675f2
--- /dev/null
+++ b/example/resnet101_imagenet/README.md
@@ -0,0 +1,139 @@
+# ResNet101 Example
+ 
+## Description
+ 
+This is an example of training ResNet101 with ImageNet dataset in MindSpore.
+
+## Requirements
+
+- Install [MindSpore](https://www.mindspore.cn/install/en).
+
+- Download the dataset [ImageNet](http://image-net.org/download).
+ 
+> Unzip the ImageNet dataset to any path you want; the folder should contain the training and evaluation datasets as follows:
+ 
+```
+.
+└─dataset
+    ├─ilsvrc
+    │
+    └─validation_preprocess
+```
+
+## Example structure
+ 
+```shell
+.
+├── crossentropy.py                 # CrossEntropy loss function
+├── var_init.py                     # weight initialization
+├── config.py                       # parameter configuration
+├── dataset.py                      # data preprocessing
+├── eval.py                         # eval net
+├── lr_generator.py                 # generate learning rate
+├── run_distribute_train.sh         # launch distributed training(8p)
+├── run_infer.sh                    # launch evaluating
+├── run_standalone_train.sh         # launch standalone training(1p)
+└── train.py                        # train net
+```
+ 
+## Parameter configuration
+ 
+Parameters for both training and evaluating can be set in config.py.
+ 
+```
+"class_num": 1001,                # dataset class number
+"batch_size": 32,                 # batch size of input tensor
+"loss_scale": 1024,               # loss scale
+"momentum": 0.9,                  # momentum optimizer
+"weight_decay": 1e-4,             # weight decay
+"epoch_size": 120,                # epoch sizes for training
+"buffer_size": 1000,              # shuffle buffer size used in data preprocessing
+"image_height": 224,              # image height
+"image_width": 224,               # image width
+"save_checkpoint": True,          # whether save checkpoint or not
+"save_checkpoint_steps": 500,     # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
+"keep_checkpoint_max": 40,        # only keep the last keep_checkpoint_max checkpoints
+"save_checkpoint_path": "./",     # path to save checkpoint relative to the executed path
+"lr_init": 0.01,                  # initial learning rate
+"lr_end": 0.00001,                # final learning rate
+"lr_max": 0.1,                    # maximum learning rate
+"warmup_epochs": 0,               # number of warmup epochs
+"lr_decay_mode": "cosine",        # decay mode for generating learning rate
+"label_smooth": 1,                # whether to use label smoothing (0 disables it)
+"label_smooth_factor": 0.1,       # label smoothing factor
+"lr": 0.1                         # base learning rate
+```
+
+## Running the example
+
+### Train
+ 
+#### Usage
+
+```
+# distributed training
+sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]
+ 
+# standalone training
+sh run_standalone_train.sh [DATASET_PATH]
+```
+ 
+#### Launch
+ 
+```bash
+# distributed training example(8p)
+sh run_distribute_train.sh rank_table_8p.json dataset/ilsvrc
+ 
+# standalone training example(1p)
+sh run_standalone_train.sh dataset/ilsvrc
+```
+ 
+> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
+
+#### Result
+ 
+Training results will be stored in the example path, in folders whose names begin with "train" or "train_parallel". There you can find the checkpoint files, together with results like the following in the log.
+
+ 
+```
+# distribute training result(8p)
+epoch: 1 step: 5004, loss is 4.805483
+epoch: 2 step: 5004, loss is 3.2121816
+epoch: 3 step: 5004, loss is 3.429647
+epoch: 4 step: 5004, loss is 3.3667371
+epoch: 5 step: 5004, loss is 3.1718972
+...
+epoch: 67 step: 5004, loss is 2.2768745
+epoch: 68 step: 5004, loss is 1.7223864
+epoch: 69 step: 5004, loss is 2.0665488
+epoch: 70 step: 5004, loss is 1.8717369
+...
+```
+
+### Infer
+ 
+#### Usage
+ 
+```
+# infer
+sh run_infer.sh [VALIDATION_DATASET_PATH] [CHECKPOINT_PATH]
+```
+ 
+#### Launch
+ 
+```bash
+# infer with checkpoint
+sh run_infer.sh dataset/validation_preprocess/ train_parallel0/resnet-120_5004.ckpt
+
+```
+ 
+> Checkpoints are produced during the training process.
+ 
+
+#### Result
+ 
+Inference results will be stored in the example path, in a folder named "infer". There you can find results like the following in the log.
+ 
+```
+result: {'top_5_accuracy': 0.9429417413572343, 'top_1_accuracy': 0.7853513124199744} ckpt=train_parallel0/resnet-120_5004.ckpt
+```
diff --git a/example/resnet101_imagenet/config.py b/example/resnet101_imagenet/config.py
new file mode 100755
index 0000000000..0ad37c8678
--- /dev/null
+++ b/example/resnet101_imagenet/config.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+network config setting, will be used in train.py and eval.py
+"""
+from easydict import EasyDict as ed
+
+config = ed({
+    "class_num": 1001,
+    "batch_size": 32,
+    "loss_scale": 1024,
+    "momentum": 0.9,
+    "weight_decay": 1e-4,
+    "epoch_size": 120,
+    "buffer_size": 1000,
+    "image_height": 224,
+    "image_width": 224,
+    "save_checkpoint": True,
+    "save_checkpoint_steps": 500,
+    "keep_checkpoint_max": 40,
+    "save_checkpoint_path": "./",
+    "lr_init": 0.01,
+    "lr_end": 0.00001,
+    "lr_max": 0.1,
+    "warmup_epochs": 0,
+    "lr_decay_mode": "cosine",
+    "label_smooth": 1,
+    "label_smooth_factor": 0.1,
+    "lr": 0.1
+})
diff --git a/example/resnet101_imagenet/crossentropy.py b/example/resnet101_imagenet/crossentropy.py
new file mode 100755
index 0000000000..e636b8529e
--- /dev/null
+++ b/example/resnet101_imagenet/crossentropy.py
@@ -0,0 +1,36 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+from mindspore.nn.loss.loss import _Loss
+from mindspore.ops import operations as P
+from mindspore.ops import functional as F
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+import mindspore.nn as nn
+ 
+"""define loss function for network"""
+class CrossEntropy(_Loss):
+    def __init__(self, smooth_factor=0., num_classes=1001):
+        super(CrossEntropy, self).__init__()
+        self.onehot = P.OneHot()
+        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
+        self.off_value = Tensor(1.0 * smooth_factor / (num_classes -1), mstype.float32)
+        self.ce = nn.SoftmaxCrossEntropyWithLogits()
+        self.mean = P.ReduceMean(False)
+ 
+    def construct(self, logit, label):
+        one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
+        loss = self.ce(logit, one_hot_label)
+        loss = self.mean(loss, 0)
+        return loss
diff --git a/example/resnet101_imagenet/dataset.py b/example/resnet101_imagenet/dataset.py
new file mode 100755
index 0000000000..920e1c093c
--- /dev/null
+++ b/example/resnet101_imagenet/dataset.py
@@ -0,0 +1,89 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+create train or eval dataset.
+"""
+import os
+import mindspore.common.dtype as mstype
+import mindspore.dataset.engine as de
+import mindspore.dataset.transforms.vision.c_transforms as C
+import mindspore.dataset.transforms.c_transforms as C2
+from config import config
+
+def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
+    """
+    create a train or evaluate dataset
+    Args:
+        dataset_path(string): the path of dataset.
+        do_train(bool): whether dataset is used for train or eval.
+        repeat_num(int): the repeat times of dataset. Default: 1
+        batch_size(int): the batch size of dataset. Default: 32
+
+    Returns:
+        dataset
+    """
+    device_num = int(os.getenv("RANK_SIZE"))
+    rank_id = int(os.getenv("RANK_ID"))
+
+    if device_num == 1:
+        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+    else:
+        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+                                     num_shards=device_num, shard_id=rank_id)
+    resize_height = 224
+    rescale = 1.0 / 255.0
+    shift = 0.0
+
+    # define map operations
+    decode_op = C.Decode()
+
+    random_resize_crop_op = C.RandomResizedCrop(resize_height, (0.08, 1.0), (0.75, 1.33), max_attempts=100)
+    horizontal_flip_op = C.RandomHorizontalFlip(rank_id / (rank_id + 1))
+    resize_op_256 = C.Resize((256, 256))
+    center_crop = C.CenterCrop(224)
+    rescale_op = C.Rescale(rescale, shift)
+    normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278))
+    changeswap_op = C.HWC2CHW()
+
+    trans=[]
+    if do_train:
+        trans = [decode_op,
+                 random_resize_crop_op,
+                 horizontal_flip_op,
+                 rescale_op,
+                 normalize_op,
+                 changeswap_op]
+
+    else:
+        trans = [decode_op,
+                 resize_op_256,
+                 center_crop,
+                 rescale_op,
+                 normalize_op,
+                 changeswap_op]
+
+    type_cast_op = C2.TypeCast(mstype.int32)
+
+    ds = ds.map(input_columns="image", operations=trans)
+    ds = ds.map(input_columns="label", operations=type_cast_op)
+
+    # apply shuffle operations
+    ds = ds.shuffle(buffer_size=config.buffer_size)
+    # apply batch operations
+    ds = ds.batch(batch_size, drop_remainder=True)
+    # apply dataset repeat operation
+    ds = ds.repeat(repeat_num)
+
+    return ds
diff --git a/example/resnet101_imagenet/eval.py b/example/resnet101_imagenet/eval.py
new file mode 100755
index 0000000000..00fe825e91
--- /dev/null
+++ b/example/resnet101_imagenet/eval.py
@@ -0,0 +1,84 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+eval.
+"""
+import os
+import argparse
+import random
+import numpy as np
+from dataset import create_dataset
+from config import config
+from mindspore import context
+from mindspore.model_zoo.resnet import resnet101
+from mindspore.parallel._auto_parallel_context import auto_parallel_context
+from mindspore.train.model import Model, ParallelMode
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+import mindspore.dataset.engine as de
+from mindspore.communication.management import init
+from crossentropy import CrossEntropy
+
+random.seed(1)
+np.random.seed(1)
+de.config.set_seed(1)
+
+parser = argparse.ArgumentParser(description='Image classification')
+parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
+parser.add_argument('--device_num', type=int, default=1, help='Device num.')
+parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
+parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
+parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+args_opt = parser.parse_args()
+
+device_id = int(os.getenv('DEVICE_ID'))
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id)
+context.set_context(enable_task_sink=True)
+context.set_context(enable_loop_sink=True)
+context.set_context(enable_mem_reuse=True)
+
+if __name__ == '__main__':
+    if args_opt.do_eval:
+        context.set_context(enable_hccl=False)
+    else:
+        if args_opt.run_distribute:
+            context.set_context(enable_hccl=True)
+            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                              mirror_mean=True, parameter_broadcast=True)
+            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+            init()
+        else:
+            context.set_context(enable_hccl=False)
+
+    epoch_size = config.epoch_size
+    net = resnet101(class_num=config.class_num)
+
+    if not config.label_smooth:
+        config.label_smooth_factor = 0.0
+    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+
+    if args_opt.do_eval:
+        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
+        step_size = dataset.get_dataset_size()
+
+        if args_opt.checkpoint_path:
+            param_dict = load_checkpoint(args_opt.checkpoint_path)
+            load_param_into_net(net, param_dict)
+        net.set_train(False)
+
+        model = Model(net, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})
+        res = model.eval(dataset)
+        print("result:", res, "ckpt=", args_opt.checkpoint_path)
diff --git a/example/resnet101_imagenet/lr_generator.py b/example/resnet101_imagenet/lr_generator.py
new file mode 100755
index 0000000000..b2271a1382
--- /dev/null
+++ b/example/resnet101_imagenet/lr_generator.py
@@ -0,0 +1,113 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""learning rate generator"""
+import numpy as np
+import math
+ 
+
+def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
+    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
+    lr = float(init_lr) + lr_inc * current_step
+    return lr
+
+def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch):
+    """
+    generate learning rate array with cosine
+
+    Args:
+       lr(float): base learning rate
+       steps_per_epoch(int): steps size of one epoch
+       warmup_epochs(int): number of warmup epochs
+       max_epoch(int): total epochs of training
+    Returns:
+       np.array, learning rate array
+    """
+    base_lr = lr
+    warmup_init_lr = 0
+    total_steps = int(max_epoch * steps_per_epoch)
+    warmup_steps = int(warmup_epochs * steps_per_epoch)
+    decay_steps = total_steps - warmup_steps
+
+    lr_each_step = []
+    for i in range(total_steps):
+        if i < warmup_steps:
+            lr = linear_warmup_lr(i + 1, warmup_steps, base_lr, warmup_init_lr)
+        else:
+            linear_decay = (total_steps - i) / decay_steps
+            cosine_decay = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * i / decay_steps))
+            decayed = linear_decay * cosine_decay + 0.00001
+            lr = base_lr * decayed
+        lr_each_step.append(lr)
+    return np.array(lr_each_step).astype(np.float32)  
+
+def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
+    """
+    generate learning rate array
+
+    Args:
+       global_step(int): total steps of the training
+       lr_init(float): init learning rate
+       lr_end(float): end learning rate
+       lr_max(float): max learning rate
+       warmup_epochs(int): number of warmup epochs
+       total_epochs(int): total epoch of training
+       steps_per_epoch(int): steps of one epoch
+       lr_decay_mode(string): learning rate decay mode, including steps, poly or default
+
+    Returns:
+       np.array, learning rate array
+    """
+    lr_each_step = []
+    total_steps = steps_per_epoch * total_epochs
+    warmup_steps = steps_per_epoch * warmup_epochs
+    if lr_decay_mode == 'steps':
+        decay_epoch_index = [0.3 * total_steps, 0.6 * total_steps, 0.8 * total_steps]
+        for i in range(total_steps):
+            if i < decay_epoch_index[0]:
+                lr = lr_max
+            elif i < decay_epoch_index[1]:
+                lr = lr_max * 0.1
+            elif i < decay_epoch_index[2]:
+                lr = lr_max * 0.01
+            else:
+                lr = lr_max * 0.001
+            lr_each_step.append(lr)
+    elif lr_decay_mode == 'poly':
+        if warmup_steps != 0:
+            inc_each_step = (float(lr_max) - float(lr_init)) / float(warmup_steps)
+        else:
+            inc_each_step = 0
+        for i in range(total_steps):
+            if i < warmup_steps:
+                lr = float(lr_init) + inc_each_step * float(i)
+            else:
+                base = (1.0 - (float(i) - float(warmup_steps)) / (float(total_steps) - float(warmup_steps)))
+                lr = float(lr_max) * base * base
+                if lr < 0.0:
+                    lr = 0.0
+            lr_each_step.append(lr)
+    else:
+        for i in range(total_steps):
+            if i < warmup_steps:
+                lr = lr_init + (lr_max - lr_init) * i / warmup_steps
+            else:
+                lr = lr_max - (lr_max - lr_end) * (i - warmup_steps) / (total_steps - warmup_steps)
+            lr_each_step.append(lr)
+
+    current_step = global_step
+    lr_each_step = np.array(lr_each_step).astype(np.float32)
+    learning_rate = lr_each_step[current_step:]
+
+    return learning_rate
diff --git a/example/resnet101_imagenet/run_distribute_train.sh b/example/resnet101_imagenet/run_distribute_train.sh
new file mode 100755
index 0000000000..5165f58cab
--- /dev/null
+++ b/example/resnet101_imagenet/run_distribute_train.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 2 ]
+then
+    echo "Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATASET_PATH]"
+    exit 1
+fi
+
+if [ ! -f "$1" ]
+then
+    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
+    exit 1
+fi
+
+if [ ! -d "$2" ]
+then
+    echo "error: DATASET_PATH=$2 is not a directory"
+    exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=8 RANK_SIZE=8
+# Use absolute paths: every rank is launched from its own ./train_parallel<i> directory.
+export MINDSPORE_HCCL_CONFIG_PATH=$(realpath "$1")
+export RANK_TABLE_FILE=$MINDSPORE_HCCL_CONFIG_PATH
+DATASET_PATH=$(realpath "$2")
+
+for((i=0; i<${DEVICE_NUM}; i++))
+do
+    export DEVICE_ID=$i
+    export RANK_ID=$i
+    rm -rf ./train_parallel$i
+    mkdir ./train_parallel$i
+    cp *.py *.sh ./train_parallel$i
+    cd ./train_parallel$i || exit
+    echo "start training for rank $RANK_ID, device $DEVICE_ID"
+    env > env.log
+    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path="$DATASET_PATH" &> log &
+    cd ..
+done
diff --git a/example/resnet101_imagenet/run_infer.sh b/example/resnet101_imagenet/run_infer.sh
new file mode 100755
index 0000000000..5df659275e
--- /dev/null
+++ b/example/resnet101_imagenet/run_infer.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 2 ]
+then
+    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
+    exit 1
+fi
+
+if [ ! -d "$1" ]
+then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+if [ ! -f "$2" ]
+then
+    echo "error: CHECKPOINT_PATH=$2 is not a file"
+    exit 1
+fi
+
+# Use absolute paths: eval.py is launched from ./infer, where relative paths would not resolve.
+DATASET_PATH=$(realpath "$1")
+CHECKPOINT_PATH=$(realpath "$2")
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=0
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+
+rm -rf ./infer
+mkdir ./infer
+cp *.py *.sh ./infer
+cd ./infer || exit
+env > env.log
+echo "start infering for device $DEVICE_ID"
+python eval.py --do_eval=True --dataset_path="$DATASET_PATH" --checkpoint_path="$CHECKPOINT_PATH" &> log &
+cd ..
diff --git a/example/resnet101_imagenet/run_standalone_train.sh b/example/resnet101_imagenet/run_standalone_train.sh
new file mode 100755
index 0000000000..9ba5742515
--- /dev/null
+++ b/example/resnet101_imagenet/run_standalone_train.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 1 ]
+then
+    echo "Usage: sh run_standalone_train.sh [DATASET_PATH]"
+    exit 1
+fi
+
+if [ ! -d "$1" ]
+then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+# Use an absolute path: train.py is launched from ./train, where a relative path would not resolve.
+DATASET_PATH=$(realpath "$1")
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=0
+export RANK_ID=0
+export RANK_SIZE=1
+
+# Start from a fresh working directory holding a private copy of the scripts.
+rm -rf ./train
+mkdir ./train
+cp *.py *.sh ./train
+cd ./train || exit
+echo "start training for device $DEVICE_ID"
+env > env.log
+python train.py --do_train=True --dataset_path="$DATASET_PATH" &> log &
+cd ..
diff --git a/example/resnet101_imagenet/train.py b/example/resnet101_imagenet/train.py
new file mode 100755
index 0000000000..2df6c3bad4
--- /dev/null
+++ b/example/resnet101_imagenet/train.py
@@ -0,0 +1,113 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""train_imagenet."""
+import os
+import argparse
+import random
+import numpy as np
+from dataset import create_dataset
+from lr_generator import get_lr
+from config import config
+from mindspore import context
+from mindspore import Tensor
+from mindspore.model_zoo.resnet import resnet101
+from mindspore.parallel._auto_parallel_context import auto_parallel_context
+from mindspore.nn.optim.momentum import Momentum
+from mindspore.train.model import Model, ParallelMode
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
+from mindspore.train.loss_scale_manager import FixedLossScaleManager
+import mindspore.dataset.engine as de
+from mindspore.communication.management import init
+import mindspore.nn as nn
+from crossentropy import CrossEntropy
+from var_init import default_recurisive_init, KaimingNormal
+from mindspore.common import initializer as weight_init
+
+random.seed(1)
+np.random.seed(1)
+de.config.set_seed(1)
+
+
+def str2bool(value):
+    """Parse a boolean flag value; plain ``type=bool`` maps every non-empty string (even "False") to True."""
+    return str(value).lower() in ('true', '1', 'yes')
+
+
+parser = argparse.ArgumentParser(description='Image classification')
+parser.add_argument('--run_distribute', type=str2bool, default=False, help='Run distribute')
+parser.add_argument('--device_num', type=int, default=1, help='Device num.')
+parser.add_argument('--do_train', type=str2bool, default=True, help='Do train or not.')
+parser.add_argument('--do_eval', type=str2bool, default=False, help='Do eval or not.')
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+args_opt = parser.parse_args()
+
+device_id = int(os.getenv('DEVICE_ID'))
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id,
+                    enable_task_sink=True, enable_loop_sink=True, enable_mem_reuse=True)
+
+if __name__ == '__main__':
+    # HCCL is only enabled for distributed training; evaluation never uses it.
+    distributed = args_opt.run_distribute and not args_opt.do_eval
+    context.set_context(enable_hccl=distributed)
+    if distributed:
+        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          mirror_mean=True, parameter_broadcast=True)
+        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        init()
+
+    epoch_size = config.epoch_size
+    net = resnet101(class_num=config.class_num)
+
+    # weight init
+    default_recurisive_init(net)
+    for _, cell in net.cells_and_names():
+        if isinstance(cell, nn.Conv2d):
+            # `a` (negative slope) only affects the 'leaky_relu' gain, so it is not passed for 'relu'.
+            cell.weight.default_input = weight_init.initializer(KaimingNormal(mode='fan_out', nonlinearity='relu'),
+                                                                cell.weight.default_input.shape(),
+                                                                cell.weight.default_input.dtype())
+
+    if not config.label_smooth:
+        config.label_smooth_factor = 0.0
+    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+
+    if args_opt.do_train:
+        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
+                                 repeat_num=epoch_size, batch_size=config.batch_size)
+        step_size = dataset.get_dataset_size()
+        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
+
+        # learning rate strategy
+        if config.lr_decay_mode == 'cosine':
+            # NOTE(review): assumes lr_generator defines warmup_cosine_annealing_lr - confirm before relying on it.
+            from lr_generator import warmup_cosine_annealing_lr
+            lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size))
+        else:
+            lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
+                               warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
+                               lr_decay_mode='poly'))
+
+        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
+                       config.weight_decay, config.loss_scale)
+        model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False,
+                      loss_scale_manager=loss_scale, metrics={'acc'})
+
+        cb = [TimeMonitor(data_size=step_size), LossMonitor()]
+        if config.save_checkpoint:
+            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
+                                         keep_checkpoint_max=config.keep_checkpoint_max)
+            cb += [ModelCheckpoint(prefix="resnet", directory=config.save_checkpoint_path, config=config_ck)]
+        model.train(epoch_size, dataset, callbacks=cb)
diff --git a/example/resnet101_imagenet/var_init.py b/example/resnet101_imagenet/var_init.py
new file mode 100755
index 0000000000..af4cd64b3b
--- /dev/null
+++ b/example/resnet101_imagenet/var_init.py
@@ -0,0 +1,183 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""weight initial"""
+import math
+import numpy as np
+from mindspore.common import initializer as init
+import mindspore.nn as nn
+from mindspore import Tensor
+
+ 
+def calculate_gain(nonlinearity, param=None):
+    r"""Look up the recommended gain for the given nonlinearity function.
+
+    ================= ====================================================
+    nonlinearity      gain
+    ================= ====================================================
+    Linear / Identity :math:`1`
+    Conv{1,2,3}D      :math:`1`
+    Sigmoid           :math:`1`
+    Tanh              :math:`\frac{5}{3}`
+    ReLU              :math:`\sqrt{2}`
+    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
+    ================= ====================================================
+
+    Args:
+        nonlinearity (str): Lower-case name of the non-linear function, e.g. ``'relu'``.
+        param (int or float, optional): Negative slope, read only for ``'leaky_relu'``; ``None`` selects 0.01.
+
+    Returns:
+        int or float, the gain value.
+
+    Raises:
+        ValueError: If `nonlinearity` is not supported or `param` is not a real number.
+    """
+    linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
+    if nonlinearity == 'sigmoid' or nonlinearity in linear_fns:
+        return 1
+    if nonlinearity == 'tanh':
+        return 5.0 / 3
+    if nonlinearity == 'relu':
+        return math.sqrt(2.0)
+    if nonlinearity != 'leaky_relu':
+        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
+    if param is None:
+        param = 0.01
+    elif isinstance(param, bool) or not isinstance(param, (int, float)):  # bool is an int subclass, reject it
+        raise ValueError("negative_slope {} not a valid number".format(param))
+    return math.sqrt(2.0 / (1 + param ** 2))
+
+def _calculate_correct_fan(array, mode):
+    """Pick the fan-in or fan-out of `array` according to `mode` (matched case-insensitively)."""
+    valid_modes = ['fan_in', 'fan_out']
+    key = mode.lower()
+    if key not in valid_modes:
+        raise ValueError("Mode {} not supported, please use one of {}".format(key, valid_modes))
+    fans = dict(zip(valid_modes, _calculate_fan_in_and_fan_out(array)))
+    return fans[key]
+ 
+
+def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+    r"""Sample a He-uniform (Kaiming) initialization with the shape of `array`.
+
+    Follows `Delving deep into rectifiers: Surpassing human-level performance on
+    ImageNet classification` - He, K. et al. (2015). `array` is not written to; a new
+    array is returned whose values come from :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
+
+    .. math::
+        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
+
+    Args:
+        array: an n-dimensional array (n >= 2) that provides the fan and the output shape
+        a: the negative slope of the rectifier used after this layer (only
+            used with ``'leaky_relu'``)
+        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
+            preserves the magnitude of the variance of the weights in the
+            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
+            backwards pass.
+        nonlinearity: the non-linear function name, recommended to use only
+            with ``'relu'`` or ``'leaky_relu'`` (default).
+
+    Returns:
+        numpy.ndarray, the sampled values, of shape ``array.shape``.
+    """
+    fan = _calculate_correct_fan(array, mode)
+    gain = calculate_gain(nonlinearity, a)
+    sigma = gain / math.sqrt(fan)
+    limit = math.sqrt(3.0) * sigma  # uniform bound derived from the standard deviation
+    return np.random.uniform(-limit, limit, array.shape)
+ 
+
+def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+    r"""Sample a He-normal (Kaiming) initialization with the shape of `array`.
+
+    Follows `Delving deep into rectifiers: Surpassing human-level performance on
+    ImageNet classification` - He, K. et al. (2015). `array` is not written to; a new
+    array is returned whose values come from :math:`\mathcal{N}(0, \text{std}^2)` where
+
+    .. math::
+        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
+
+    Args:
+        array: an n-dimensional array (n >= 2) that provides the fan and the output shape
+        a: the negative slope of the rectifier used after this layer (only
+            used with ``'leaky_relu'``)
+        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
+            preserves the magnitude of the variance of the weights in the
+            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
+            backwards pass.
+        nonlinearity: the non-linear function name, recommended to use only
+            with ``'relu'`` or ``'leaky_relu'`` (default).
+
+    Returns:
+        numpy.ndarray, the sampled values, of shape ``array.shape``.
+    """
+    fan = _calculate_correct_fan(array, mode)
+    sigma = calculate_gain(nonlinearity, a) / math.sqrt(fan)
+    return np.random.normal(0, sigma, array.shape)
+ 
+def _calculate_fan_in_and_fan_out(array):
+    """Return ``(fan_in, fan_out)`` of a weight array.
+
+    Axis 0 counts output feature maps, axis 1 input feature maps, and the element
+    count of ``array[0][0]`` (1 for 2-D input) is the receptive field size.
+    """
+    shape = array.shape
+    if len(shape) < 2:
+        raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
+    # Only arrays with axes beyond the first two have a receptive field larger than one.
+    receptive_field_size = array[0][0].size if len(shape) > 2 else 1
+    fan_in = shape[1] * receptive_field_size
+    fan_out = shape[0] * receptive_field_size
+    return fan_in, fan_out
+ 
+class KaimingUniform(init.Initializer):
+    """Initializer backed by `kaiming_uniform_`; the constructor arguments are forwarded to it unchanged."""
+    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+        super(KaimingUniform, self).__init__()
+        self.a, self.mode, self.nonlinearity = a, mode, nonlinearity
+
+    def _initialize(self, arr):
+        """Sample values shaped like `arr` and pass them to `init._assignment` together with `arr`."""
+        samples = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
+        init._assignment(arr, samples)
+
+class KaimingNormal(init.Initializer):
+    """Initializer backed by `kaiming_normal_`; the constructor arguments are forwarded to it unchanged."""
+    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
+        super(KaimingNormal, self).__init__()
+        self.a, self.mode, self.nonlinearity = a, mode, nonlinearity
+
+    def _initialize(self, arr):
+        """Sample values shaped like `arr` and pass them to `init._assignment` together with `arr`."""
+        samples = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
+        init._assignment(arr, samples)
+
+def default_recurisive_init(custom_cell):
+    """Re-initialise every Conv2d and Dense cell found under `custom_cell`.
+
+    Weights are rebuilt with KaimingUniform(a=sqrt(5)); a bias, when present, is drawn uniformly from
+    [-1/sqrt(fan_in), 1/sqrt(fan_in)] of the new weight. All other cells are left untouched.
+    """
+    for _, cell in custom_cell.cells_and_names():
+        if not isinstance(cell, (nn.Conv2d, nn.Dense)):
+            continue
+        weight = cell.weight.default_input
+        cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), weight.shape(), weight.dtype())
+        if cell.bias is not None:
+            fan_in = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())[0]
+            bound = 1 / math.sqrt(fan_in)
+            bias = cell.bias.default_input
+            cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, bias.shape()), bias.dtype())
diff --git a/mindspore/model_zoo/resnet.py b/mindspore/model_zoo/resnet.py
index 9d010eede1..a243ff5a2a 100755
--- a/mindspore/model_zoo/resnet.py
+++ b/mindspore/model_zoo/resnet.py
@@ -260,3 +260,24 @@ def resnet50(class_num=10):
                   [256, 512, 1024, 2048],
                   [1, 2, 2, 2],
                   class_num)
+
+def resnet101(class_num=1001):
+    """
+    Get ResNet101 neural network.
+
+    Args:
+        class_num (int): Class number. Default: 1001.
+
+    Returns:
+        Cell, cell instance of ResNet101 neural network.
+
+    Examples:
+        >>> net = resnet101(1001)
+    """
+    return ResNet(ResidualBlock,
+                  [3, 4, 23, 3],
+                  [64, 256, 512, 1024],
+                  [256, 512, 1024, 2048],
+                  [1, 2, 2, 2],
+                  class_num)
+    

From 230e77f923f76676c58065885a9bb99d3be450ac Mon Sep 17 00:00:00 2001
From: huanghui <huanghui44@huawei.com>
Date: Sun, 26 Apr 2020 10:35:32 +0800
Subject: [PATCH 081/242] fix confusionmulgrad fusion pass cannot work

---
 .../ir_fusion/confusion_mul_grad_fusion.cc    | 38 +++++++++++++++++--
 .../confusion_mul_grad_fusion_test.cc         |  5 ---
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc
index 47098379bf..caea9599c1 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.cc
@@ -72,6 +72,38 @@ AnfNodePtr GetMul0(const FuncGraphPtr &graph, const AnfNodePtr &input2, const An
   }
   return mul0;
 }
+
+// Returns true when fusion must be skipped: mul0 is not a CNode, mul0 belongs to a _VirtualDatasetCell scope,
+// or a user of reduce_sum is mul0 or mul0's input(1), in which case fusing would create a cycle.
+bool QuitFusion(const FuncGraphPtr &graph, const AnfNodePtr &mul0_anf, const AnfNodePtr &reduce_sum) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(mul0_anf);
+  MS_EXCEPTION_IF_NULL(reduce_sum);
+  if (!mul0_anf->isa<CNode>()) {
+    return true;
+  }
+  auto mul0 = mul0_anf->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(mul0);
+  if (mul0->fullname_with_scope().find("network-_VirtualDatasetCell") != std::string::npos) {
+    return true;
+  }
+
+  auto manager = graph->manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  auto users_iter = manager->node_users().find(reduce_sum);
+  if (users_iter == manager->node_users().end()) {
+    MS_LOG(EXCEPTION) << "node has no output in manager";
+  }
+  const AnfNodeIndexSet &users = users_iter->second;
+  bool feeds_mul0 = std::any_of(users.begin(), users.end(), [&mul0](const std::pair<AnfNodePtr, int> &user) {
+    return user.first == mul0->input(1) || user.first == mul0;
+  });
+  if (feeds_mul0) {
+    MS_LOG(INFO) << "ReduceSum's output node is mul0's input or mul0! If do fusion, graph will exist a circle";
+    return true;
+  }
+  return false;
+}
 }  // namespace
 
 const BaseRef ConfusionMulGradFusion::DefinePattern() const {
@@ -90,9 +122,6 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons
   auto reduce_sum = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(reduce_sum);
   auto mul1 = reduce_sum->input(1);
-  if (mul1->fullname_with_scope().find("bert/encoder") == std::string::npos) {
-    return nullptr;
-  }
   if (IsUsedByOthers(graph, mul1)) {
     MS_LOG(INFO) << "Mul1 is used by others, quit fusion!";
     return nullptr;
@@ -102,6 +131,9 @@ const AnfNodePtr ConfusionMulGradFusion::Process(const FuncGraphPtr &graph, cons
     MS_LOG(INFO) << "Mul0 do not exist, quit fusion";
     return nullptr;
   }
+  if (QuitFusion(graph, mul0, node)) {
+    return nullptr;
+  }
 
   auto fusion_node = CreateFusionNode(graph, reduce_sum, mul0, input3);
   std::vector<AnfNodePtr> fusion_node_outputs;
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc
index 4b5d38d375..e3bf09d2cb 100644
--- a/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion_test.cc
@@ -32,11 +32,6 @@ class TestHWOptimizeConfusionMulGradFusion : public BackendCommon {
 TEST_F(TestHWOptimizeConfusionMulGradFusion, test_fusion) {
   FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_confusion_mul_grad_fusion", "before");
   EXPECT_NE(g, nullptr);
-  auto bert_scope = std::make_shared<Scope>("bert/encoder");
-  for (auto node : TopoSort(g->get_return())) {
-    node->set_scope(bert_scope);
-  }
-
   std::vector<int> shp{1, 1, 1, 1};
   auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
   AbstractBasePtrList args_spec_list;

From b84deeeb5c52137442fd8bc9b767862a26de4a07 Mon Sep 17 00:00:00 2001
From: candanzg <zhangshucheng@huawei.com>
Date: Sun, 26 Apr 2020 14:55:30 +0800
Subject: [PATCH 082/242] fix summary tag check error

Signed-off-by: candanzg <zhangshucheng@huawei.com>
---
 mindspore/ccsrc/operator/prim_debug.cc                     | 4 ++--
 .../train/summary/test_summary_ops_params_valid_check.py   | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/mindspore/ccsrc/operator/prim_debug.cc b/mindspore/ccsrc/operator/prim_debug.cc
index c8db775320..d73c34bf85 100644
--- a/mindspore/ccsrc/operator/prim_debug.cc
+++ b/mindspore/ccsrc/operator/prim_debug.cc
@@ -51,7 +51,7 @@ AbstractBasePtr InferImplScalarSummary(const AnalysisEnginePtr &, const Primitiv
   // Reomve the force check to support batch set summary use 'for' loop
   auto item_v = descriptions->BuildValue();
   if (!item_v->isa<StringImm>()) {
-    MS_LOG(ERROR) << "First parameter shoule be string";
+    MS_EXCEPTION(TypeError) << "Summary first parameter should be string";
   }
 
   return std::make_shared<AbstractScalar>(kAnyValue, kBool);
@@ -75,7 +75,7 @@ AbstractBasePtr InferImplTensorSummary(const AnalysisEnginePtr &, const Primitiv
   // Reomve the force check to support batch set summary use 'for' loop
   auto item_v = descriptions->BuildValue();
   if (!item_v->isa<StringImm>()) {
-    MS_LOG(WARNING) << "Summary first parameter must be string";
+    MS_EXCEPTION(TypeError) << "Summary first parameter should be string";
   }
 
   return std::make_shared<AbstractScalar>(kAnyValue, std::make_shared<Bool>());
diff --git a/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py b/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py
index 23c85d398c..ab1eb88d96 100644
--- a/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py
+++ b/tests/ut/python/train/summary/test_summary_ops_params_valid_check.py
@@ -22,6 +22,7 @@ import os
 import logging
 import random
 import numpy as np
+import pytest
 from mindspore.train.summary.summary_record import SummaryRecord
 from mindspore.common.tensor import Tensor
 import mindspore.nn as nn
@@ -180,7 +181,8 @@ def test_summary_use_invalid_tag_None():
 def test_summary_use_invalid_tag_Bool():
     log.debug("begin test_summary_use_invalid_tag_Bool")
     net = SummaryDemoTag(True, True, True)
-    run_case(net)
+    with pytest.raises(TypeError):
+        run_case(net)
     log.debug("finished test_summary_use_invalid_tag_Bool")
 
 
@@ -196,7 +198,8 @@ def test_summary_use_invalid_tag_null():
 def test_summary_use_invalid_tag_Int():
     log.debug("begin test_summary_use_invalid_tag_Int")
     net = SummaryDemoTag(1, 2, 3)
-    run_case(net)
+    with pytest.raises(TypeError):
+        run_case(net)
     log.debug("finished test_summary_use_invalid_tag_Int")
 
 

From 60f266332645c15443dcca32a407c7186a571e0a Mon Sep 17 00:00:00 2001
From: yanghaitao <yanghaitao1@huawei.com>
Date: Sun, 26 Apr 2020 15:10:41 +0800
Subject: [PATCH 083/242] Check TfFileDataset num_shards and shard_id

---
 mindspore/dataset/engine/validators.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py
index dabeb2d424..c9f7d54f13 100644
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -906,6 +906,8 @@ def check_textfiledataset(method):
 
         check_param_type(nreq_param_int, param_dict, int)
 
+        check_sampler_shuffle_shard_options(param_dict)
+
         return method(*args, **kwargs)
 
     return new_method

From 13bf42ba6fc18807da16c3b0e9a528e0f1f6d0aa Mon Sep 17 00:00:00 2001
From: chujinjin <chujinjin52@huawei.com>
Date: Mon, 20 Apr 2020 17:21:05 +0800
Subject: [PATCH 084/242] abstract input tensor

---
 mindspore/ccsrc/pynative/pynative_execute.cc | 118 ++++++++++++++++-
 mindspore/ccsrc/session/ascend_session.cc    |   6 +-
 mindspore/ccsrc/session/ascend_session.h     |   2 +-
 mindspore/ccsrc/session/gpu_session.cc       |   5 +-
 mindspore/ccsrc/session/gpu_session.h        |   2 +-
 mindspore/ccsrc/session/session_basic.cc     | 129 ++-----------------
 mindspore/ccsrc/session/session_basic.h      |   6 +-
 7 files changed, 135 insertions(+), 133 deletions(-)

diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pynative/pynative_execute.cc
index 0d18dfb577..7feb1a4997 100644
--- a/mindspore/ccsrc/pynative/pynative_execute.cc
+++ b/mindspore/ccsrc/pynative/pynative_execute.cc
@@ -30,7 +30,8 @@
 #include "pipeline/parse/data_converter.h"
 #include "pipeline/static_analysis/prim.h"
 #include "session/session_factory.h"
-
+#include "pre_activate/pass/const_input_to_attr_registry.h"
+#include "pre_activate/common/helper.h"
 #include "pynative/base.h"
 
 #ifdef ENABLE_GE
@@ -188,6 +189,117 @@ py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat
   return std::move(result);
 }
 
+// Turns the Python input at `input_index` into an attribute of `op_prim` when that index is listed in
+// `input_attrs`; returns true if the input was stored as an attribute, false otherwise.
+bool RunOpConvertConstInputToAttr(const py::object &input_object, size_t input_index, const PrimitivePtr &op_prim,
+                                  const std::unordered_set<size_t> &input_attrs) {
+  MS_EXCEPTION_IF_NULL(op_prim);
+  auto names_value = op_prim->GetAttr(kAttrInputNames);
+  if (names_value == nullptr) {
+    return false;
+  }
+  auto input_names = GetValue<std::vector<std::string>>(names_value);
+  if (input_index >= input_names.size()) {
+    MS_LOG(EXCEPTION) << "The input index: " << input_index << " is large than the input names vector size!";
+  }
+  if (input_attrs.find(input_index) == input_attrs.end()) {
+    return false;
+  }
+  ValuePtr value = parse::data_converter::PyDataToValue(input_object);
+  MS_EXCEPTION_IF_NULL(value);
+  op_prim->set_attr(input_names[input_index], value);
+  return true;
+}
+
+void PlantTensorTupleToVector(const py::tuple &tuple_inputs, const PrimitivePtr &op_prim,
+                              std::vector<tensor::TensorPtr> *input_tensor) {
+  MS_EXCEPTION_IF_NULL(op_prim);
+  MS_EXCEPTION_IF_NULL(input_tensor);
+  for (const auto &element : tuple_inputs) {  // flatten the tuple; its length goes to kAttrDynInputSizes below
+    if (!py::isinstance<tensor::Tensor>(element)) {
+      MS_LOG(EXCEPTION) << "The input object is not a tensor!";
+    }
+    auto element_tensor = py::cast<tensor::TensorPtr>(element);
+    MS_EXCEPTION_IF_NULL(element_tensor);
+    input_tensor->push_back(element_tensor);
+  }
+  op_prim->set_attr(kAttrDynInputSizes, MakeValue(std::vector<int>{SizeToInt(tuple_inputs.size())}));
+}
+
+void ConvertValueTupleToTensor(const py::object &input_object, std::vector<tensor::TensorPtr> *input_tensor) {
+  MS_EXCEPTION_IF_NULL(input_tensor);
+  ValuePtr converted = parse::data_converter::PyDataToValue(input_object);
+  MS_EXCEPTION_IF_NULL(converted);
+  if (!converted->isa<ValueTuple>()) {
+    MS_LOG(EXCEPTION) << "The input object is not a value tuple!";
+  }
+  auto tuple_value = converted->cast<ValueTuplePtr>();
+  MS_EXCEPTION_IF_NULL(tuple_value);
+  tensor::TensorPtr tuple_tensor = opt::CreateTupleTensor(tuple_value);  // whole tuple -> a single tensor
+  MS_EXCEPTION_IF_NULL(tuple_tensor);
+  input_tensor->push_back(tuple_tensor);
+}
+
+void ConvertPyObjectToTensor(const py::object &input_object, const PrimitivePtr &op_prim,
+                             std::vector<tensor::TensorPtr> *input_tensor) {
+  MS_EXCEPTION_IF_NULL(op_prim);
+  MS_EXCEPTION_IF_NULL(input_tensor);
+  tensor::TensorPtr tensor_ptr = nullptr;
+  if (py::isinstance<tensor::Tensor>(input_object)) {  // already a tensor: reuse it as is
+    tensor_ptr = py::cast<tensor::TensorPtr>(input_object);
+  } else if (py::isinstance<py::float_>(input_object)) {  // Python floats become kFloat32 tensors
+    tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::float_>(input_object), kFloat32);
+  } else if (py::isinstance<py::int_>(input_object)) {
+    tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::int_>(input_object), nullptr);
+  } else if (py::isinstance<py::list>(input_object)) {
+    tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::list>(input_object), nullptr);
+  } else if (py::isinstance<py::array>(input_object)) {
+    tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::array>(input_object), nullptr);
+  } else if (py::isinstance<py::tuple>(input_object)) {  // tuples are appended by the helpers below
+    auto tuple_inputs = py::cast<py::tuple>(input_object);
+    if (py::isinstance<tensor::Tensor>(tuple_inputs[0])) {  // NOTE(review): presumes a non-empty tuple - confirm
+      PlantTensorTupleToVector(tuple_inputs, op_prim, input_tensor);
+    } else {
+      ConvertValueTupleToTensor(input_object, input_tensor);
+    }
+    return;  // nothing left to push: the helper already extended input_tensor
+  } else {
+    MS_LOG(EXCEPTION) << "Run op inputs type is invalid!";
+  }
+  MS_EXCEPTION_IF_NULL(tensor_ptr);
+  input_tensor->push_back(tensor_ptr);
+}
+
+// Builds the backend inputs of a single-op run from the Python arguments in `op_run_info`: inputs registered
+// as const-to-attr become primitive attributes, the rest are appended to `input_tensors` with a matching mask.
+void ConstructInputTensor(const OpExecInfoPtr &op_run_info, std::vector<bool> *tensors_mask,
+                          std::vector<tensor::TensorPtr> *input_tensors) {
+  MS_EXCEPTION_IF_NULL(op_run_info);
+  MS_EXCEPTION_IF_NULL(tensors_mask);
+  MS_EXCEPTION_IF_NULL(input_tensors);
+  PrimitivePtr op_prim = op_run_info->py_primitive;
+  MS_EXCEPTION_IF_NULL(op_prim);
+  if (op_run_info->op_inputs.size() != op_run_info->inputs_mask.size()) {
+    MS_LOG(EXCEPTION) << "Op input size " << op_run_info->op_inputs.size() << " should be equal to op input mask size "
+                      << op_run_info->inputs_mask.size();
+  }
+  opt::ConstInputToAttrInfoRegister reg;
+  bool reg_exist = opt::ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(op_run_info->op_name, &reg);
+  size_t input_num = op_run_info->op_inputs.size();
+  MS_LOG(INFO) << "py input size: " << input_num;
+  for (size_t index = 0; index < input_num; ++index) {
+    // convert const input to attr
+    if (reg_exist &&
+        RunOpConvertConstInputToAttr(op_run_info->op_inputs[index], index, op_prim, reg.GetConstInputAttrInfo())) {
+      continue;
+    }
+    // convert const and tuple input to tensor
+    ConvertPyObjectToTensor(op_run_info->op_inputs[index], op_prim, input_tensors);
+    // one input may yield several tensors: give each new tensor this input's mask (weight : 1, data : 0)
+    tensors_mask->resize(input_tensors->size(), py::cast<bool>(op_run_info->inputs_mask[index]));
+  }
+}
+
 py::object RunOpInMs(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *status) {
   MS_EXCEPTION_IF_NULL(op_exec_info);
   MS_LOG(INFO) << "Start run op[" << op_exec_info->op_name << "] with backend policy ms";
@@ -204,7 +316,9 @@ py::object RunOpInMs(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat
 
   std::string graph_info = GetSingleOpGraphInfo(op_exec_info);
   std::vector<tensor::TensorPtr> input_tensors;
-  session->BuildOp(*op_exec_info, graph_info, &input_tensors);
+  std::vector<bool> tensors_mask;
+  ConstructInputTensor(op_exec_info, &tensors_mask, &input_tensors);
+  session->BuildOp(*op_exec_info, graph_info, input_tensors, tensors_mask);
   py::tuple result = session->RunOp(*op_exec_info, graph_info, input_tensors);
   ms_context->set_enable_pynative_infer(false);
   *status = PYNATIVE_SUCCESS;
diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc
index 11ae3da6f7..253d2d08ae 100755
--- a/mindspore/ccsrc/session/ascend_session.cc
+++ b/mindspore/ccsrc/session/ascend_session.cc
@@ -250,11 +250,11 @@ void AscendSession::RunOpExecTask(const std::shared_ptr<KernelGraph> &kernel_gra
 }
 
 void AscendSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-                            std::vector<tensor::TensorPtr> *input_tensors) {
-  MS_EXCEPTION_IF_NULL(input_tensors);
+                            const std::vector<tensor::TensorPtr> &input_tensors,
+                            const std::vector<bool> &tensors_mask) {
   MS_LOG(INFO) << "Build op " << op_run_info.op_name << " start !";
   // construct graph include one op
-  auto graph = ConstructSingleOpGraph(op_run_info, input_tensors);
+  auto graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask);
   MS_EXCEPTION_IF_NULL(graph);
   opt::RunOpAscendBackendIRFusionOptimization(graph);
   // kernel select
diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/session/ascend_session.h
index 2d24691404..0b006256a1 100755
--- a/mindspore/ccsrc/session/ascend_session.h
+++ b/mindspore/ccsrc/session/ascend_session.h
@@ -42,7 +42,7 @@ class AscendSession : public SessionBasic {
   void RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) override;
   void BuildGraph(GraphId) override;
   void BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-               std::vector<tensor::TensorPtr> *input_tensors) override;
+               const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<bool> &tensors_mask) override;
   py::tuple RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
                   const std::vector<tensor::TensorPtr> &input_tensors) override;
 
diff --git a/mindspore/ccsrc/session/gpu_session.cc b/mindspore/ccsrc/session/gpu_session.cc
index 4a9506913c..3a80382e9b 100644
--- a/mindspore/ccsrc/session/gpu_session.cc
+++ b/mindspore/ccsrc/session/gpu_session.cc
@@ -133,10 +133,9 @@ void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::Ten
 }
 
 void GPUSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-                         std::vector<tensor::TensorPtr> *input_tensors) {
+                         const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<bool> &tensors_mask) {
   // Prepare the graph
-  MS_EXCEPTION_IF_NULL(input_tensors);
-  auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors);
+  auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask);
   MS_EXCEPTION_IF_NULL(kernel_graph);
   SelectKernel(kernel_graph);
   StartKernelRT();
diff --git a/mindspore/ccsrc/session/gpu_session.h b/mindspore/ccsrc/session/gpu_session.h
index 470c9b4799..2a3cc04b09 100644
--- a/mindspore/ccsrc/session/gpu_session.h
+++ b/mindspore/ccsrc/session/gpu_session.h
@@ -40,7 +40,7 @@ class GPUSession : public SessionBasic {
 
   void RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) override;
   void BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
-               std::vector<tensor::TensorPtr> *input_tensors) override;
+               const std::vector<tensor::TensorPtr> &input_tensors, const std::vector<bool> &tensors_mask) override;
   py::tuple RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
                   const std::vector<tensor::TensorPtr> &input_tensors) override;
 
diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc
index 40b69b75b3..cb9e5c4dc9 100755
--- a/mindspore/ccsrc/session/session_basic.cc
+++ b/mindspore/ccsrc/session/session_basic.cc
@@ -180,115 +180,6 @@ BaseRef CreatTupleForOutput(const AnfNodePtr &anf, const KernelGraph &graph,
   return ret;
 }
 
-bool RunOpConvertConstInputToAttr(const py::object &input_object, size_t input_index, const PrimitivePtr &op_prim,
-                                  const std::unordered_set<size_t> &input_attrs) {
-  MS_EXCEPTION_IF_NULL(op_prim);
-  auto input_names_value = op_prim->GetAttr(kAttrInputNames);
-  if (input_names_value == nullptr) {
-    return false;
-  }
-  auto input_names_vec = GetValue<std::vector<std::string>>(input_names_value);
-  if (input_index >= input_names_vec.size()) {
-    MS_LOG(EXCEPTION) << "The input index: " << input_index << " is large than the input names vector size!";
-  }
-
-  if (input_attrs.find(input_index) != input_attrs.end()) {
-    ValuePtr value = parse::data_converter::PyDataToValue(input_object);
-    MS_EXCEPTION_IF_NULL(value);
-    auto input_name = input_names_vec[input_index];
-    op_prim->set_attr(input_name, value);
-    return true;
-  }
-  return false;
-}
-
-void PlantTensorTupleToVector(const py::tuple &tuple_inputs, const PrimitivePtr &op_prim,
-                              std::vector<tensor::TensorPtr> *input_tensor) {
-  MS_EXCEPTION_IF_NULL(op_prim);
-  MS_EXCEPTION_IF_NULL(input_tensor);
-  for (const auto &input_object : tuple_inputs) {
-    if (!py::isinstance<tensor::Tensor>(input_object)) {
-      MS_LOG(EXCEPTION) << "The input object is not a tensor!";
-    }
-    auto tensor = py::cast<tensor::TensorPtr>(input_object);
-    MS_EXCEPTION_IF_NULL(tensor);
-    input_tensor->push_back(tensor);
-  }
-  op_prim->set_attr(kAttrDynInputSizes, MakeValue(std::vector<int>{SizeToInt(tuple_inputs.size())}));
-}
-
-void ConvertValueTupleToTensor(const py::object &input_object, std::vector<tensor::TensorPtr> *input_tensor) {
-  MS_EXCEPTION_IF_NULL(input_tensor);
-  ValuePtr input_value = parse::data_converter::PyDataToValue(input_object);
-  MS_EXCEPTION_IF_NULL(input_value);
-  if (!input_value->isa<ValueTuple>()) {
-    MS_LOG(EXCEPTION) << "The input object is not a value tuple!";
-  }
-  auto value_tuple = input_value->cast<ValueTuplePtr>();
-  MS_EXCEPTION_IF_NULL(value_tuple);
-  tensor::TensorPtr tensor_ptr = opt::CreateTupleTensor(value_tuple);
-  MS_EXCEPTION_IF_NULL(tensor_ptr);
-  input_tensor->push_back(tensor_ptr);
-}
-
-void ConvertPyObjectToTensor(const py::object &input_object, const PrimitivePtr &op_prim,
-                             std::vector<tensor::TensorPtr> *input_tensor) {
-  MS_EXCEPTION_IF_NULL(op_prim);
-  MS_EXCEPTION_IF_NULL(input_tensor);
-  tensor::TensorPtr tensor_ptr = nullptr;
-  if (py::isinstance<tensor::Tensor>(input_object)) {
-    tensor_ptr = py::cast<tensor::TensorPtr>(input_object);
-  } else if (py::isinstance<py::float_>(input_object)) {
-    tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::float_>(input_object), kFloat32);
-  } else if (py::isinstance<py::int_>(input_object)) {
-    tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::int_>(input_object), nullptr);
-  } else if (py::isinstance<py::list>(input_object)) {
-    tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::list>(input_object), nullptr);
-  } else if (py::isinstance<py::array>(input_object)) {
-    tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::array>(input_object), nullptr);
-  } else if (py::isinstance<py::tuple>(input_object)) {
-    auto tuple_inputs = py::cast<py::tuple>(input_object);
-    if (py::isinstance<tensor::Tensor>(tuple_inputs[0])) {
-      PlantTensorTupleToVector(tuple_inputs, op_prim, input_tensor);
-    } else {
-      ConvertValueTupleToTensor(input_object, input_tensor);
-    }
-    return;
-  } else {
-    MS_LOG(EXCEPTION) << "Run op inputs type is invalid!";
-  }
-  MS_EXCEPTION_IF_NULL(tensor_ptr);
-  input_tensor->push_back(tensor_ptr);
-}
-
-void ConvertInputPyobject(const OpRunInfo &op_run_info, const PrimitivePtr &op_prim,
-                          std::vector<tensor::TensorPtr> *input_tensors, std::vector<bool> *tensors_mask) {
-  MS_EXCEPTION_IF_NULL(op_prim);
-  MS_EXCEPTION_IF_NULL(input_tensors);
-  MS_EXCEPTION_IF_NULL(tensors_mask);
-  if (op_run_info.op_inputs.size() != op_run_info.inputs_mask.size()) {
-    MS_LOG(EXCEPTION) << "Op input size " << op_run_info.op_inputs.size() << " should be equal to op input mask size "
-                      << op_run_info.inputs_mask.size();
-  }
-  opt::ConstInputToAttrInfoRegister reg;
-  bool reg_exist = opt::ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(op_run_info.op_name, &reg);
-  size_t input_num = op_run_info.op_inputs.size();
-  MS_LOG(INFO) << "py input size: " << input_num;
-  for (size_t index = 0; index < input_num; ++index) {
-    // convert const input to attr
-    if (reg_exist &&
-        RunOpConvertConstInputToAttr(op_run_info.op_inputs[index], index, op_prim, reg.GetConstInputAttrInfo())) {
-      continue;
-    }
-    // convert const and tuple input to tensor
-    ConvertPyObjectToTensor(op_run_info.op_inputs[index], op_prim, input_tensors);
-    // make tensors, weight : 1, data : 0
-    std::vector<bool> new_mask(input_tensors->size() - tensors_mask->size(),
-                               py::cast<bool>(op_run_info.inputs_mask[index]));
-    tensors_mask->insert(tensors_mask->end(), new_mask.begin(), new_mask.end());
-  }
-}
-
 ValueNodePtr CreateNewValueNode(const AnfNodePtr &anf, KernelGraph *graph) {
   auto value_node = anf->cast<ValueNodePtr>();
   MS_EXCEPTION_IF_NULL(value_node);
@@ -747,26 +638,22 @@ void SessionBasic::CreateOutputNode(const CNodePtr &cnode, const std::shared_ptr
 }
 
 std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const OpRunInfo &op_run_info,
-                                                                  std::vector<tensor::TensorPtr> *input_tensors) {
-  MS_EXCEPTION_IF_NULL(input_tensors);
+                                                                  const std::vector<tensor::TensorPtr> &input_tensors,
+                                                                  const std::vector<bool> &tensors_mask) {
   auto graph = std::make_shared<KernelGraph>();
   std::vector<AnfNodePtr> inputs;
   // set input[0]
   PrimitivePtr op_prim = op_run_info.py_primitive;
-  if (op_prim == nullptr) {
-    op_prim = std::make_shared<Primitive>(op_run_info.op_name);
-  }
+  MS_EXCEPTION_IF_NULL(op_prim);
   inputs.push_back(std::make_shared<ValueNode>(op_prim));
   // set input parameter
-  std::vector<bool> tensors_mask;
-  ConvertInputPyobject(op_run_info, op_prim, input_tensors, &tensors_mask);
-  MS_LOG(INFO) << "Input tensor size: " << input_tensors->size();
-  if (input_tensors->size() != tensors_mask.size()) {
-    MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors->size() << " should be equal to tensors mask size "
+  MS_LOG(INFO) << "Input tensor size: " << input_tensors.size();
+  if (input_tensors.size() != tensors_mask.size()) {
+    MS_LOG(EXCEPTION) << "Input tensors size " << input_tensors.size() << " should be equal to tensors mask size "
                       << tensors_mask.size();
   }
-  for (size_t i = 0; i < input_tensors->size(); ++i) {
-    auto parameter = ConstructRunOpParameter(graph, input_tensors->at(i), tensors_mask[i]);
+  for (size_t i = 0; i < input_tensors.size(); ++i) {
+    auto parameter = ConstructRunOpParameter(graph, input_tensors.at(i), tensors_mask[i]);
     inputs.push_back(parameter);
     graph->MutableInputs()->push_back(parameter);
   }
diff --git a/mindspore/ccsrc/session/session_basic.h b/mindspore/ccsrc/session/session_basic.h
index aa359c74d9..0fd0003cc9 100755
--- a/mindspore/ccsrc/session/session_basic.h
+++ b/mindspore/ccsrc/session/session_basic.h
@@ -61,7 +61,8 @@ class SessionBasic {
 
   virtual void RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) = 0;
 
-  virtual void BuildOp(const OpRunInfo &, const GraphInfo &, std::vector<tensor::TensorPtr> *input_tensors) {}
+  virtual void BuildOp(const OpRunInfo &, const GraphInfo &, const std::vector<tensor::TensorPtr> &input_tensors,
+                       const std::vector<bool> &tensors_mask) {}
 
   virtual py::tuple RunOp(const OpRunInfo &, const GraphInfo &, const std::vector<tensor::TensorPtr> &input_tensors) {
     return py::tuple();
@@ -99,7 +100,8 @@ class SessionBasic {
   CNodePtr ConstructOutput(const AnfNodePtrList &outputs, const std::shared_ptr<KernelGraph> &graph);
   // create a single run op graph
   std::shared_ptr<KernelGraph> ConstructSingleOpGraph(const OpRunInfo &op_run_info,
-                                                      std::vector<tensor::TensorPtr> *input_tensor);
+                                                      const std::vector<tensor::TensorPtr> &input_tensors,
+                                                      const std::vector<bool> &tensors_mask);
   // trans BaseRef list to py::tuple
   BaseRef TransformBaseRefListToTuple(const BaseRef &base_ref);
 

From 09cc5b4faabf39f4b406ee0a8e92809713fbd44e Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Sun, 26 Apr 2020 03:48:11 -0400
Subject: [PATCH 085/242] fix doc issue

---
 mindspore/train/model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mindspore/train/model.py b/mindspore/train/model.py
index 66b03ce06c..5b1a34e418 100755
--- a/mindspore/train/model.py
+++ b/mindspore/train/model.py
@@ -83,7 +83,7 @@ class Model:
         >>>         return out
         >>>
         >>> net = Net()
-        >>> loss = nn.SoftmaxCrossEntropyWithLogits()
+        >>> loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
         >>> optim = Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
         >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
         >>> dataset = get_dataset()
@@ -400,7 +400,7 @@ class Model:
         Examples:
             >>> dataset = get_dataset()
             >>> net = Net()
-            >>> loss = nn.SoftmaxCrossEntropyWithLogits()
+            >>> loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
             >>> loss_scale_manager = FixedLossScaleManager()
             >>> optim = Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
             >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None, loss_scale_manager=loss_scale_manager)
@@ -523,7 +523,7 @@ class Model:
         Examples:
             >>> dataset = get_dataset()
             >>> net = Net()
-            >>> loss = nn.SoftmaxCrossEntropyWithLogits()
+            >>> loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
             >>> model = Model(net, loss_fn=loss, optimizer=None, metrics={'acc'})
             >>> model.eval(dataset)
         """

From e071f04d4bade82d41412b7701f6edc2b6c1f349 Mon Sep 17 00:00:00 2001
From: wsc <wangshaocong1@huawei.com>
Date: Mon, 20 Apr 2020 11:19:25 +0800
Subject: [PATCH 086/242] Add ST test script of bert with loss scale

---
 .../Bert_NEZHA/bert_for_pre_training.py       |   2 +-
 .../models/bert/bert_tdt_lossscale.py         | 198 ++++++++++++++++++
 2 files changed, 199 insertions(+), 1 deletion(-)
 create mode 100644 tests/st/networks/models/bert/bert_tdt_lossscale.py

diff --git a/mindspore/model_zoo/Bert_NEZHA/bert_for_pre_training.py b/mindspore/model_zoo/Bert_NEZHA/bert_for_pre_training.py
index 046b2adbe2..53a0d03933 100644
--- a/mindspore/model_zoo/Bert_NEZHA/bert_for_pre_training.py
+++ b/mindspore/model_zoo/Bert_NEZHA/bert_for_pre_training.py
@@ -445,5 +445,5 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
             succ = False
         else:
             succ = self.optimizer(grads)
-        ret = (loss, cond)
+        ret = (loss, cond, scaling_sens)
         return F.depend(ret, succ)
diff --git a/tests/st/networks/models/bert/bert_tdt_lossscale.py b/tests/st/networks/models/bert/bert_tdt_lossscale.py
new file mode 100644
index 0000000000..cfd0b55697
--- /dev/null
+++ b/tests/st/networks/models/bert/bert_tdt_lossscale.py
@@ -0,0 +1,198 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""train bert network with lossscale"""
+
+import os
+import pytest
+import numpy as np
+from numpy import allclose
+import mindspore.common.dtype as mstype
+import mindspore.dataset.engine.datasets as de
+import mindspore.dataset.transforms.c_transforms as C
+from mindspore import context
+from mindspore.common.tensor import Tensor
+from mindspore.train.model import Model
+from mindspore.train.callback import Callback, LossMonitor
+from mindspore.train.loss_scale_manager import DynamicLossScaleManager
+from mindspore.model_zoo.Bert_NEZHA import BertConfig, BertNetworkWithLoss, BertTrainOneStepWithLossScaleCell
+from mindspore.nn.optim import Momentum
+from mindspore import log as logger
+_current_dir = os.path.dirname(os.path.realpath(__file__))
+DATA_DIR = ["/home/workspace/mindspore_dataset/bert/example/examples.tfrecord"]
+SCHEMA_DIR = "/home/workspace/mindspore_dataset/bert/example/datasetSchema.json"
+
+def get_config(version='base', batch_size=1):
+    """Return the BertConfig preset for `version`; unknown versions fall back to BertConfig defaults."""
+    if version == 'base':
+        bert_config = BertConfig(
+            batch_size=batch_size,
+            seq_length=128,
+            vocab_size=21136,
+            hidden_size=768,
+            num_hidden_layers=2,
+            num_attention_heads=12,
+            intermediate_size=3072,
+            hidden_act="gelu",
+            hidden_dropout_prob=0.1,
+            attention_probs_dropout_prob=0.1,
+            max_position_embeddings=512,
+            type_vocab_size=2,
+            initializer_range=0.02,
+            use_relative_positions=True,
+            input_mask_from_dataset=True,
+            token_type_ids_from_dataset=True,
+            dtype=mstype.float32,
+            compute_type=mstype.float32)
+    elif version == 'large':
+        bert_config = BertConfig(
+            batch_size=batch_size,
+            seq_length=128,
+            vocab_size=21136,
+            hidden_size=1024,
+            num_hidden_layers=2,
+            num_attention_heads=16,
+            intermediate_size=4096,
+            hidden_act="gelu",
+            hidden_dropout_prob=0.0,
+            attention_probs_dropout_prob=0.0,
+            max_position_embeddings=512,
+            type_vocab_size=2,
+            initializer_range=0.02,
+            use_relative_positions=True,
+            input_mask_from_dataset=True,
+            token_type_ids_from_dataset=True,
+            dtype=mstype.float32,
+            compute_type=mstype.float16)
+    elif version == 'large_mixed':
+        bert_config = BertConfig(
+            batch_size=batch_size,
+            seq_length=128,
+            vocab_size=21136,
+            hidden_size=1024,
+            num_hidden_layers=24,
+            num_attention_heads=16,
+            intermediate_size=4096,
+            hidden_act="gelu",
+            hidden_dropout_prob=0.0,
+            attention_probs_dropout_prob=0.0,
+            max_position_embeddings=512,
+            type_vocab_size=2,
+            initializer_range=0.02,
+            use_relative_positions=True,
+            input_mask_from_dataset=True,
+            token_type_ids_from_dataset=True,
+            dtype=mstype.float32,
+            compute_type=mstype.float32)  # NOTE(review): float32 despite the 'mixed' name - confirm intended
+    else:
+        bert_config = BertConfig(batch_size=batch_size)
+    return bert_config
+
+def me_de_train_dataset():
+    """Load the BERT TFRecord dataset, cast all columns except masked_lm_weights to int32, then batch and repeat."""
+    # repeat count handed to ds.repeat() at the end of this function
+    repeat_count = 1
+    ds = de.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids",
+                                                               "next_sentence_labels", "masked_lm_positions",
+                                                               "masked_lm_ids", "masked_lm_weights"], shuffle=False)
+    type_cast_op = C.TypeCast(mstype.int32)
+    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
+    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
+    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
+    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
+    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
+    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
+    # batch size is read from the BATCH_SIZE environment variable (default 16)
+    batch_size = int(os.getenv('BATCH_SIZE', '16'))
+    ds = ds.batch(batch_size, drop_remainder=True)
+    ds = ds.repeat(repeat_count)
+    return ds
+
+def weight_variable(shape):
+    """Return a float32 Tensor of `shape` sampled from uniform(-0.1, 0.1); the seed is reset to 1 on every call."""
+    np.random.seed(1)
+    random_values = np.random.uniform(low=-0.1, high=0.1, size=shape)
+    return Tensor(random_values.astype(np.float32))
+
+class ModelCallback(Callback):  # records the first three network outputs at the end of every step
+    def __init__(self):
+        super(ModelCallback, self).__init__()
+        self.loss_list = []
+        self.overflow_list = []
+        self.lossscale_list = []
+
+    def step_end(self, run_context):
+        cb_params = run_context.original_args()  # net_outputs: presumably (loss, overflow, loss scale) - confirm
+        self.loss_list.append(cb_params.net_outputs[0])
+        self.overflow_list.append(cb_params.net_outputs[1])
+        self.lossscale_list.append(cb_params.net_outputs[2])
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_bert_tdt():
+    """Train BERT with a dynamic loss scale on Ascend and check how the loss scale reacts to overflow."""
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", reserve_class_name_in_scope=False)
+    context.set_context(enable_task_sink=True)
+    context.set_context(enable_loop_sink=True)
+    context.set_context(enable_mem_reuse=True)
+    ds = me_de_train_dataset()
+    version = os.getenv('VERSION', 'large')
+    batch_size = int(os.getenv('BATCH_SIZE', '16'))
+    config = get_config(version=version, batch_size=batch_size)
+    netwithloss = BertNetworkWithLoss(config, True)
+    optimizer = Momentum(netwithloss.trainable_params(), learning_rate=2e-5, momentum=0.9)
+    scale_window = 3
+    scale_manager = DynamicLossScaleManager(2**32, 2, scale_window)  # presumably (init_scale, factor, window)
+    netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer, scale_update_cell=scale_manager.get_update_cell())
+    netwithgrads.set_train(True)
+    model = Model(netwithgrads)
+    callback = ModelCallback()
+    params = netwithloss.trainable_params()
+    for param in params:
+        value = param.default_input
+        name = param.name
+        if isinstance(value, Tensor):
+            if name.split('.')[-1] in ['weight']:
+                if name.split('.')[-3] in ['cls2']:
+                    logger.info("***************** BERT param name is 1 {}".format(name))
+                    param.default_input = weight_variable(value.asnumpy().shape)
+                else:
+                    logger.info("***************** BERT param name is 2 {}".format(name))
+                    tempshape = value.asnumpy().shape
+                    shape = (tempshape[1], tempshape[0])
+                    weight_value = weight_variable(shape).asnumpy()
+                    param.default_input = Tensor(np.transpose(weight_value, [1, 0]))
+            else:
+                logger.info("***************** BERT param name is 3 {}".format(name))
+                param.default_input = weight_variable(value.asnumpy().shape)
+    model.train(ds.get_repeat_count(), ds, callbacks=callback, dataset_sink_mode=False)
+
+    # check the loss scale: halved on an overflow step, doubled after scale_window overflow-free steps in a row
+    count = 0
+    for i in range(len(callback.overflow_list)):
+        if callback.overflow_list[i] == Tensor(True, mstype.bool_) and i > 0:
+            count = 0
+            assert callback.lossscale_list[i] == callback.lossscale_list[i - 1] * Tensor(0.5, mstype.float32)
+        if callback.overflow_list[i] == Tensor(False, mstype.bool_):
+            count = count + 1
+            if count == scale_window:
+                count = 0
+                assert callback.lossscale_list[i] == callback.lossscale_list[i - 1] * Tensor(2.0, mstype.float32)
+
+
+if __name__ == '__main__':
+    test_bert_tdt()

From 4750861054fa0f0085a11b24028ed16987072280 Mon Sep 17 00:00:00 2001
From: yangzhenzhang <285824651@qq.com>
Date: Sun, 26 Apr 2020 16:37:57 +0800
Subject: [PATCH 087/242] fix layernorm bug

---
 .../parallel/ops_info/layer_norm_info.cc      |  2 +-
 tests/ut/python/parallel/test_layer_norm.py   | 28 +++++++++++++------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc b/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc
index 3abfc3d2ed..5bdd24090f 100644
--- a/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc
+++ b/mindspore/ccsrc/parallel/ops_info/layer_norm_info.cc
@@ -69,7 +69,7 @@ Status LayerNormInfo::CheckStrategy(const StrategyPtr &strategy) {
   }
   // check input strategy
   for (size_t i = begin_norm_axis_; i < input_strategy.size(); ++i) {
-    if (input_strategy[begin_norm_axis_] != NO_SPLIT_STRATEGY) {
+    if (input_strategy[i] != NO_SPLIT_STRATEGY) {
       MS_LOG(ERROR) << name_ << ": Invalid input strategy " << ShapeToString(input_strategy);
       return FAILED;
     }
diff --git a/tests/ut/python/parallel/test_layer_norm.py b/tests/ut/python/parallel/test_layer_norm.py
index c65ee5fc8e..50f49dcc77 100644
--- a/tests/ut/python/parallel/test_layer_norm.py
+++ b/tests/ut/python/parallel/test_layer_norm.py
@@ -11,8 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+# ============================================================================
 import numpy as np
+import pytest
 import mindspore as ms
 from mindspore import context, Tensor, Parameter
 from mindspore.nn import Cell, TrainOneStepCell, Momentum
@@ -24,7 +25,7 @@ from mindspore.common.initializer import initializer
 class Net(Cell):
     def __init__(self, mul_weight, strategy1=None, strategy2=None, strategy3=None):
         super().__init__()
-        self.begin_norm_axis = -1
+        self.begin_norm_axis = 2
         self.begin_params_axis = 1
         self.mul = P.Mul().set_strategy(strategy1)
         self.layer_norm = P.LayerNorm(self.begin_norm_axis, self.begin_params_axis).set_strategy(strategy2)
@@ -64,18 +65,18 @@ def test_layer_norm_data_parallel():
 
 def test_layer_norm_model_parallel():
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
-    strategy1 = ((1, 1, 16, 1), (1, 1, 16, 1))
-    strategy2 = ((1, 1, 16, 1), (1, 16, 1), (1, 16, 1))
-    strategy3 = ((1, 1, 16, 1), (1, 1, 16, 1))
+    strategy1 = ((1, 16, 1, 1), (1, 16, 1, 1))
+    strategy2 = ((1, 16, 1, 1), (16, 1, 1), (16, 1, 1))
+    strategy3 = ((1, 16, 1, 1), (1, 16, 1, 1))
     net = Net(_w, strategy1, strategy2, strategy3)
     compile(net)
 
 
 def test_layer_norm_hybrid_parallel():
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
-    strategy1 = ((2, 2, 4, 1), (2, 2, 4, 1))
-    strategy2 = ((2, 2, 4, 1), (2, 4, 1), (2, 4, 1))
-    strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1))
+    strategy1 = ((2, 8, 1, 1), (2, 8, 1, 1))
+    strategy2 = ((2, 8, 1, 1), (8, 1, 1), (8, 1, 1))
+    strategy3 = ((2, 8, 1, 1), (2, 8, 1, 1))
     net = Net(_w, strategy1, strategy2, strategy3)
     compile(net)
 
@@ -89,8 +90,17 @@ def test_layer_norm_auto_parallel():
 def test_layer_norm_repeat_calc():
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
     strategy1 = ((2, 2, 4, 1), (2, 2, 4, 1))
-    strategy2 = ((1, 2, 2, 1), (2, 2, 1), (2, 2, 1))
+    strategy2 = ((2, 2, 1, 1), (2, 1, 1), (2, 1, 1))
     strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1))
     net = Net(_w, strategy1, strategy2, strategy3)
     compile(net)
 
+
+def test_layer_norm_wrong_strategy():  # compile is expected to fail for a strategy that splits a normalized axis
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((2, 2, 4, 1), (2, 2, 4, 1))
+    strategy2 = ((1, 2, 1, 2), (2, 1, 2), (2, 1, 2))  # input axis 3 is split although begin_norm_axis is 2
+    strategy3 = ((2, 2, 4, 1), (2, 2, 4, 1))
+    net = Net(_w, strategy1, strategy2, strategy3)
+    with pytest.raises(RuntimeError):
+        compile(net)

From c5156f1e465e1f11e4003babda1d2da86ddc4e6b Mon Sep 17 00:00:00 2001
From: jinyaohui <jinyaohui@huawei.com>
Date: Sun, 26 Apr 2020 16:58:38 +0800
Subject: [PATCH 088/242] add init_device to init_dataset

---
 graphengine                                 |  2 +-
 mindspore/ccsrc/pipeline/init.cc            |  2 +-
 mindspore/ccsrc/pipeline/pipeline.cc        | 17 ++++++++++++-----
 mindspore/ccsrc/pipeline/pipeline.h         |  4 ++--
 mindspore/ccsrc/utils/context/ms_context.cc | 14 ++++++++++++++
 mindspore/ccsrc/utils/context/ms_context.h  |  2 ++
 mindspore/common/api.py                     |  4 ++--
 mindspore/nn/cell.py                        |  4 ++--
 8 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/graphengine b/graphengine
index 0c33e9d125..43f5d24337 160000
--- a/graphengine
+++ b/graphengine
@@ -1 +1 @@
-Subproject commit 0c33e9d12562953ca4bd6c03cb77da2c2da74acd
+Subproject commit 43f5d24337bf785251eefae2d810c7d5684194d6
diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc
index 04e6edc5c8..4620f18e2a 100644
--- a/mindspore/ccsrc/pipeline/init.cc
+++ b/mindspore/ccsrc/pipeline/init.cc
@@ -97,7 +97,7 @@ PYBIND11_MODULE(_c_expression, m) {
               py::arg("batch_size"), py::arg("types"), py::arg("shapes"), py::arg("input_indexs"),
               py::arg("phase") = py::str("dataset"), "Init and exec dataset.");
   (void)m.def("_set_dataset_mode_config", &mindspore::ConfigManager::SetDatasetModeConfig, "API for set dataset mode.");
-  (void)m.def("init_ge", &mindspore::pipeline::InitGe, "Init GE");
+  (void)m.def("init_backend", &mindspore::pipeline::InitBackend, "Init Backend.");
 
   (void)m.def("export_graph", &mindspore::pipeline::ExportGraph, "Export Graph.");
 
diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index 251a0c2d84..0c1c0a924b 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -236,7 +236,7 @@ py::dict ExecutorPy::GetAllreduceFusion(const std::string &phase) {
 
 void ExecutorPy::DelNetRes(const std::string &id) {
 #ifdef ENABLE_GE
-  FinalizeGe();
+  FinalizeBackend();
 #endif
   if (executor_ != nullptr) {
     bool flag = false;
@@ -680,6 +680,13 @@ bool InitExecDataset(const std::string &queue_name, int64_t iter_num, int64_t ba
                      const std::vector<TypePtr> &types, const std::vector<std::vector<int64_t>> &shapes,
                      const std::vector<int64_t> &input_indexes, const std::string &phase) {
   std::string name = MsContext::GetInstance()->backend_policy();
+#ifndef NO_DLIB
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  if (!ms_context->IsTsdOpened() || !ms_context->IsGeInited()) {
+    (void)InitBackend();
+  }
+#endif
   if (name == kMsConvert || name == kMsVm) {
     return InitExecDatasetVm(queue_name, iter_num, batch_size, types, shapes, input_indexes);
   }
@@ -758,7 +765,7 @@ void ResetOpId() { mindspore::id_generator::reset_id(); }
 
 void InitHccl() {
 #ifdef ENABLE_GE
-  (void)InitGe();
+  (void)InitBackend();
 #else
   mindspore::parse::python_adapter::set_python_env_flag(true);
   auto ms_context = MsContext::GetInstance();
@@ -780,7 +787,7 @@ void InitHccl() {
 
 void FinalizeHccl() {
 #ifdef ENABLE_GE
-  (void)FinalizeGe();
+  (void)FinalizeBackend();
 #else
   device::KernelRuntimeManager::Instance().ClearRuntimeResource();
 #endif
@@ -801,7 +808,7 @@ void ReleaseGeTsd() {
   }
 }
 
-void InitGe() {
+void InitBackend() {
   // set python env flag
   mindspore::parse::python_adapter::set_python_env_flag(true);
   // open tsd before ge initialize
@@ -813,7 +820,7 @@ void InitGe() {
   (void)ms_context->InitGe();
 }
 
-void FinalizeGe() {
+void FinalizeBackend() {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   (void)context_ptr->FinalizeGe();
diff --git a/mindspore/ccsrc/pipeline/pipeline.h b/mindspore/ccsrc/pipeline/pipeline.h
index 38d4f1937f..6a99d4dbcd 100644
--- a/mindspore/ccsrc/pipeline/pipeline.h
+++ b/mindspore/ccsrc/pipeline/pipeline.h
@@ -116,8 +116,8 @@ bool InitDistribute(const std::map<std::string, std::string> &options);
 void ResetOpId();
 void InitHccl();
 void FinalizeHccl();
-void InitGe();
-void FinalizeGe();
+void InitBackend();
+void FinalizeBackend();
 
 void ClearResAtexit();
 void ReleaseGeTsd();
diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc
index 3a2de9ba0c..46c28dec88 100644
--- a/mindspore/ccsrc/utils/context/ms_context.cc
+++ b/mindspore/ccsrc/utils/context/ms_context.cc
@@ -438,4 +438,18 @@ bool MsContext::PynativeInitGe() {
   is_pynative_ge_init_ = true;
   return true;
 }
+
+bool MsContext::IsTsdOpened() {
+  if (tsd_ref_ > 0) {
+    return true;
+  }
+  return false;
+}
+
+bool MsContext::IsGeInited() {
+  if (ge_ref_ > 0) {
+    return true;
+  }
+  return false;
+}
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/utils/context/ms_context.h b/mindspore/ccsrc/utils/context/ms_context.h
index 1d84061a8a..b2d594d10e 100644
--- a/mindspore/ccsrc/utils/context/ms_context.h
+++ b/mindspore/ccsrc/utils/context/ms_context.h
@@ -82,8 +82,10 @@ class MsContext {
 
   bool OpenTsd();
   bool CloseTsd(bool force = false);
+  bool IsTsdOpened();
   bool InitGe();
   bool FinalizeGe(bool force = false);
+  bool IsGeInited();
   void set_enable_hccl(bool enable_hccl) { enable_hccl_ = enable_hccl; }
   bool enable_hccl() const { return enable_hccl_; }
   bool PynativeInitGe();
diff --git a/mindspore/common/api.py b/mindspore/common/api.py
index 5016dd58bf..455e7a7f4f 100644
--- a/mindspore/common/api.py
+++ b/mindspore/common/api.py
@@ -22,7 +22,7 @@ from mindspore import context
 from mindspore import log as logger
 from mindspore.parallel._utils import _get_parallel_mode
 from .._c_expression import generate_key, Executor_, Tensor, MetaTensor
-from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_ge
+from .._c_expression import verify_inputs_signature, init_exec_dataset, _set_dataset_mode_config, init_backend
 from .tensor import Tensor as MsTensor
 
 # store ms_function class compiled pipeline cache
@@ -184,7 +184,7 @@ class _MindSporeFunction:
 
     @_wrap_func
     def __call__(self, *args):
-        init_ge()
+        init_backend()
         converted, arguments_dict, parse_method = _convert_function_arguments(self.fn, *args)
         if not converted:
             raise RuntimeError('Process function parameter is failure')
diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py
index 4980e90f3f..9cea668471 100755
--- a/mindspore/nn/cell.py
+++ b/mindspore/nn/cell.py
@@ -22,7 +22,7 @@ from ..common import dtype as mstype
 from ..common.api import _executor
 from .._checkparam import _check_str_by_regular
 from ..common.parameter import Parameter, ParameterTuple
-from .._c_expression import init_ge
+from .._c_expression import init_backend
 from ..ops.primitive import Primitive
 from ..parallel._tensor import _load_tensor_by_layout
 from ..parallel._utils import _get_parallel_mode
@@ -66,7 +66,7 @@ class Cell:
         self._phase = 'train'
         self._parameter_layout_dict = {}
         self._create_time = int(time.time() * 1e9)
-        init_ge()
+        init_backend()
         # call gc to release GE session resources used by non-used cell objects
         gc.collect()
         self._construct_inputs_num = 0

From c88edfb31dca3bf2702827dc231b6d2534b0cffe Mon Sep 17 00:00:00 2001
From: zhaozhenlong <zhaozhenlong1@huawei.com>
Date: Sun, 26 Apr 2020 10:17:28 +0800
Subject: [PATCH 089/242] PSNR/SSIM: check that both inputs are 4-D with the same shape and type

---
 mindspore/nn/layer/image.py     | 11 ++++++++++
 tests/ut/python/nn/test_psnr.py | 37 +++++++++++++++++++++++++++++++++
 tests/ut/python/nn/test_ssim.py | 36 ++++++++++++++++++++++++++++++++
 3 files changed, 84 insertions(+)

diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py
index b46ac4cd6e..3e139a2db5 100644
--- a/mindspore/nn/layer/image.py
+++ b/mindspore/nn/layer/image.py
@@ -95,6 +95,11 @@ def _gauss_kernel_helper(filter_size):
     g = Tensor(g)
     return filter_size, g
 
+@constexpr
+def _check_input_4d(input_shape, param_name, func_name):
+    if len(input_shape) != 4:
+        raise ValueError(f"{func_name} {param_name} should be 4d, but got shape {input_shape}")
+    return True
 
 class SSIM(Cell):
     r"""
@@ -146,6 +151,9 @@ class SSIM(Cell):
         self.mean = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=filter_size)
 
     def construct(self, img1, img2):
+        _check_input_4d(F.shape(img1), "img1", "SSIM")
+        _check_input_4d(F.shape(img2), "img2", "SSIM")
+        P.SameTypeShape()(img1, img2)
         max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val)
         img1 = _convert_img_dtype_to_float32(img1, self.max_val)
         img2 = _convert_img_dtype_to_float32(img2, self.max_val)
@@ -236,6 +244,9 @@ class PSNR(Cell):
         self.max_val = max_val
 
     def construct(self, img1, img2):
+        _check_input_4d(F.shape(img1), "img1", "PSNR")
+        _check_input_4d(F.shape(img2), "img2", "PSNR")
+        P.SameTypeShape()(img1, img2)
         max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val)
         img1 = _convert_img_dtype_to_float32(img1, self.max_val)
         img2 = _convert_img_dtype_to_float32(img2, self.max_val)
diff --git a/tests/ut/python/nn/test_psnr.py b/tests/ut/python/nn/test_psnr.py
index 32e7b570aa..c07d246810 100644
--- a/tests/ut/python/nn/test_psnr.py
+++ b/tests/ut/python/nn/test_psnr.py
@@ -18,10 +18,12 @@ test psnr
 import numpy as np
 import pytest
 import mindspore.nn as nn
+from mindspore.common import dtype as mstype
 from mindspore.common.api import _executor
 from mindspore import Tensor
 
 
+
 class PSNRNet(nn.Cell):
     def __init__(self, max_val=1.0):
         super(PSNRNet, self).__init__()
@@ -59,3 +61,38 @@ def test_psnr_max_val_zero():
     max_val = 0
     with pytest.raises(ValueError):
         net = PSNRNet(max_val)
+
+def test_psnr_different_shape():
+    shape_1 = (8, 3, 16, 16)
+    shape_2 = (8, 3, 8, 8)
+    img1 = Tensor(np.random.random(shape_1))
+    img2 = Tensor(np.random.random(shape_2))
+    net = PSNRNet()
+    with pytest.raises(ValueError):
+        _executor.compile(net, img1, img2)
+
+def test_psnr_different_dtype():
+    dtype_1 = mstype.float32
+    dtype_2 = mstype.float16
+    img1 = Tensor(np.random.random((8, 3, 16, 16)), dtype=dtype_1)
+    img2 = Tensor(np.random.random((8, 3, 16, 16)), dtype=dtype_2)
+    net = PSNRNet()
+    with pytest.raises(TypeError):
+        _executor.compile(net, img1, img2)
+
+def test_psnr_invalid_5d_input():
+    shape_1 = (8, 3, 16, 16)
+    shape_2 = (8, 3, 8, 8)
+    invalid_shape = (8, 3, 16, 16, 1)
+    img1 = Tensor(np.random.random(shape_1))
+    invalid_img1 = Tensor(np.random.random(invalid_shape))
+    img2 = Tensor(np.random.random(shape_2))
+    invalid_img2 = Tensor(np.random.random(invalid_shape))
+
+    net = PSNRNet()
+    with pytest.raises(ValueError):
+        _executor.compile(net, invalid_img1, img2)
+    with pytest.raises(ValueError):
+        _executor.compile(net, img1, invalid_img2)
+    with pytest.raises(ValueError):
+        _executor.compile(net, invalid_img1, invalid_img2)
diff --git a/tests/ut/python/nn/test_ssim.py b/tests/ut/python/nn/test_ssim.py
index 77d065b100..7389c2dbda 100644
--- a/tests/ut/python/nn/test_ssim.py
+++ b/tests/ut/python/nn/test_ssim.py
@@ -18,6 +18,7 @@ test ssim
 import numpy as np
 import pytest
 import mindspore.nn as nn
+import mindspore.common.dtype as mstype
 from mindspore.common.api import _executor
 from mindspore import Tensor
 
@@ -93,3 +94,38 @@ def test_ssim_k1_k2_wrong_value():
         net = SSIMNet(k2=0.0)
     with pytest.raises(ValueError):
         net = SSIMNet(k2=-1.0)
+
+def test_ssim_different_shape():
+    shape_1 = (8, 3, 16, 16)
+    shape_2 = (8, 3, 8, 8)
+    img1 = Tensor(np.random.random(shape_1))
+    img2 = Tensor(np.random.random(shape_2))
+    net = SSIMNet()
+    with pytest.raises(ValueError):
+        _executor.compile(net, img1, img2)
+
+def test_ssim_different_dtype():
+    dtype_1 = mstype.float32
+    dtype_2 = mstype.float16
+    img1 = Tensor(np.random.random((8, 3, 16, 16)), dtype=dtype_1)
+    img2 = Tensor(np.random.random((8, 3, 16, 16)), dtype=dtype_2)
+    net = SSIMNet()
+    with pytest.raises(TypeError):
+        _executor.compile(net, img1, img2)
+
+def test_ssim_invalid_5d_input():
+    shape_1 = (8, 3, 16, 16)
+    shape_2 = (8, 3, 8, 8)
+    invalid_shape = (8, 3, 16, 16, 1)
+    img1 = Tensor(np.random.random(shape_1))
+    invalid_img1 = Tensor(np.random.random(invalid_shape))
+    img2 = Tensor(np.random.random(shape_2))
+    invalid_img2 = Tensor(np.random.random(invalid_shape))
+
+    net = SSIMNet()
+    with pytest.raises(ValueError):
+        _executor.compile(net, invalid_img1, img2)
+    with pytest.raises(ValueError):
+        _executor.compile(net, img1, invalid_img2)
+    with pytest.raises(ValueError):
+        _executor.compile(net, invalid_img1, invalid_img2)

From d2727d0504a9da52332527ae3a1ad5038c253251 Mon Sep 17 00:00:00 2001
From: "Etone.Chan" <etone.chan@huawei.com>
Date: Fri, 17 Apr 2020 17:42:37 +0800
Subject: [PATCH 090/242] add buffer fusion bnupdate eltwise pass

---
 .../ccsrc/kernel/tbe/tbe_kernel_build.cc      |    3 +-
 .../ascend/buffer_fusion/buffer_fusion.cc     |  263 ++--
 .../ascend/buffer_fusion/buffer_fusion.h      |    6 +-
 .../buffer_fusion/buffer_fusion_test.cc       | 1298 -----------------
 4 files changed, 182 insertions(+), 1388 deletions(-)
 delete mode 100644 tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc

diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
index 9ec20b3fbb..939e7146e6 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
@@ -722,8 +722,7 @@ bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode
                                                 std::vector<nlohmann::json> *output_desc_list) {
   auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
   if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
-    // wait anther pr: auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
-    auto output_used_nums = {SizeToInt(AnfAlgo::GetNodeAttr<std::size_t>(cnode, kAttrOutputUsedNum))};
+    auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int>>(cnode, kAttrOutputUsedNum);
     MS_LOG(INFO) << "This node's output has been reused, node name: " << cnode->fullname_with_scope();
     if (output_used_nums.size() != output_size) {
       MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"
diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
index 58b8a93516..abacb9137d 100644
--- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
@@ -17,6 +17,7 @@
 
 #include <vector>
 #include <tuple>
+#include <utility>
 #include <unordered_set>
 #include <unordered_map>
 #include <deque>
@@ -282,11 +283,17 @@ kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr
   // outputs format and data type
   std::vector<std::string> outputs_format;
   std::vector<TypeId> outputs_data_type;
-  for (size_t index = 0; index < outputs_list.size(); ++index) {
-    for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) {
-      auto kernel_with_index = AnfAlgo::VisitKernel(outputs_list[index], idx);
-      outputs_format.push_back(AnfAlgo::GetOutputFormat(kernel_with_index.first, kernel_with_index.second));
-      outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(kernel_with_index.first, kernel_with_index.second));
+  for (const auto &output : outputs_list) {
+    if (AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) {
+      auto tuple_getitem = output->cast<CNodePtr>();
+      MS_EXCEPTION_IF_NULL(tuple_getitem);
+      outputs_format.push_back(AnfAlgo::GetOutputFormat(
+        tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
+      outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(
+        tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
+    } else {
+      outputs_format.push_back(AnfAlgo::GetOutputFormat(output, 0));
+      outputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(output, 0));
     }
   }
   builder.SetInputsFormat(inputs_format);
@@ -320,32 +327,35 @@ AnfNodePtr CreateTupleGetItem(const AnfNodePtr &buffer_fusion_kernel, session::K
   return tuple_item;
 }
 
-void ReplaceOldNode(const std::vector<AnfNodePtr> &outputs_list, const AnfNodePtr &buffer_fusion_kernel,
-                    session::KernelGraph *kernel_graph) {
+void ReplaceInputNodeInOtherFusionScope(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos,
+                                        int32_t fusion_id, const AnfNodePtr &output_item,
+                                        const AnfNodePtr &replace_item) {
+  for (int32_t id = fusion_id + 1; id <= SizeToInt(buffer_fusion_infos->size()); ++id) {
+    auto itr = std::find((*buffer_fusion_infos)[id].inputs_list.begin(), (*buffer_fusion_infos)[id].inputs_list.end(),
+                         output_item);
+    if (itr != (*buffer_fusion_infos)[id].inputs_list.end()) {
+      MS_LOG(DEBUG) << "replace input of other pattern, id = " << id;
+      *itr = replace_item;
+    }
+  }
+}
+
+void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id,
+                    const AnfNodePtr &buffer_fusion_kernel, session::KernelGraph *kernel_graph) {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto manager = kernel_graph->manager();
   MS_EXCEPTION_IF_NULL(manager);
-  if (outputs_list.size() == 1) {  // single output
-    (void)manager->Replace(outputs_list[0], buffer_fusion_kernel);
+  auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id];
+  if (buffer_fusion_info.outputs_list.size() == 1) {  // single output
+    (void)manager->Replace(buffer_fusion_info.outputs_list[0], buffer_fusion_kernel);
+    ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[0],
+                                       buffer_fusion_kernel);
   } else {  // multiple output
-    size_t real_idx = 0;
-    for (size_t index = 0; index < outputs_list.size(); ++index) {
-      if (AnfAlgo::GetOutputTensorNum(outputs_list[index]) == 1) {
-        auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++);
-        (void)manager->Replace(outputs_list[index], tuple_item);
-      } else {
-        std::vector<AnfNodePtr> make_tuple_inputs;
-        AbstractBasePtrList abstract_list;
-        make_tuple_inputs.push_back(NewValueNode(prim::kPrimMakeTuple));
-        for (size_t idx = 0; idx < AnfAlgo::GetOutputTensorNum(outputs_list[index]); ++idx) {
-          auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, real_idx++);
-          abstract_list.push_back(tuple_item->abstract());
-          make_tuple_inputs.push_back(tuple_item);
-        }
-        AnfNodePtr make_tuple = kernel_graph->NewCNode(make_tuple_inputs);
-        make_tuple->set_abstract(std::make_shared<abstract::AbstractTuple>(abstract_list));
-        (void)manager->Replace(outputs_list[index], make_tuple);
-      }
+    for (size_t index = 0; index < buffer_fusion_info.outputs_list.size(); ++index) {
+      auto tuple_item = CreateTupleGetItem(buffer_fusion_kernel, kernel_graph, index);
+      (void)manager->Replace(buffer_fusion_info.outputs_list[index], tuple_item);
+      ReplaceInputNodeInOtherFusionScope(buffer_fusion_infos, fusion_id, buffer_fusion_info.outputs_list[index],
+                                         tuple_item);
     }
   }
 }
@@ -406,38 +416,67 @@ void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id,
   }
 }
 
-void InsertNode(const AnfNodePtr &node, std::vector<AnfNodePtr> *list) {
-  MS_EXCEPTION_IF_NULL(list);
-  if (std::find(list->begin(), list->end(), node) == list->end()) {
-    (void)list->insert(list->end(), node);
+void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph,
+                                   std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
+  MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
+  auto nodes = TopoSort(kernel_graph->get_return());
+  for (auto &node : nodes) {
+    MS_EXCEPTION_IF_NULL(node);
+    if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) {
+      auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(node, kOpAttrFusionId);
+      (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node);
+    }
   }
 }
 
-void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id,
-                              std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
-  MS_EXCEPTION_IF_NULL(node);
+void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
+                                  std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
   MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
-  for (auto &input : node->inputs()) {
-    MS_EXCEPTION_IF_NULL(input);
-    if (AnfAlgo::IsRealCNodeKernel(input) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) {
-      auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input, kOpAttrFusionId);
-      if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) {
-        BufferFusionInfo_t buffer_fusion_info;
-        (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info;
-      }
-      if (fusion_id != cur_fusion_id) {
-        InsertNode(input, &((*buffer_fusion_infos)[fusion_id].outputs_list));
-      }
-    } else if (input->isa<CNode>()) {
-      for (auto &input_in : input->cast<CNodePtr>()->inputs()) {
-        if (AnfAlgo::IsRealCNodeKernel(input_in) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) {
-          auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input_in, kOpAttrFusionId);
-          if (buffer_fusion_infos->find(fusion_id) == buffer_fusion_infos->end()) {
-            BufferFusionInfo_t buffer_fusion_info;
-            (*buffer_fusion_infos)[fusion_id] = buffer_fusion_info;
+  auto manager = kernel_graph->manager();
+  MS_EXCEPTION_IF_NULL(manager);
+
+  for (auto &buffer_fusion_info : *buffer_fusion_infos) {
+    auto fusion_id = buffer_fusion_info.first;
+    auto fusion_info = buffer_fusion_info.second;
+    for (const auto &node : fusion_info.anf_nodes) {
+      if (AnfAlgo::GetOutputTensorNum(node) == 1) {
+        for (auto use_node : manager->node_users()[node]) {
+          if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), use_node.first) ==
+              fusion_info.anf_nodes.end()) {
+            (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(node);
+            break;
+          }
+        }
+      } else {
+        int prev_idx = 0;
+        std::vector<AnfNodePtr> tuple_getitem_nodes;
+        std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(),
+                       std::back_inserter(tuple_getitem_nodes),
+                       [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; });
+        std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(),
+                  [](const AnfNodePtr &node1, const AnfNodePtr &node2) {
+                    auto getitem1 = node1->cast<CNodePtr>();
+                    auto getitem2 = node2->cast<CNodePtr>();
+                    auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2)));
+                    auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2)));
+                    return output_idx1 < output_idx2;
+                  });
+        for (auto getitem : tuple_getitem_nodes) {
+          auto getitem_ptr = getitem->cast<CNodePtr>();
+          auto input2 = getitem_ptr->input(2);
+          auto output_idx = GetValue<int>(GetValueNode(input2));
+          for (int stub_idx = prev_idx; stub_idx < output_idx; ++stub_idx) {
+            auto stub_node = CreateTupleGetItem(node, kernel_graph, IntToSize(stub_idx));
+            (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(stub_node);
           }
-          if (fusion_id != cur_fusion_id) {
-            InsertNode(input_in, &((*buffer_fusion_infos)[fusion_id].outputs_list));
+          prev_idx = output_idx + 1;
+          for (auto item_use_node : manager->node_users()[getitem]) {
+            if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), item_use_node.first) ==
+                fusion_info.anf_nodes.end()) {
+              (*buffer_fusion_infos)[fusion_id].outputs_list.push_back(getitem);
+              break;
+            }
           }
         }
       }
@@ -445,15 +484,72 @@ void CheckCurrentNodeIsOutput(const CNodePtr &node, const int32_t &cur_fusion_id
   }
 }
 
-void GetFusionScopeNodeList(const session::KernelGraph &kernel_graph,
-                            std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
-  MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
-  auto nodes = TopoSort(kernel_graph.get_return());
-  for (auto &node : nodes) {
-    MS_EXCEPTION_IF_NULL(node);
-    if (AnfAlgo::IsRealCNodeKernel(node) && AnfAlgo::HasNodeAttr(kOpAttrFusionId, node)) {
-      auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(node, kOpAttrFusionId);
-      (*buffer_fusion_infos)[fusion_id].anf_nodes.push_back(node);
+void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
+                       std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) {
+  MS_EXCEPTION_IF_NULL(cnode);
+  MS_EXCEPTION_IF_NULL(fused_set);
+  MS_EXCEPTION_IF_NULL(candidate_fusion);
+  auto manager = kernel_graph.manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  auto conv = cnode->input(1);
+  if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) {
+    std::vector<int> output_used_num{SizeToInt(manager->node_users()[conv].size())};
+    AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), conv);
+    std::unordered_set<AnfNodePtr> record{cnode, conv};
+    candidate_fusion->push_back(record);
+    fused_set->insert(record.begin(), record.end());
+  }
+}
+
+void MatchBnupdateRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph,
+                       std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) {
+  MS_EXCEPTION_IF_NULL(cnode);
+  MS_EXCEPTION_IF_NULL(fused_set);
+  MS_EXCEPTION_IF_NULL(candidate_fusion);
+  auto manager = kernel_graph.manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  auto getitem = relu_input->cast<CNodePtr>();
+  auto bnupdate = getitem->input(1);
+  if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) {
+    std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0);
+    for (auto out_getitem : manager->node_users()[bnupdate]) {
+      auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>();
+      auto input2 = out_getitem_ptr->input(2);
+      auto output_idx = GetValue<int>(GetValueNode(input2));
+      output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size());
+    }
+    AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate);
+    std::unordered_set<AnfNodePtr> record{cnode, bnupdate};
+    candidate_fusion->push_back(record);
+    fused_set->insert(record.begin(), record.end());
+  }
+}
+
+void MatchBnupdateAddRelu(const CNodePtr &cnode, const AnfNodePtr &relu_input, const session::KernelGraph &kernel_graph,
+                          std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) {
+  MS_EXCEPTION_IF_NULL(cnode);
+  MS_EXCEPTION_IF_NULL(fused_set);
+  MS_EXCEPTION_IF_NULL(candidate_fusion);
+  auto manager = kernel_graph.manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  auto add = relu_input->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(add);
+  auto tuple_getitem = add->input(1);
+  if (tuple_getitem->isa<CNode>() && AnfAlgo::GetCNodeName(tuple_getitem) == prim::kPrimTupleGetItem->name()) {
+    auto getitem = tuple_getitem->cast<CNodePtr>();
+    auto bnupdate = getitem->input(1);
+    if (bnupdate->isa<CNode>() && AnfAlgo::GetCNodeName(bnupdate) == kBNTrainingUpdateOpName) {
+      std::vector<int> output_used_num(AnfAlgo::GetOutputTensorNum(bnupdate), 0);
+      for (auto out_getitem : manager->node_users()[bnupdate]) {
+        auto out_getitem_ptr = out_getitem.first->cast<CNodePtr>();
+        auto input2 = out_getitem_ptr->input(2);
+        auto output_idx = GetValue<int>(GetValueNode(input2));
+        output_used_num[output_idx] = SizeToInt(manager->node_users()[out_getitem.first].size());
+      }
+      AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(output_used_num), bnupdate);
+      std::unordered_set<AnfNodePtr> record{cnode, relu_input, bnupdate};
+      candidate_fusion->push_back(record);
+      fused_set->insert(record.begin(), record.end());
     }
   }
 }
@@ -470,15 +566,14 @@ void MatchOpNamePattern(const session::KernelGraph &kernel_graph, std::unordered
     auto cnode = node->cast<CNodePtr>();
     MS_EXCEPTION_IF_NULL(cnode);
     if (AnfAlgo::GetCNodeName(cnode) == kBNTrainingReduceOpName) {
-      auto conv = cnode->input(1);
-      if (conv->isa<CNode>() && AnfAlgo::GetCNodeName(conv) == prim::kPrimConv2D->name()) {
-        auto manager = kernel_graph.manager();
-        MS_EXCEPTION_IF_NULL(manager);
-        auto &users = manager->node_users();
-        AnfAlgo::SetNodeAttr(kAttrOutputUsedNum, MakeValue(users[conv].size()), conv);
-        std::unordered_set<AnfNodePtr> record({cnode, conv});
-        candidate_fusion->push_back(record);
-        fused_set->insert(record.begin(), record.end());
+      MatchConvBnreduce(cnode, kernel_graph, fused_set, candidate_fusion);
+    } else if (AnfAlgo::GetCNodeName(cnode) == kReluV2OpName ||
+               AnfAlgo::GetCNodeName(cnode) == prim::kPrimRelu->name()) {
+      auto relu_input = cnode->input(1);
+      if (relu_input->isa<CNode>() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTensorAdd->name()) {
+        MatchBnupdateAddRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion);
+      } else if (relu_input->isa<CNode>() && AnfAlgo::GetCNodeName(relu_input) == prim::kPrimTupleGetItem->name()) {
+        MatchBnupdateRelu(cnode, relu_input, kernel_graph, fused_set, candidate_fusion);
       }
     }
   }
@@ -536,27 +631,23 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord
 }
 }  // namespace
 
-void BufferFusion::GetBufferFusionInfo(const session::KernelGraph &kernel_graph,
+void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph,
                                        std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const {
   MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
-  std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph.get_return());
+  std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph->get_return());
   for (auto &node : node_list) {
     if (!AnfAlgo::IsRealCNodeKernel(node)) {
       continue;
     }
-
-    int32_t cur_fusion_id = -1;
     auto cnode = node->cast<CNodePtr>();
     MS_EXCEPTION_IF_NULL(cnode);
     if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) {
-      cur_fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId);
+      auto cur_fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId);
       CheckCurrentNodeIsInput(cnode, cur_fusion_id, buffer_fusion_infos);
     }
-    // Check if current node is output
-    CheckCurrentNodeIsOutput(cnode, cur_fusion_id, buffer_fusion_infos);
   }
-
-  GetFusionScopeNodeList(kernel_graph, buffer_fusion_infos);
+  GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos);
+  GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos);
   for (auto &buffer_fusion_info : *buffer_fusion_infos) {
     buffer_fusion_info.second.kernel_build_info =
       CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list_in, buffer_fusion_info.second.inputs_list,
@@ -569,7 +660,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c
   bool change = false;
   std::unordered_map<int32_t, BufferFusionInfo_t> buffer_fusion_infos;
   buffer_fusion_infos.clear();
-  GetBufferFusionInfo(*kernel_graph, &buffer_fusion_infos);
+  GetBufferFusionInfo(kernel_graph, &buffer_fusion_infos);
 
   std::vector<mindspore::kernel::FusionScopeInfo> fusion_scope_infos;
   for (auto &buffer_fusion_info : buffer_fusion_infos) {
@@ -600,7 +691,7 @@ bool BufferFusion::FuseBufferFusionPattern(session::KernelGraph *kernel_graph) c
       MS_LOG(DEBUG) << "fusion id: " << fusion_id << ", fusion op compiling failed";
       continue;
     }
-    change = ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_mods[fusion_id], kernel_graph);
+    change = ReplaceFusionOp(&buffer_fusion_infos, fusion_id, kernel_mods[fusion_id], kernel_graph);
   }
   MS_LOG(DEBUG) << "End Buffer Fusion";
   return change;
@@ -630,8 +721,10 @@ bool BufferFusion::MatchBufferFusionPattern(const session::KernelGraph &kernel_g
   return true;
 }
 
-bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr,
+bool BufferFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos,
+                                   int32_t fusion_id, const kernel::KernelModPtr &kernel_ptr,
                                    session::KernelGraph *kernel_graph) const {
+  auto buffer_fusion_info = (*buffer_fusion_infos)[fusion_id];
   auto buffer_fusion = CreateFusionOp(buffer_fusion_info.inputs_list, buffer_fusion_info.outputs_list,
                                       buffer_fusion_info.anf_nodes, kernel_graph);
   AnfAlgo::SetSelectKernelBuildInfo(buffer_fusion_info.kernel_build_info, buffer_fusion.get());
@@ -651,7 +744,7 @@ bool BufferFusion::ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info,
   AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get());
   AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get());
   // replace node
-  ReplaceOldNode(buffer_fusion_info.outputs_list, buffer_fusion, kernel_graph);
+  ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph);
   return true;
 }
 
diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
index c54fd0cd97..9bed7217dd 100644
--- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
@@ -44,10 +44,10 @@ class BufferFusion : public Pass {
   bool Run(const FuncGraphPtr &graph) override;
 
  private:
-  void GetBufferFusionInfo(const session::KernelGraph &kernel_graph,
+  void GetBufferFusionInfo(session::KernelGraph *kernel_graph,
                            std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const;
-  bool ReplaceFusionOp(const BufferFusionInfo_t &buffer_fusion_info, const kernel::KernelModPtr &kernel_ptr,
-                       session::KernelGraph *kernel_graph) const;
+  bool ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos, int32_t fusion_id,
+                       const kernel::KernelModPtr &kernel_ptr, session::KernelGraph *kernel_graph) const;
   bool MatchBufferFusionPattern(const session::KernelGraph &kernel_graph) const;
   bool FuseBufferFusionPattern(session::KernelGraph *kernel_graph) const;
 };
diff --git a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc
deleted file mode 100644
index 9807344139..0000000000
--- a/tests/ut/cpp/pre_activate/ascend/buffer_fusion/buffer_fusion_test.cc
+++ /dev/null
@@ -1,1298 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <iostream>
-#include <memory>
-#include <unordered_map>
-
-#include "common/common_test.h"
-#include "common/py_func_graph_fetcher.h"
-
-#include "ir/anf.h"
-#include "ir/func_graph_cloner.h"
-#include "utils/context/ms_context.h"
-#include "debug/draw.h"
-#include "debug/anf_ir_dump.h"
-#include "operator/ops.h"
-#include "utils/utils.h"
-#include "kernel/tbe/tbe_kernel_mod.h"
-#include "session/kernel_graph.h"
-#include "device/kernel_info.h"
-#include "session/anf_runtime_algorithm.h"
-#include "pre_activate/common/pattern_engine.h"
-#define private public
-#include "pre_activate/ascend/buffer_fusion/buffer_fusion.h"
-
-namespace mindspore {
-namespace opt {
-using Primitive = mindspore::Primitive;
-using session::KernelGraph;
-using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
-using KernelBuildInfoBuilder = kernel::KernelBuildInfo::KernelBuildInfoBuilder;
-class TestHWBufferFusion : public UT::Common {
- public:
-  TestHWBufferFusion() : getPyFun_("gtest_input.pre_activate.hw_opt_test", true) {}
-
- public:
-  UT::PyFuncGraphFetcher getPyFun_;
-};
-
-static KernelGraphPtr CreateKernelGraphForBufferFusionMultipleIn(
-  uint32_t after_layers, mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) {
-  KernelGraphPtr g = std::make_shared<KernelGraph>();
-  std::vector<AnfNodePtr> inputs;
-
-  std::vector<int> shp = {1, 3, 3, 4};
-  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
-  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
-
-  uint32_t layerscount = 1;
-  CNodePtr ptr_formerlayer;
-  std::string name = "";
-
-  // Construct first node
-  tensor::TensorPtr y_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  y_tensor->set_device_info(device_info);
-  tensor::TensorPtr z_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  z_tensor->set_device_info(device_info);
-
-  auto y_const = NewValueNode(y_tensor);
-  auto z_const = NewValueNode(z_tensor);
-  y_const->set_abstract(y_tensor->ToAbstract());
-  z_const->set_abstract(z_tensor->ToAbstract());
-  g->MutableInputs()->push_back(y_const);
-  g->MutableInputs()->push_back(z_const);
-
-  auto p_conv = std::make_shared<Primitive>("Conv2D");
-  std::vector<std::string> input_names = {"x", "y"};
-  std::vector<std::string> output_names = {"output"};
-
-  ValuePtr input_names_v = MakeValue(input_names);
-  ValuePtr output_names_v = MakeValue(output_names);
-  p_conv->set_attr("input_names", input_names_v);
-  p_conv->set_attr("output_names", output_names_v);
-
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_conv));
-  inputs.push_back(y_const);
-  inputs.push_back(z_const);
-  name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot";
-
-  auto kernelptr_first = g->NewCNode(inputs);
-  kernelptr_first->set_abstract(y_tensor->ToAbstract());
-  kernelptr_first->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder;
-
-  builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-  builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-  builder.SetOutputsFormat({kOpFormat_NCHW});
-  builder.SetOutputsDeviceType({kFloat32->type_id()});
-  builder.SetKernelType(KernelType::TBE_KERNEL);
-  builder.SetFusionType(fusiontype);
-  builder.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get());
-  ptr_formerlayer = kernelptr_first;
-
-  // configure fusion successor layers
-  int layer_idx = 0;
-  while (after_layers--) {
-    auto p_relu = std::make_shared<Primitive>("ReLU6");
-    if (layer_idx == 0) {
-      tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-      x_tensor->set_device_info(device_info);
-
-      auto x_const = NewValueNode(x_tensor);
-      x_const->set_abstract(x_tensor->ToAbstract());
-      std::vector<std::string> input_names = {"x", "y"};
-      std::vector<std::string> output_names = {"output"};
-      ValuePtr input_names_v = MakeValue(input_names);
-      ValuePtr output_names_v = MakeValue(output_names);
-      p_relu->set_attr("input_names", input_names_v);
-      p_relu->set_attr("output_names", output_names_v);
-
-      inputs.clear();
-      inputs.push_back(NewValueNode(p_relu));
-      inputs.push_back(ptr_formerlayer);
-      inputs.push_back(x_const);
-    } else {
-      std::vector<std::string> input_names = {"x"};
-      std::vector<std::string> output_names = {"output"};
-      ValuePtr input_names_v = MakeValue(input_names);
-      ValuePtr output_names_v = MakeValue(output_names);
-      p_relu->set_attr("input_names", input_names_v);
-      p_relu->set_attr("output_names", output_names_v);
-
-      inputs.clear();
-      inputs.push_back(NewValueNode(p_relu));
-      inputs.push_back(ptr_formerlayer);
-    }
-    auto kernelptr_floor = g->NewCNode(inputs);
-    kernelptr_floor->set_abstract(y_tensor->ToAbstract());
-    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-    KernelBuildInfoBuilder builder;
-    if (layer_idx == 0) {
-      builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-      builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-    } else {
-      builder.SetInputsFormat({kOpFormat_NCHW});
-      builder.SetInputsDeviceType({kFloat32->type_id()});
-    }
-
-    builder.SetOutputsFormat({kOpFormat_NCHW});
-    builder.SetOutputsDeviceType({kFloat32->type_id()});
-    builder.SetKernelType(KernelType::TBE_KERNEL);
-    builder.SetFusionType(kernel::FusionType::ELEMWISE);
-    builder.SetProcessor(kernel::Processor::AICORE);
-    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
-    ptr_formerlayer = kernelptr_floor;
-    layerscount++;
-    layer_idx++;
-  }
-
-  // return res
-  auto p_return = std::make_shared<Primitive>("return");
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_return));
-  inputs.push_back(ptr_formerlayer);
-  auto ret = g->NewCNode(inputs);
-  ret->set_abstract(y_tensor->ToAbstract());
-
-  g->set_return(ret);
-
-  draw::Draw(name, g);
-
-  return g;
-}
-
-static KernelGraphPtr CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(
-  uint32_t before_layers, uint32_t after_layers = 3,
-  mindspore::kernel::FusionType fusiontype = mindspore::kernel::SEGMENT) {
-  KernelGraphPtr g = std::make_shared<KernelGraph>();
-  std::vector<AnfNodePtr> inputs;
-
-  std::vector<int> shp = {1, 3, 3, 4};
-  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
-  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
-
-  uint32_t layerscount = 1;
-  CNodePtr ptr_formerlayer;
-  std::string name = "";
-  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  auto x_abstract = x_tensor->ToAbstract();
-  auto x_const = NewValueNode(x_tensor);
-  x_const->set_abstract(x_abstract);
-  g->MutableInputs()->push_back(x_const);
-
-  while (before_layers--) {
-    auto p_relu = std::make_shared<Primitive>("ReLU6");
-    std::vector<std::string> input_names = {"x"};
-    std::vector<std::string> output_names = {"output"};
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_relu->set_attr("input_names", input_names_v);
-    p_relu->set_attr("output_names", output_names_v);
-
-    inputs.clear();
-    if (layerscount == 1) {
-      inputs.push_back(NewValueNode(p_relu));
-      inputs.push_back(x_const);
-    } else {
-      inputs.push_back(NewValueNode(p_relu));
-      inputs.push_back(ptr_formerlayer);
-    }
-    auto kernelptr_floor = g->NewCNode(inputs);
-    kernelptr_floor->set_abstract(x_abstract);
-    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-    KernelBuildInfoBuilder builder;
-    builder.SetInputsFormat({kOpFormat_NCHW});
-    builder.SetOutputsFormat({kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id()});
-    builder.SetOutputsDeviceType({kFloat32->type_id()});
-    builder.SetKernelType(KernelType::TBE_KERNEL);
-    builder.SetFusionType(kernel::FusionType::ELEMWISE);
-    builder.SetProcessor(kernel::Processor::AICORE);
-    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
-    ptr_formerlayer = kernelptr_floor;
-    layerscount++;
-  }
-
-  // Construct the conv2d node
-  tensor::TensorPtr y_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  y_tensor->set_device_info(device_info);
-  auto y_const = NewValueNode(y_tensor);
-  y_const->set_abstract(y_tensor->ToAbstract());
-
-  if (fusiontype == kernel::FusionType::CONVLUTION) {
-    auto p_conv = std::make_shared<Primitive>("Conv2D");
-    std::vector<std::string> input_names = {"x", "y"};
-    std::vector<std::string> output_names = {"output"};
-
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_conv->set_attr("input_names", input_names_v);
-    p_conv->set_attr("output_names", output_names_v);
-
-    inputs.clear();
-    inputs.push_back(NewValueNode(p_conv));
-    inputs.push_back(y_const);
-    inputs.push_back(ptr_formerlayer);
-    name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot";
-  } else {
-    auto p_red_seg = std::make_shared<Primitive>("ReduceOrSegment");
-    std::vector<std::string> input_names = {"x"};
-    std::vector<std::string> output_names = {"output"};
-
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_red_seg->set_attr("input_names", input_names_v);
-    p_red_seg->set_attr("output_names", output_names_v);
-
-    inputs.clear();
-    inputs.push_back(NewValueNode(p_red_seg));
-    inputs.push_back(ptr_formerlayer);
-    name = "test_regOrSeg_" + std::to_string(layerscount) + "layers_graph.dot";
-  }
-
-  auto kernelptr_first = g->NewCNode(inputs);
-  kernelptr_first->set_abstract(y_tensor->ToAbstract());
-  kernelptr_first->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder;
-  if (fusiontype == kernel::FusionType::CONVLUTION) {
-    builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-  } else {
-    builder.SetInputsFormat({kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id()});
-  }
-  builder.SetOutputsFormat({kOpFormat_NCHW});
-  builder.SetOutputsDeviceType({kFloat32->type_id()});
-  builder.SetKernelType(KernelType::TBE_KERNEL);
-  builder.SetFusionType(fusiontype);
-  builder.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get());
-  ptr_formerlayer = kernelptr_first;
-
-  // configure fusion successor layers
-  while (after_layers--) {
-    auto p_relu = std::make_shared<Primitive>("ReLU6");
-    std::vector<std::string> input_names = {"x"};
-    std::vector<std::string> output_names = {"output"};
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_relu->set_attr("input_names", input_names_v);
-    p_relu->set_attr("output_names", output_names_v);
-
-    inputs.clear();
-    inputs.push_back(NewValueNode(p_relu));
-    inputs.push_back(ptr_formerlayer);
-
-    auto kernelptr_floor = g->NewCNode(inputs);
-    kernelptr_floor->set_abstract(y_tensor->ToAbstract());
-    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-    KernelBuildInfoBuilder builder;
-    builder.SetInputsFormat({kOpFormat_NCHW});
-    builder.SetOutputsFormat({kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id()});
-    builder.SetOutputsDeviceType({kFloat32->type_id()});
-    builder.SetKernelType(KernelType::TBE_KERNEL);
-    builder.SetFusionType(kernel::FusionType::ELEMWISE);
-    builder.SetProcessor(kernel::Processor::AICORE);
-    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
-    ptr_formerlayer = kernelptr_floor;
-    layerscount++;
-  }
-
-  // return res
-  auto p_return = std::make_shared<Primitive>("return");
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_return));
-  inputs.push_back(ptr_formerlayer);
-  auto ret = g->NewCNode(inputs);
-  ret->set_abstract(y_tensor->ToAbstract());
-  g->set_return(ret);
-  draw::Draw(name, g);
-  return g;
-}
-
-static KernelGraphPtr CreateKernelGraphForBufferFusionSingleIn(
-  uint32_t after_layers, mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) {
-  // build the func_graph manually, eg:
-  /* CreateKernelGraphForBufferFusionSingleIn(1)
-   * @mindspore
-   * def f(x):
-   *     z=conv2d(x, y)
-   *     ret=relu(z)
-   *     return ret
-   */
-  KernelGraphPtr g = std::make_shared<KernelGraph>();
-  std::vector<AnfNodePtr> inputs;
-
-  std::vector<int> shp = {1, 3, 3, 4};
-  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
-  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
-
-  uint32_t layerscount = 1;
-  CNodePtr ptr_formerlayer;
-  std::string name = "";
-
-  // Construct first node
-  tensor::TensorPtr y_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  y_tensor->set_device_info(device_info);
-  tensor::TensorPtr z_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  z_tensor->set_device_info(device_info);
-
-  auto y_const = NewValueNode(y_tensor);
-  auto z_const = NewValueNode(z_tensor);
-  y_const->set_abstract(y_tensor->ToAbstract());
-  z_const->set_abstract(z_tensor->ToAbstract());
-  g->MutableInputs()->push_back(y_const);
-  g->MutableInputs()->push_back(z_const);
-
-  if (fusiontype == kernel::FusionType::CONVLUTION) {
-    auto p_conv = std::make_shared<Primitive>("Conv2D");
-    std::vector<std::string> input_names = {"x", "y"};
-    std::vector<std::string> output_names = {"output"};
-
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_conv->set_attr("input_names", input_names_v);
-    p_conv->set_attr("output_names", output_names_v);
-
-    inputs.clear();
-    inputs.push_back(NewValueNode(p_conv));
-    inputs.push_back(y_const);
-    inputs.push_back(z_const);
-    name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot";
-  } else {
-    auto p_red_seg = std::make_shared<Primitive>("ReduceOrSegment");
-    std::vector<std::string> input_names = {"x"};
-    std::vector<std::string> output_names = {"output"};
-
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_red_seg->set_attr("input_names", input_names_v);
-    p_red_seg->set_attr("output_names", output_names_v);
-
-    inputs.clear();
-    inputs.push_back(NewValueNode(p_red_seg));
-    inputs.push_back(y_const);
-    name = "test_regOrSeg_" + std::to_string(layerscount) + "layers_graph.dot";
-  }
-
-  auto kernelptr_first = g->NewCNode(inputs);
-  kernelptr_first->set_abstract(y_tensor->ToAbstract());
-  kernelptr_first->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder;
-  if (fusiontype == kernel::FusionType::CONVLUTION) {
-    builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-  } else {
-    builder.SetInputsFormat({kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id()});
-  }
-
-  builder.SetOutputsFormat({kOpFormat_NCHW});
-  builder.SetOutputsDeviceType({kFloat32->type_id()});
-  builder.SetKernelType(KernelType::TBE_KERNEL);
-  builder.SetFusionType(fusiontype);
-  builder.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_first.get());
-  ptr_formerlayer = kernelptr_first;
-
-  // configure fusion successor layers
-  while (after_layers--) {
-    auto p_relu = std::make_shared<Primitive>("ReLU6");
-    std::vector<std::string> input_names = {"x"};
-    std::vector<std::string> output_names = {"output"};
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_relu->set_attr("input_names", input_names_v);
-    p_relu->set_attr("output_names", output_names_v);
-
-    inputs.clear();
-    inputs.push_back(NewValueNode(p_relu));
-    inputs.push_back(ptr_formerlayer);
-
-    auto kernelptr_floor = g->NewCNode(inputs);
-    kernelptr_floor->set_abstract(y_tensor->ToAbstract());
-    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-    KernelBuildInfoBuilder builder;
-    builder.SetInputsFormat({kOpFormat_NCHW});
-    builder.SetOutputsFormat({kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id()});
-    builder.SetOutputsDeviceType({kFloat32->type_id()});
-    builder.SetKernelType(KernelType::TBE_KERNEL);
-    builder.SetFusionType(kernel::FusionType::ELEMWISE);
-    builder.SetProcessor(kernel::Processor::AICORE);
-    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
-    ptr_formerlayer = kernelptr_floor;
-    layerscount++;
-  }
-
-  // return res
-  auto p_return = std::make_shared<Primitive>("return");
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_return));
-  inputs.push_back(ptr_formerlayer);
-  auto ret = g->NewCNode(inputs);
-  ret->set_abstract(y_tensor->ToAbstract());
-
-  g->set_return(ret);
-
-  draw::Draw(name, g);
-
-  return g;
-}
-
-static KernelGraphPtr CreateKernelGraphForBufferFusion(
-  uint32_t targetlayers, bool conv_flag = false,
-  mindspore::kernel::FusionType fusiontype = mindspore::kernel::CONVLUTION) {
-  // build the func_graph manually, eg:
-  /* CreateKernelGraphForBufferFusion(3)
-   * @mindspore
-   * def f(x):
-   *     y=relu(x)
-   *     z=relu(y)
-   *     ret=relu(z)
-   *     return ret
-   */
-  KernelGraphPtr g = std::make_shared<KernelGraph>();
-  std::vector<AnfNodePtr> inputs;
-  // x is input tensor.
-  std::vector<int> shp = {1, 3, 3, 4};
-  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-
-  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
-  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
-  x_tensor->set_device_info(device_info);
-
-  auto x_abstract = x_tensor->ToAbstract();
-  auto x_const = NewValueNode(x_tensor);
-  x_const->set_abstract(x_abstract);
-  g->MutableInputs()->push_back(x_const);
-
-  uint32_t layerscount = 1;
-  CNodePtr ptr_formerlayer;
-  // configure func_graph hiden layers
-  while (targetlayers--) {
-    auto p_relu = std::make_shared<Primitive>("ReLU6");
-    std::vector<std::string> input_names = {"x"};
-    std::vector<std::string> output_names = {"output"};
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_relu->set_attr("input_names", input_names_v);
-    p_relu->set_attr("output_names", output_names_v);
-
-    inputs.clear();
-    if (layerscount == 1) {
-      inputs.push_back(NewValueNode(p_relu));
-      inputs.push_back(x_const);
-    } else {
-      inputs.push_back(NewValueNode(p_relu));
-      inputs.push_back(ptr_formerlayer);
-    }
-    auto kernelptr_floor = g->NewCNode(inputs);
-    kernelptr_floor->set_abstract(x_abstract);
-    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-    KernelBuildInfoBuilder builder;
-    builder.SetInputsFormat({kOpFormat_NCHW});
-    builder.SetOutputsFormat({kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id()});
-    builder.SetOutputsDeviceType({kFloat32->type_id()});
-    builder.SetKernelType(KernelType::TBE_KERNEL);
-    builder.SetFusionType(kernel::FusionType::ELEMWISE);
-    builder.SetProcessor(kernel::Processor::AICORE);
-    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
-    ptr_formerlayer = kernelptr_floor;
-    layerscount++;
-  }
-  std::string name = "test_construct_" + std::to_string(layerscount) + "layers_graph.dot";
-  if (conv_flag) {
-    tensor::TensorPtr y_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-    y_tensor->set_device_info(device_info);
-    tensor::TensorPtr z_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-    z_tensor->set_device_info(device_info);
-    auto y_const = NewValueNode(y_tensor);
-    auto z_const = NewValueNode(y_tensor);
-
-    y_const->set_abstract(y_tensor->ToAbstract());
-    z_const->set_abstract(z_tensor->ToAbstract());
-
-    g->MutableInputs()->push_back(y_const);
-
-    if (fusiontype == kernel::FusionType::CONVLUTION) {
-      auto p_conv = std::make_shared<Primitive>("Conv2D");
-      std::vector<std::string> input_names = {"x", "y"};
-      std::vector<std::string> output_names = {"output"};
-
-      ValuePtr input_names_v = MakeValue(input_names);
-      ValuePtr output_names_v = MakeValue(output_names);
-      p_conv->set_attr("input_names", input_names_v);
-      p_conv->set_attr("output_names", output_names_v);
-
-      inputs.clear();
-      inputs.push_back(NewValueNode(p_conv));
-      inputs.push_back(y_const);
-      inputs.push_back(ptr_formerlayer);
-    } else {
-      auto p_conv = std::make_shared<Primitive>("ReduceOrSegment");
-      std::vector<std::string> input_names = {"x"};
-      std::vector<std::string> output_names = {"output"};
-
-      ValuePtr input_names_v = MakeValue(input_names);
-      ValuePtr output_names_v = MakeValue(output_names);
-      p_conv->set_attr("input_names", input_names_v);
-      p_conv->set_attr("output_names", output_names_v);
-
-      inputs.clear();
-      inputs.push_back(NewValueNode(p_conv));
-      inputs.push_back(ptr_formerlayer);
-    }
-
-    auto kernelptr_conv = g->NewCNode(inputs);
-    kernelptr_conv->set_abstract(x_abstract);
-    kernelptr_conv->set_kernel_info(std::make_shared<device::KernelInfo>());
-    KernelBuildInfoBuilder builder;
-    if (fusiontype == kernel::FusionType::CONVLUTION) {
-      builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-      builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-    } else {
-      builder.SetInputsFormat({kOpFormat_NCHW});
-      builder.SetInputsDeviceType({kFloat32->type_id()});
-    }
-    builder.SetOutputsFormat({kOpFormat_NCHW});
-    builder.SetOutputsDeviceType({kFloat32->type_id()});
-    builder.SetKernelType(KernelType::TBE_KERNEL);
-    builder.SetFusionType(fusiontype);
-    builder.SetProcessor(kernel::Processor::AICORE);
-    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_conv.get());
-    ptr_formerlayer = kernelptr_conv;
-    name = "test_conv_" + std::to_string(layerscount) + "layers_graph.dot";
-  }
-  // return res
-  auto p_return = std::make_shared<Primitive>("return");
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_return));
-  inputs.push_back(ptr_formerlayer);
-  auto ret = g->NewCNode(inputs);
-  ret->set_abstract(x_abstract);
-
-  g->set_return(ret);
-
-  draw::Draw(name, g);
-
-  return g;
-}
-
-CNodePtr CreateKernelGraphBranch(KernelGraphPtr g, CNodePtr inputptr, int layers,
-                                 const kernel::FusionType fusiontype = kernel::FusionType::CONVLUTION) {
-  std::vector<int> shp = {1, 3, 3, 4};
-  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
-  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
-  x_tensor->set_device_info(device_info);
-  auto x_abstract = x_tensor->ToAbstract();
-  auto x_const = NewValueNode(x_tensor);
-  x_const->set_abstract(x_abstract);
-
-  CNodePtr ptr_formerlayer = inputptr;
-  while (layers--) {
-    auto p_relu = std::make_shared<Primitive>("ReLU6");
-    std::vector<std::string> input_names = {"x"};
-    std::vector<std::string> output_names = {"output"};
-    ValuePtr input_names_v = MakeValue(input_names);
-    ValuePtr output_names_v = MakeValue(output_names);
-    p_relu->set_attr("input_names", input_names_v);
-    p_relu->set_attr("output_names", output_names_v);
-
-    std::vector<AnfNodePtr> inputs;
-    inputs.clear();
-    inputs.push_back(NewValueNode(p_relu));
-    inputs.push_back(ptr_formerlayer);
-    auto kernelptr_floor = g->NewCNode(inputs);
-    kernelptr_floor->set_abstract(x_abstract);
-    kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-    KernelBuildInfoBuilder builder;
-    builder.SetInputsFormat({kOpFormat_NCHW});
-    builder.SetOutputsFormat({kOpFormat_NCHW});
-    builder.SetInputsDeviceType({kFloat32->type_id()});
-    builder.SetOutputsDeviceType({kFloat32->type_id()});
-    builder.SetKernelType(KernelType::TBE_KERNEL);
-    builder.SetFusionType(kernel::FusionType::ELEMWISE);
-    builder.SetProcessor(kernel::Processor::AICORE);
-    AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_floor.get());
-    ptr_formerlayer = kernelptr_floor;
-  }
-
-  tensor::TensorPtr y_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  y_tensor->set_device_info(device_info);
-  tensor::TensorPtr z_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  z_tensor->set_device_info(device_info);
-  auto y_const = NewValueNode(y_tensor);
-  auto z_const = NewValueNode(y_tensor);
-
-  y_const->set_abstract(y_tensor->ToAbstract());
-  z_const->set_abstract(z_tensor->ToAbstract());
-
-  g->MutableInputs()->push_back(y_const);
-
-  auto p_conv = std::make_shared<Primitive>("Conv2D");
-  std::vector<std::string> input_names = {"x", "y"};
-  std::vector<std::string> output_names = {"output"};
-
-  ValuePtr input_names_v = MakeValue(input_names);
-  ValuePtr output_names_v = MakeValue(output_names);
-  p_conv->set_attr("input_names", input_names_v);
-  p_conv->set_attr("output_names", output_names_v);
-
-  std::vector<AnfNodePtr> inputs;
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_conv));
-  inputs.push_back(y_const);
-  inputs.push_back(ptr_formerlayer);
-
-  auto kernelptr_conv = g->NewCNode(inputs);
-  kernelptr_conv->set_abstract(x_abstract);
-  kernelptr_conv->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder;
-  builder.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-  builder.SetOutputsFormat({kOpFormat_NCHW});
-  builder.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-  builder.SetOutputsDeviceType({kFloat32->type_id()});
-  builder.SetKernelType(KernelType::TBE_KERNEL);
-  builder.SetFusionType(fusiontype);
-  builder.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_conv.get());
-  return kernelptr_conv;
-}
-
-static KernelGraphPtr CreateKernelGraphForMultiUse(uint32_t targetlayer1s, uint32_t targetlayer2s) {
-  /*  @mindspore
-   * def f(x):
-   *     multi_use=relu(x)
-   *     y=relu(multi_use)
-   *     z=relu(multi_use)
-   *     ret=relu(y, z)
-   *     return ret
-   */
-  KernelGraphPtr g = std::make_shared<KernelGraph>();
-  std::vector<AnfNodePtr> inputs;
-  // x is input tensor.
-  std::vector<int> shp = {1, 3, 3, 4};
-  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
-  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
-  x_tensor->set_device_info(device_info);
-
-  auto x_abstract = x_tensor->ToAbstract();
-  auto x_const = NewValueNode(x_tensor);
-  x_const->set_abstract(x_abstract);
-
-  g->MutableInputs()->push_back(x_const);
-
-  auto p_multi = std::make_shared<Primitive>("MULTI_USE_ReLU6");
-  std::vector<std::string> input_names = {"x"};
-  std::vector<std::string> output_names = {"output"};
-  ValuePtr input_names_v = MakeValue(input_names);
-  ValuePtr output_names_v = MakeValue(output_names);
-  p_multi->set_attr("input_names", input_names_v);
-  p_multi->set_attr("output_names", output_names_v);
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_multi));
-  inputs.push_back(x_const);
-  auto kernelptr_multi = g->NewCNode(inputs);
-  kernelptr_multi->set_abstract(x_abstract);
-  kernelptr_multi->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder;
-  builder.SetInputsFormat({kOpFormat_NCHW});
-  builder.SetOutputsFormat({kOpFormat_NCHW});
-  builder.SetInputsDeviceType({kFloat32->type_id()});
-  builder.SetOutputsDeviceType({kFloat32->type_id()});
-  builder.SetKernelType(KernelType::TBE_KERNEL);
-  builder.SetFusionType(kernel::FusionType::ELEMWISE);
-  builder.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get());
-
-  CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s);
-  CNodePtr outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s);
-
-  auto p_relu = std::make_shared<Primitive>("ReLU6");
-  input_names = {"x"};
-  output_names = {"output"};
-  input_names_v = MakeValue(input_names);
-  output_names_v = MakeValue(output_names);
-  p_relu->set_attr("input_names", input_names_v);
-  p_relu->set_attr("output_names", output_names_v);
-
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_relu));
-  inputs.push_back(outptrbranch1);
-  inputs.push_back(outptrbranch2);
-  auto kernelptr_floor = g->NewCNode(inputs);
-  kernelptr_floor->set_abstract(x_abstract);
-  kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder1;
-  builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-  builder1.SetOutputsFormat({kOpFormat_NCHW});
-  builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-  builder1.SetOutputsDeviceType({kFloat32->type_id()});
-  builder1.SetKernelType(KernelType::TBE_KERNEL);
-  builder1.SetFusionType(kernel::FusionType::ELEMWISE);
-  builder1.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get());
-
-  // return res
-  auto p_return = std::make_shared<Primitive>("return");
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_return));
-  inputs.push_back(kernelptr_floor);
-  auto ret = g->NewCNode(inputs);
-  ret->set_abstract(x_abstract);
-
-  g->set_return(ret);
-  string name = "multi_use_graph.dot";
-  draw::Draw(name, g);
-
-  return g;
-}
-#ifdef BUFFER_FUSION_MULTI_OUT
-static KernelGraphPtr CreateKernelGraphForMultiOutputWithLinearInput(
-  uint32_t targetlayer1s, uint32_t targetlayer2s, bool use_flag = true,
-  const kernel::FusionType fusion_type = kernel::FusionType::CONVLUTION) {
-  KernelGraphPtr g = std::make_shared<KernelGraph>();
-  std::vector<AnfNodePtr> inputs;
-  // x is input tensor.
-  std::vector<int> shp = {1, 3, 3, 4};
-  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
-  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
-  x_tensor->set_device_info(device_info);
-
-  auto x_abstract = x_tensor->ToAbstract();
-  auto x_const = NewValueNode(x_tensor);
-  x_const->set_abstract(x_abstract);
-  g->MutableInputs()->push_back(x_const);
-
-  auto p_relu0 = std::make_shared<Primitive>("ReLU6");
-  std::vector<std::string> input_names0 = {"x"};
-  std::vector<std::string> output_names0 = {"output"};
-  ValuePtr input_names_v0 = MakeValue(input_names0);
-  ValuePtr output_names_v0 = MakeValue(output_names0);
-  p_relu0->set_attr("input_names", input_names_v0);
-  p_relu0->set_attr("output_names", output_names_v0);
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_relu0));
-  inputs.push_back(x_const);
-  auto kernelptr_floor0 = g->NewCNode(inputs);
-  kernelptr_floor0->set_abstract(x_abstract);
-  kernelptr_floor0->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder0;
-  builder0.SetInputsFormat({kOpFormat_NCHW});
-  builder0.SetOutputsFormat({kOpFormat_NCHW});
-  builder0.SetInputsDeviceType({kFloat32->type_id()});
-  builder0.SetOutputsDeviceType({kFloat32->type_id()});
-  builder0.SetKernelType(KernelType::TBE_KERNEL);
-  builder0.SetFusionType(kernel::FusionType::ELEMWISE);
-  builder0.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder0.Build(), kernelptr_floor0.get());
-  CNodePtr ptr_formerlayer;
-  ptr_formerlayer = kernelptr_floor0;
-
-  auto p_multi = std::make_shared<Primitive>("MULTI_USE_ReLU6");
-  std::vector<std::string> input_names = {"x"};
-  std::vector<std::string> output_names = {"output"};
-  ValuePtr input_names_v = MakeValue(input_names);
-  ValuePtr output_names_v = MakeValue(output_names);
-  p_multi->set_attr("input_names", input_names_v);
-  p_multi->set_attr("output_names", output_names_v);
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_multi));
-  inputs.push_back(ptr_formerlayer);
-  auto kernelptr_multi = g->NewCNode(inputs);
-  kernelptr_multi->set_abstract(x_abstract);
-  kernelptr_multi->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder;
-  builder.SetInputsFormat({kOpFormat_NCHW});
-  builder.SetOutputsFormat({kOpFormat_NCHW});
-  builder.SetInputsDeviceType({kFloat32->type_id()});
-  builder.SetOutputsDeviceType({kFloat16->type_id()});
-  builder.SetKernelType(KernelType::TBE_KERNEL);
-  builder.SetFusionType(kernel::FusionType::ELEMWISE);
-  builder.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get());
-
-  CNodePtr outptrbranch2 = nullptr;
-  CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s, fusion_type);
-  if (use_flag) {
-    outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s, fusion_type);
-  }
-  auto p_relu = std::make_shared<Primitive>("ReLU6");
-  input_names = {"x"};
-  output_names = {"output"};
-  input_names_v = MakeValue(input_names);
-  output_names_v = MakeValue(output_names);
-  p_relu->set_attr("input_names", input_names_v);
-  p_relu->set_attr("output_names", output_names_v);
-
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_relu));
-  inputs.push_back(outptrbranch1);
-  if (use_flag) {
-    inputs.push_back(outptrbranch2);
-  }
-
-  auto kernelptr_floor = g->NewCNode(inputs);
-  kernelptr_floor->set_abstract(x_abstract);
-  kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder1;
-  if (use_flag) {
-    builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-    builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-  } else {
-    builder1.SetInputsFormat({kOpFormat_NCHW});
-    builder1.SetInputsDeviceType({kFloat32->type_id()});
-  }
-  builder1.SetOutputsFormat({kOpFormat_NCHW});
-  builder1.SetOutputsDeviceType({kFloat32->type_id()});
-  builder1.SetKernelType(KernelType::TBE_KERNEL);
-  builder1.SetFusionType(kernel::FusionType::ELEMWISE);
-  builder1.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get());
-  cout << "built two branches done" << endl;
-  // return res
-  auto p_return = std::make_shared<Primitive>("return");
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_return));
-  inputs.push_back(kernelptr_floor);
-  auto ret = g->NewCNode(inputs);
-  ret->set_abstract(x_abstract);
-
-  g->set_return(ret);
-  string name = "multi_use_graph.dot";
-  draw::Draw(name, g);
-
-  return g;
-}
-
-static KernelGraphPtr CreateKernelGraphForMultiOutput(
-  uint32_t targetlayer1s, uint32_t targetlayer2s, bool use_flag = true,
-  const kernel::FusionType fusion_type = kernel::FusionType::CONVLUTION) {
-  KernelGraphPtr g = std::make_shared<KernelGraph>();
-  std::vector<AnfNodePtr> inputs;
-  // x is input tensor.
-  std::vector<int> shp = {1, 3, 3, 4};
-  tensor::TensorPtr x_tensor = std::make_shared<tensor::Tensor>(kFloat32->type_id(), shp);
-  TensorTypePtr tensor_type = std::make_shared<TensorType>(kFloat32);
-  tensor::DeviceInfo device_info{kOpFormat_NCHW, tensor_type};
-  x_tensor->set_device_info(device_info);
-
-  auto x_abstract = x_tensor->ToAbstract();
-  auto x_const = NewValueNode(x_tensor);
-  x_const->set_abstract(x_abstract);
-  g->MutableInputs()->push_back(x_const);
-
-  auto p_multi = std::make_shared<Primitive>("MULTI_USE_ReLU6");
-  std::vector<std::string> input_names = {"x"};
-  std::vector<std::string> output_names = {"output"};
-  ValuePtr input_names_v = MakeValue(input_names);
-  ValuePtr output_names_v = MakeValue(output_names);
-  p_multi->set_attr("input_names", input_names_v);
-  p_multi->set_attr("output_names", output_names_v);
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_multi));
-  inputs.push_back(x_const);
-  auto kernelptr_multi = g->NewCNode(inputs);
-  kernelptr_multi->set_abstract(x_abstract);
-  kernelptr_multi->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder;
-  builder.SetInputsFormat({kOpFormat_NCHW});
-  builder.SetOutputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-  builder.SetInputsDeviceType({kFloat32->type_id()});
-  builder.SetOutputsDeviceType({kFloat16->type_id(), kFloat32->type_id()});
-  builder.SetKernelType(KernelType::TBE_KERNEL);
-  builder.SetFusionType(kernel::FusionType::ELEMWISE);
-  builder.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder.Build(), kernelptr_multi.get());
-
-  CNodePtr outptrbranch2 = nullptr;
-  CNodePtr outptrbranch1 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer2s, fusion_type);
-  if (use_flag) {
-    outptrbranch2 = CreateKernelGraphBranch(g, kernelptr_multi, targetlayer1s, fusion_type);
-  }
-  auto p_relu = std::make_shared<Primitive>("ReLU6");
-  input_names = {"x"};
-  output_names = {"output"};
-  input_names_v = MakeValue(input_names);
-  output_names_v = MakeValue(output_names);
-  p_relu->set_attr("input_names", input_names_v);
-  p_relu->set_attr("output_names", output_names_v);
-
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_relu));
-  inputs.push_back(outptrbranch1);
-  if (use_flag) {
-    inputs.push_back(outptrbranch2);
-  }
-  auto kernelptr_floor = g->NewCNode(inputs);
-  kernelptr_floor->set_abstract(x_abstract);
-  kernelptr_floor->set_kernel_info(std::make_shared<device::KernelInfo>());
-  KernelBuildInfoBuilder builder1;
-  if (use_flag) {
-    builder1.SetInputsFormat({kOpFormat_NCHW, kOpFormat_NCHW});
-    builder1.SetInputsDeviceType({kFloat32->type_id(), kFloat32->type_id()});
-  } else {
-    builder1.SetInputsFormat({kOpFormat_NCHW});
-    builder1.SetInputsDeviceType({kFloat32->type_id()});
-  }
-  builder1.SetOutputsFormat({kOpFormat_NCHW});
-  builder1.SetOutputsDeviceType({kFloat32->type_id()});
-  builder1.SetKernelType(KernelType::TBE_KERNEL);
-  builder1.SetFusionType(kernel::FusionType::ELEMWISE);
-  builder1.SetProcessor(kernel::Processor::AICORE);
-  AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), kernelptr_floor.get());
-
-  // return res
-  auto p_return = std::make_shared<Primitive>("return");
-  inputs.clear();
-  inputs.push_back(NewValueNode(p_return));
-  inputs.push_back(kernelptr_floor);
-  auto ret = g->NewCNode(inputs);
-  ret->set_abstract(x_abstract);
-
-  g->set_return(ret);
-  string name = "multi_use_graph.dot";
-  draw::Draw(name, g);
-
-  return g;
-}
-#endif
-TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn1) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(1);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  draw::Draw("before_BufferFusionlayerSingleIn1.dot", graph_ptr);
-
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 8);
-  buffer_fusion.Run(graph_ptr);
-  draw::Draw("after_BufferFusionlayerSingleIn1.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 6);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn2) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(2);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  draw::Draw("before_BufferFusionlayerSingleIn2.dot", graph_ptr);
-
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 10);
-  buffer_fusion.Run(graph_ptr);
-  draw::Draw("after_BufferFusionlayerSingleIn2.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 6);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionlayerSingleIn3) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionSingleIn(3);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  draw::Draw("before_BufferFusionlayerSingleIn3.dot", graph_ptr);
-
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 12);
-  buffer_fusion.Run(graph_ptr);
-  draw::Draw("after_BufferFusionlayerSingleIn3.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 6);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionlayer1) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(1);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-  buffer_fusion.Run(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionlayer2) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 7);
-  buffer_fusion.Run(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionlayer4) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(4);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 11);
-  buffer_fusion.Run(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionlayer6) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(6);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 15);
-  buffer_fusion.Run(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 7);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionlayer8) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(8);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 19);
-  buffer_fusion.Run(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 7);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionconv1) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(1, true);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), false);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionconv8) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(8, true);
-  draw::Draw("before_BufferFusionconv8.dot", graph_ptr);
-
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
-  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
-  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
-  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
-  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
-  std::vector<int32_t> fusion_ids;
-  for (auto &buffer_fusion_info : buffer_fusion_infos) {
-    fusion_ids.push_back(buffer_fusion_info.first);
-  }
-  std::sort(fusion_ids.begin(), fusion_ids.end());
-  for (auto &fusion_id : fusion_ids) {
-    buffer_fusion.ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_ptr, graph_ptr.get());
-  }
-  draw::Draw("after_BufferFusionconv8.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 10);
-}
-
-#ifdef BUFFER_FUSION_MULTI_OUT
-TEST_F(TestHWBufferFusion, BufferFusionMultiOutWithLinearInput) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForMultiOutputWithLinearInput(1, 1, true, mindspore::kernel::OPAQUE);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 19);
-
-  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
-  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
-  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
-  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
-  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
-  for (auto &buffer_fusion_info : buffer_fusion_infos) {
-    EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3);
-    EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1);
-    EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 2);
-    buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get());
-  }
-  ASSERT_EQ(manager->all_nodes().size(), 21);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionMultiOut) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForMultiOutput(1, 1, true, mindspore::kernel::OPAQUE);
-  draw::Draw("before_BufferFusionMultiOut.dot", graph_ptr);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 17);
-  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
-  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
-  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
-  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
-  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
-  for (auto &buffer_fusion_info : buffer_fusion_infos) {
-    EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 2);
-    EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1);
-    EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 2);
-    buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get());
-  }
-  draw::Draw("after_BufferFusionMultiOut.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 21);
-}
-#endif
-
-TEST_F(TestHWBufferFusion, BufferMultiUse) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForMultiUse(3, 4);
-  draw::Draw("before_BufferMultiUse.dot", graph_ptr);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
-  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
-  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
-  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
-  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
-  std::vector<int32_t> fusion_ids;
-  for (auto &buffer_fusion_info : buffer_fusion_infos) {
-    fusion_ids.push_back(buffer_fusion_info.first);
-  }
-  std::sort(fusion_ids.begin(), fusion_ids.end());
-  for (auto &fusion_id : fusion_ids) {
-    buffer_fusion.ReplaceFusionOp(buffer_fusion_infos[fusion_id], kernel_ptr, graph_ptr.get());
-  }
-  draw::Draw("after_BufferMultiUse.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 15);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionReduce) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2, true, mindspore::kernel::COMMREDUCE);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
-  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
-  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
-  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
-  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
-  for (auto &buffer_fusion_info : buffer_fusion_infos) {
-    EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3);
-    EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1);
-    EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 1);
-    buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get());
-  }
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionSegment) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusion(2, true, mindspore::kernel::SEGMENT);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(buffer_fusion.MatchBufferFusionPattern(*graph_ptr), true);
-  kernel::KernelPackPtr kernel_pack = std::make_shared<kernel::KernelPack>();
-  auto kernel_ptr = std::make_shared<kernel::TbeKernelMod>(kernel_pack);
-  std::unordered_map<int, BufferFusionInfo_t> buffer_fusion_infos;
-  buffer_fusion.GetBufferFusionInfo(*graph_ptr, &buffer_fusion_infos);
-  for (auto &buffer_fusion_info : buffer_fusion_infos) {
-    EXPECT_EQ(buffer_fusion_info.second.anf_nodes.size(), 3);
-    EXPECT_EQ(buffer_fusion_info.second.inputs_list.size(), 1);
-    EXPECT_EQ(buffer_fusion_info.second.outputs_list.size(), 1);
-    buffer_fusion.ReplaceFusionOp(buffer_fusion_info.second, kernel_ptr, graph_ptr.get());
-  }
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionEltwise1BeforeAnd3After) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(1);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  draw::Draw("before_BufferFusionEltwiseBeforeAndAfter1.dot", graph_ptr);
-
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 13);
-  buffer_fusion.Run(graph_ptr);
-  draw::Draw("after_BufferFusionEltwiseBeforeAndAfter1.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionEltwise2BeforeAnd3After) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(2);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  draw::Draw("before_BufferFusionEltwiseBeforeAndAfter2.dot", graph_ptr);
-
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 15);
-  buffer_fusion.Run(graph_ptr);
-  draw::Draw("after_BufferFusionEltwiseBeforeAndAfter2.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionEltwise3BeforeAnd3After) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionEltwiseBeforeAndAfter(3);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  draw::Draw("before_BufferFusionEltwiseBeforeAndAfter3.dot", graph_ptr);
-
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 17);
-  buffer_fusion.Run(graph_ptr);
-  draw::Draw("after_BufferFusionEltwiseBeforeAndAfter3.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 5);
-}
-
-TEST_F(TestHWBufferFusion, BufferFusionMultipleIn) {
-  KernelGraphPtr graph_ptr = CreateKernelGraphForBufferFusionMultipleIn(2);
-  ASSERT_TRUE(nullptr != graph_ptr);
-  draw::Draw("before_BufferFusionMultipleIn.dot", graph_ptr);
-
-  mindspore::opt::BufferFusion buffer_fusion = BufferFusion();
-  std::vector<FuncGraphPtr> graphs{graph_ptr};
-  FuncGraphManagerPtr manager = std::make_shared<FuncGraphManager>(graphs);
-  manager->AddFuncGraph(graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 11);
-  buffer_fusion.Run(graph_ptr);
-  draw::Draw("after_BufferFusionMultipleIn.dot", graph_ptr);
-  ASSERT_EQ(manager->all_nodes().size(), 7);
-}
-}  // namespace opt
-}  // namespace mindspore

From 99bbb3a3b2a0eac1c224256eb2782149733b10ef Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Sun, 26 Apr 2020 17:25:12 +0800
Subject: [PATCH 091/242] modify scripts for pylint

---
 example/resnet101_imagenet/crossentropy.py |  6 +--
 example/resnet101_imagenet/dataset.py      |  2 +-
 example/resnet101_imagenet/lr_generator.py |  5 +--
 example/resnet101_imagenet/train.py        | 20 ++++------
 example/resnet101_imagenet/var_init.py     | 43 +++++++++++-----------
 mindspore/model_zoo/resnet.py              |  3 +-
 6 files changed, 37 insertions(+), 42 deletions(-)

diff --git a/example/resnet101_imagenet/crossentropy.py b/example/resnet101_imagenet/crossentropy.py
index e636b8529e..1145a41804 100755
--- a/example/resnet101_imagenet/crossentropy.py
+++ b/example/resnet101_imagenet/crossentropy.py
@@ -12,15 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""define loss function for network"""
 from mindspore.nn.loss.loss import _Loss
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore import Tensor
 from mindspore.common import dtype as mstype
 import mindspore.nn as nn
- 
-"""define loss function for network"""
+
 class CrossEntropy(_Loss):
+    """the redefined loss function with SoftmaxCrossEntropyWithLogits"""
     def __init__(self, smooth_factor=0., num_classes=1001):
         super(CrossEntropy, self).__init__()
         self.onehot = P.OneHot()
@@ -28,7 +29,6 @@ class CrossEntropy(_Loss):
         self.off_value = Tensor(1.0 * smooth_factor / (num_classes -1), mstype.float32)
         self.ce = nn.SoftmaxCrossEntropyWithLogits()
         self.mean = P.ReduceMean(False)
- 
     def construct(self, logit, label):
         one_hot_label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
         loss = self.ce(logit, one_hot_label)
diff --git a/example/resnet101_imagenet/dataset.py b/example/resnet101_imagenet/dataset.py
index 920e1c093c..27d93dc086 100755
--- a/example/resnet101_imagenet/dataset.py
+++ b/example/resnet101_imagenet/dataset.py
@@ -57,7 +57,7 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
     normalize_op = C.Normalize((0.475, 0.451, 0.392), (0.275, 0.267, 0.278))
     changeswap_op = C.HWC2CHW()
 
-    trans=[]
+    trans = []
     if do_train:
         trans = [decode_op,
                  random_resize_crop_op,
diff --git a/example/resnet101_imagenet/lr_generator.py b/example/resnet101_imagenet/lr_generator.py
index b2271a1382..67ff1fef25 100755
--- a/example/resnet101_imagenet/lr_generator.py
+++ b/example/resnet101_imagenet/lr_generator.py
@@ -13,9 +13,8 @@
 # limitations under the License.
 # ============================================================================
 """learning rate generator"""
-import numpy as np
 import math
- 
+import numpy as np
 
 def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
     lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
@@ -50,7 +49,7 @@ def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch):
             decayed = linear_decay * cosine_decay + 0.00001
             lr = base_lr * decayed
         lr_each_step.append(lr)
-    return np.array(lr_each_step).astype(np.float32)  
+    return np.array(lr_each_step).astype(np.float32)
 
 def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
     """
diff --git a/example/resnet101_imagenet/train.py b/example/resnet101_imagenet/train.py
index 2df6c3bad4..37f49ec3d7 100755
--- a/example/resnet101_imagenet/train.py
+++ b/example/resnet101_imagenet/train.py
@@ -14,11 +14,12 @@
 # ============================================================================
 """train_imagenet."""
 import os
+import math
 import argparse
 import random
 import numpy as np
 from dataset import create_dataset
-from lr_generator import get_lr
+from lr_generator import get_lr, warmup_cosine_annealing_lr
 from config import config
 from mindspore import context
 from mindspore import Tensor
@@ -33,7 +34,7 @@ from mindspore.communication.management import init
 import mindspore.nn as nn
 from crossentropy import CrossEntropy
 from var_init import default_recurisive_init, KaimingNormal
-from mindspore.common import initializer as weight_init
+import mindspore.common.initializer as weight_init
 
 random.seed(1)
 np.random.seed(1)
@@ -69,23 +70,20 @@ if __name__ == '__main__':
 
     epoch_size = config.epoch_size
     net = resnet101(class_num=config.class_num)
-    
     # weight init
     default_recurisive_init(net)
     for name, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
             cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
-                                                                mode='fan_out', nonlinearity='relu'),
+                                                                              mode='fan_out', nonlinearity='relu'),
                                                                 cell.weight.default_input.shape(),
                                                                 cell.weight.default_input.dtype())
-            
     if not config.label_smooth:
         config.label_smooth_factor = 0.0
-    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num) 
-
+    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     if args_opt.do_train:
         dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
-                                 repeat_num=epoch_size, batch_size=config.batch_size)
+                repeat_num=epoch_size, batch_size=config.batch_size)
         step_size = dataset.get_dataset_size()
         loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
 
@@ -96,12 +94,10 @@ if __name__ == '__main__':
             lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
                                warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
                                lr_decay_mode='poly'))
-            
         opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                        config.weight_decay, config.loss_scale)
-
-        model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False, loss_scale_manager=loss_scale, metrics={'acc'}) 
-
+        model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False,
+                      loss_scale_manager=loss_scale, metrics={'acc'})
         time_cb = TimeMonitor(data_size=step_size)
         loss_cb = LossMonitor()
         cb = [time_cb, loss_cb]
diff --git a/example/resnet101_imagenet/var_init.py b/example/resnet101_imagenet/var_init.py
index af4cd64b3b..061ec94fbf 100755
--- a/example/resnet101_imagenet/var_init.py
+++ b/example/resnet101_imagenet/var_init.py
@@ -18,12 +18,10 @@ import numpy as np
 from mindspore.common import initializer as init
 import mindspore.nn as nn
 from mindspore import Tensor
-
  
 def calculate_gain(nonlinearity, param=None):
     r"""Return the recommended gain value for the given nonlinearity function.
-    The values are as follows:
- 
+    The values are as follows: 
     ================= ====================================================
     nonlinearity      gain
     ================= ====================================================
@@ -34,11 +32,9 @@ def calculate_gain(nonlinearity, param=None):
     ReLU              :math:`\sqrt{2}`
     Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
     ================= ====================================================
- 
     Args:
         nonlinearity: the non-linear function (`nn.functional` name)
         param: optional parameter for the non-linear function
- 
     """
     linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
     if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
@@ -57,17 +53,15 @@ def calculate_gain(nonlinearity, param=None):
             raise ValueError("negative_slope {} not a valid number".format(param))
         return math.sqrt(2.0 / (1 + negative_slope ** 2))
     else:
-        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity)) 
-
+        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
+    
 def _calculate_correct_fan(array, mode):
     mode = mode.lower()
     valid_modes = ['fan_in', 'fan_out']
     if mode not in valid_modes:
-        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
- 
+        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes)) 
     fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
-    return fan_in if mode == 'fan_in' else fan_out 
- 
+    return fan_in if mode == 'fan_in' else fan_out
 
 def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     r"""Fills the input `Tensor` with values according to the method
@@ -75,12 +69,10 @@ def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     performance on ImageNet classification` - He, K. et al. (2015), using a
     uniform distribution. The resulting tensor will have values sampled from
     :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
- 
     .. math::
         \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
- 
     Also known as He initialization.
- 
+
     Args:
         array: an n-dimensional `tensor`
         a: the negative slope of the rectifier used after this layer (only
@@ -91,8 +83,7 @@ def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
             backwards pass.
         nonlinearity: the non-linear function (`nn.functional` name),
             recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
- 
+    """ 
     fan = _calculate_correct_fan(array, mode)
     gain = calculate_gain(nonlinearity, a)
     std = gain / math.sqrt(fan)
@@ -129,6 +120,7 @@ def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     return np.random.normal(0, std, array.shape)
  
 def _calculate_fan_in_and_fan_out(array):
+    """calculate the fan_in and fan_out for input array"""
     dimensions = len(array.shape)
     if dimensions < 2:
         raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
@@ -166,18 +158,27 @@ class KaimingNormal(init.Initializer):
         init._assignment(arr, tmp)
 
 def default_recurisive_init(custom_cell):
+    """weight init for conv2d and dense"""
     for name, cell in custom_cell.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), cell.weight.default_input.shape(), cell.weight.default_input.dtype())
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), 
+                    cell.weight.default_input.shape(), 
+                    cell.weight.default_input.dtype())
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, cell.bias.default_input.shape()), cell.bias.default_input.dtype())
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, 
+                    cell.bias.default_input.shape()), 
+                    cell.bias.default_input.dtype())
         elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), cell.weight.default_input.shape(), cell.weight.default_input.dtype())
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), 
+                    cell.weight.default_input.shape(), 
+                    cell.weight.default_input.dtype())
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, cell.bias.default_input.shape()), cell.bias.default_input.dtype())
-        elif isinstance(cell, nn.BatchNorm2d) or isinstance(cell, nn.BatchNorm1d):
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, 
+                    cell.bias.default_input.shape()), 
+                    cell.bias.default_input.dtype())
+        elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
             pass
diff --git a/mindspore/model_zoo/resnet.py b/mindspore/model_zoo/resnet.py
index a243ff5a2a..d67f26814c 100755
--- a/mindspore/model_zoo/resnet.py
+++ b/mindspore/model_zoo/resnet.py
@@ -279,5 +279,4 @@ def resnet101(class_num=1001):
                   [64, 256, 512, 1024],
                   [256, 512, 1024, 2048],
                   [1, 2, 2, 2],
-                  class_num)
-    
+                  class_num)
\ No newline at end of file

From 9cb71441ea80d6324bc795009330b8be2a4d3e22 Mon Sep 17 00:00:00 2001
From: buxue <yiren19920727@163.com>
Date: Sun, 26 Apr 2020 15:25:26 +0800
Subject: [PATCH 092/242] fix bugs of Acosh, TopK, ResizeNearestNeighbor,
 DepthwiseConv2dNative

---
 mindspore/ccsrc/transform/util.cc     | 19 ++++++++-----------
 mindspore/nn/optim/momentum.py        |  2 +-
 mindspore/ops/operations/array_ops.py |  5 +++++
 mindspore/ops/operations/math_ops.py  |  3 ++-
 mindspore/ops/operations/nn_ops.py    | 23 ++++++++++++++---------
 tests/ut/python/ops/test_ops.py       |  4 ++--
 6 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/mindspore/ccsrc/transform/util.cc b/mindspore/ccsrc/transform/util.cc
index b1120ade6d..3f856fe564 100644
--- a/mindspore/ccsrc/transform/util.cc
+++ b/mindspore/ccsrc/transform/util.cc
@@ -171,20 +171,17 @@ GeTensorPtr TransformUtil::ConvertTensor(const MeTensorPtr &tensor, const std::s
     MS_LOG(ERROR) << "The Me Tensor data type size is wrong, type size is: " << type_size;
     return nullptr;
   }
-  // get tensor buff size
-  size_t data_buff_size = 0;
   size_t elements_num = IntToSize(tensor->ElementsNum());
-  if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size >= elements_num) {
-    data_buff_size = elements_num * type_size;
+  if (UINT_MAX / type_size < elements_num) {
+    MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size
+                  << " overflowed UINT_MAX: " << UINT_MAX << ".";
+    return nullptr;
   }
+
+  // get tensor buff size
+  size_t data_buff_size = elements_num * type_size;
   if (data_buff_size == 0) {
-    if (elements_num > 0 && type_size > 0 && UINT_MAX / type_size < elements_num) {
-      MS_LOG(ERROR) << "The required Me Tensor data buff size " << elements_num << " x " << type_size
-                    << " overflowed UINT_MAX: " << UINT_MAX << ".";
-    } else {
-      MS_LOG(ERROR) << "The Me Tensor data buff size is 0.";
-    }
-    return nullptr;
+    MS_LOG(INFO) << "The Me Tensor data buff size is 0.";
   }
   // create ge tensor
   auto desc = GetGeTensorDesc(tensor->shape_c(), tensor->data_type(), format);
diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py
index c69e226df9..67de590c5f 100755
--- a/mindspore/nn/optim/momentum.py
+++ b/mindspore/nn/optim/momentum.py
@@ -56,7 +56,7 @@ class Momentum(Optimizer):
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
 
     Outputs:
-        Tensor[bool], the value is True.
+        tuple[bool], all elements are True.
 
     Raises:
         ValueError: If the momentum is less than 0.0.
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index 2638658357..abffde1865 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -1885,6 +1885,11 @@ class ResizeNearestNeighbor(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, size, align_corners=False):
         """Init ResizeNearestNeighbor"""
+        validator.check_value_type("size", size, [tuple, list], self.name)
+        validator.check_value_type("align_corners", align_corners, [bool], self.name)
+        validator.check_integer("length of size", len(size), 2, Rel.EQ, self.name)
+        for i, value in enumerate(size):
+            validator.check_integer(f'{i}th value of size', value, 0, Rel.GE, self.name)
         self.init_prim_io_names(inputs=['image_in'], outputs=['image_out'])
 
     def infer_shape(self, x):
diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py
index 8de4108435..1dfe93136b 100644
--- a/mindspore/ops/operations/math_ops.py
+++ b/mindspore/ops/operations/math_ops.py
@@ -1251,7 +1251,8 @@ class Acosh(PrimitiveWithInfer):
     Compute inverse hyperbolic cosine of x element-wise.
 
     Inputs:
-        - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
+        - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`,
+          and the data type of 'input_x' is number, the element in 'input_x' should be greater than or equal to 1.
 
     Outputs:
         Tensor, has the same shape as `input_x`.
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index c03aa45490..dce0772682 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -753,8 +753,15 @@ class DepthwiseConv2dNative(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['x', 'w'], outputs=['output'])
         self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
         self.stride = _check_positive_int_or_tuple('stride', stride, self.name)
+        if self.stride[0] != self.stride[1]:
+            raise ValueError("The height and width of stride should be equal,"
+                             f"but got height:{self.stride[0]},  width:{self.stride[1]}")
         self.add_prim_attr('stride', (1, 1, self.stride[0], self.stride[1]))
+
         self.dilation = _check_positive_int_or_tuple('dilation', dilation, self.name)
+        if self.dilation[0] != self.dilation[1]:
+            raise ValueError("The height and width of dilation should be equal,"
+                             f"but got height:{self.dilation[0]},  width:{self.dilation[1]}")
         self.add_prim_attr('dilation', (1, 1, self.dilation[0], self.dilation[1]))
         validator.check_value_type('pad', pad, (int,), self.name)
         self.pad_mode = validator.check_string('pad_mode', pad_mode, ['valid', 'same', 'pad'], self.name)
@@ -771,13 +778,11 @@ class DepthwiseConv2dNative(PrimitiveWithInfer):
         validator.check("x_shape[1]", x_shape[1], "w_shape[1]", w_shape[1], Rel.EQ, self.name)
         validator.check('kernel_size', self.kernel_size, 'w_shape[2:4]', tuple(w_shape[2:4]), Rel.EQ, self.name)
 
-        kernel_size_h = w_shape[2]
-        kernel_size_w = w_shape[3]
-        stride_h = self.stride[2]
-        stride_w = self.stride[3]
-        dilation_h = self.dilation[2]
-        dilation_w = self.dilation[3]
-
+        kernel_size_n, _, kernel_size_h, kernel_size_w = w_shape
+        _, _, stride_h, stride_w = self.stride
+        _, _, dilation_h, dilation_w = self.dilation
+        if kernel_size_n != 1:
+            raise ValueError(f"The batch of input weight should be 1, but got {kernel_size_n}")
         if self.pad_mode == "valid":
             h_out = math.ceil((x_shape[2] - dilation_h * (kernel_size_h - 1)) / stride_h)
             w_out = math.ceil((x_shape[3] - dilation_w * (kernel_size_w - 1)) / stride_w)
@@ -1198,8 +1203,8 @@ class TopK(PrimitiveWithInfer):
         >>> input_x = Tensor([1, 2, 3, 4, 5], mindspore.float16)
         >>> k = 3
         >>> values, indices = topk(input_x, k)
-        >>> assert values == Tensor(np.array([5, 4, 3]))
-        >>> assert indices == Tensor(np.array([4, 3, 2]))
+        >>> assert values == Tensor(np.array([5, 4, 3]), mstype.float16)
+        >>> assert indices == Tensor(np.array([4, 3, 2]), mstype.int32)
     """
 
     @prim_attr_register
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index d6622e76f4..bd1ce15824 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -793,8 +793,8 @@ test_case_nn_ops = [
         'desc_bprop': [[5, 5]]}),
     ('DepthwiseConv2dNative_1', {
         'block': P.DepthwiseConv2dNative(3, (3, 3), pad_mode="pad", pad=1, stride=2),
-        'desc_inputs': [[10, 32, 32, 32], [3, 32, 3, 3]],
-        'desc_bprop': [[10, 30, 16, 16]]}),
+        'desc_inputs': [[10, 32, 32, 32], [1, 32, 3, 3]],
+        'desc_bprop': [[10, 32, 16, 16]]}),
     ('DepthwiseConv2dNative_2', {
         'block': P.DepthwiseConv2dNative(1, (3, 3), pad_mode="same", pad=0, stride=1),
         'desc_inputs': [[2592, 2048, 4, 4], [1, 2048, 3, 3]],

From 248582d6fc89800d55d78a96b558d68e2fecf226 Mon Sep 17 00:00:00 2001
From: chang zherui <760161589@qq.com>
Date: Sun, 26 Apr 2020 18:13:28 +0800
Subject: [PATCH 093/242] modify version number

---
 README.md                                   | 18 +++++++++---------
 RELEASE.md                                  |  2 +-
 build.sh                                    |  4 ++--
 docker/README.md                            |  4 ++--
 docker/mindspore-cpu/0.1.0-alpha/Dockerfile |  2 +-
 docker/mindspore-gpu/0.1.0-alpha/Dockerfile |  2 +-
 setup.py                                    |  2 +-
 7 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 3de87d3fec..d480644124 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem.
 
 <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/>
 
-For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.1.0-alpha/architecture.html).
+For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html).
 
 ### Automatic Differentiation
 
@@ -76,7 +76,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
 
     ```
-    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
+    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
     ```
 
 2. Run the following command to verify the install.
@@ -96,10 +96,10 @@ currently the containerized build options are supported as follows:
 
 | Hardware Platform | Docker Image Repository | Tag | Description |
 | :---------------- | :---------------------- | :-- | :---------- |
-| CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. |
+| CPU | `mindspore/mindspore-cpu` | `0.2.0-alpha` | Production environment with pre-installed MindSpore `0.2.0-alpha` CPU release. |
 |  |  | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
 |  |  | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. |
-| GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. |
+| GPU | `mindspore/mindspore-gpu` | `0.2.0-alpha` | Production environment with pre-installed MindSpore `0.2.0-alpha` GPU release. |
 |  |  | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
 |  |  | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. |
 | Ascend | <center>&mdash;</center> | <center>&mdash;</center> | Coming soon. |
@@ -108,8 +108,8 @@ currently the containerized build options are supported as follows:
 
     For `CPU` backend, you can directly pull and run the image using the below command:
     ```
-    docker pull mindspore/mindspore-cpu:0.1.0-alpha
-    docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore'
+    docker pull mindspore/mindspore-cpu:0.2.0-alpha
+    docker run -it mindspore/mindspore-cpu:0.2.0-alpha python -c 'import mindspore'
     ```
 
 * GPU
@@ -126,8 +126,8 @@ currently the containerized build options are supported as follows:
 
     Then you can pull and run the image using the below command:
     ```
-    docker pull mindspore/mindspore-gpu:0.1.0-alpha
-    docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash
+    docker pull mindspore/mindspore-gpu:0.2.0-alpha
+    docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash
     ```
 
     To test if the docker image works, please execute the python code below and check the output:
@@ -161,7 +161,7 @@ please check out `docker` folder for the details.
 
 ## Quickstart
 
-See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/quick_start/quick_start.html)
+See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html)
 to implement the image classification.
 
 ## Docs
diff --git a/RELEASE.md b/RELEASE.md
index ce9064e4b1..265282547c 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,4 +1,4 @@
-# Release 0.1.0-alpha
+# Release 0.2.0-alpha
 
 ## Main Features
 
diff --git a/build.sh b/build.sh
index b48014ed93..0b60344980 100755
--- a/build.sh
+++ b/build.sh
@@ -433,9 +433,9 @@ build_predict()
 
     cd "${BASEPATH}/predict/output/"
     if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
-      tar -cf MSPredict-0.1.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
+      tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
     elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
-      tar -cf MSPredict-0.1.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
+      tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
     fi
     echo "success to build predict project!"
 }
diff --git a/docker/README.md b/docker/README.md
index c6851fe531..891a9b0782 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -7,11 +7,11 @@ This folder hosts all the `Dockerfile` to build MindSpore container images with
 * CPU
 
     ```
-    cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha
+    cd mindspore-cpu/0.2.0-alpha && docker build . -t mindspore/mindspore-cpu:0.2.0-alpha
     ```
 
 * GPU
 
     ```
-    cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha
+    cd mindspore-gpu/0.2.0-alpha && docker build . -t mindspore/mindspore-gpu:0.2.0-alpha
     ```
diff --git a/docker/mindspore-cpu/0.1.0-alpha/Dockerfile b/docker/mindspore-cpu/0.1.0-alpha/Dockerfile
index c9fb7c2b88..9524cee745 100644
--- a/docker/mindspore-cpu/0.1.0-alpha/Dockerfile
+++ b/docker/mindspore-cpu/0.1.0-alpha/Dockerfile
@@ -64,4 +64,4 @@ RUN mkdir -pv /root/.pip \
     && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
 
 # Install MindSpore cpu whl package
-RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
diff --git a/docker/mindspore-gpu/0.1.0-alpha/Dockerfile b/docker/mindspore-gpu/0.1.0-alpha/Dockerfile
index 50ca2b9f08..9b59f845f7 100644
--- a/docker/mindspore-gpu/0.1.0-alpha/Dockerfile
+++ b/docker/mindspore-gpu/0.1.0-alpha/Dockerfile
@@ -80,4 +80,4 @@ RUN cd /tmp \
     && rm -f /tmp/openmpi-3.1.5.tar.gz
 
 # Install MindSpore cuda-10.1 whl package
-RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
diff --git a/setup.py b/setup.py
index 82e6d70fcc..d929d5d707 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@ from setuptools import setup, find_packages
 from setuptools.command.egg_info import egg_info
 from setuptools.command.build_py import build_py
 
-version = '0.1.0'
+version = '0.2.0'
 
 backend_policy = os.getenv('BACKEND_POLICY')
 commit_id = os.getenv('COMMIT_ID').replace("\n", "")

From ce2a13fcda31f42924c8e36e8e3f074414ff918d Mon Sep 17 00:00:00 2001
From: YuJianfeng <yujianfeng5@huawei.com>
Date: Sun, 26 Apr 2020 14:09:47 +0800
Subject: [PATCH 094/242] Check topk supported before converting input to attr

---
 .../ccsrc/pre_activate/ascend/ascend_helper.h | 11 ++++
 .../ascend/ir_fission/topk_split.cc           | 63 ++++++++++++++-----
 .../ascend/ir_fission/topk_split.h            |  9 ++-
 mindspore/ccsrc/pre_activate/common/helper.cc | 42 +++++++++++++
 mindspore/ccsrc/pre_activate/common/helper.h  |  4 ++
 .../pass/const_input_to_attr_registry.cc      |  1 -
 .../pass/convert_const_input_to_attr.cc       | 46 +-------------
 .../ascend/ir_fission/topk_split_test.cc      | 40 +++++++++---
 .../pre_activate/topk_split_test.py           |  2 +-
 9 files changed, 146 insertions(+), 72 deletions(-)

diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h
index a8fd7dc514..1840966358 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_helper.h
@@ -21,6 +21,7 @@
 #include <vector>
 #include "device/ascend/kernel_select_ascend.h"
 #include "kernel/kernel_query.h"
+#include "kernel/tbe/tbe_kernel_select.h"
 
 namespace mindspore {
 namespace opt {
@@ -36,6 +37,16 @@ class KernelSelect {
 };
 using KernelSelectPtr = std::shared_ptr<KernelSelect>;
 
+class SupportedChecker {
+ public:
+  SupportedChecker() = default;
+  virtual ~SupportedChecker() = default;
+  virtual bool CheckSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) {
+    return kernel::CheckSupported(anf_node, select_kernel_build_info);
+  }
+};
+using SupportedCheckerPtr = std::shared_ptr<SupportedChecker>;
+
 class KernelQuery {
  public:
   KernelQuery() = default;
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc
index 5924f6cd1c..4bdd5f0382 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.cc
@@ -16,6 +16,9 @@
 #include "pre_activate/ascend/ir_fission/topk_split.h"
 #include <vector>
 #include <memory>
+#include <unordered_set>
+#include "pre_activate/common/helper.h"
+#include "kernel/kernel_build_info.h"
 #include "utils/utils.h"
 #include "session/kernel_graph.h"
 #include "session/anf_runtime_algorithm.h"
@@ -25,6 +28,7 @@
 namespace mindspore {
 namespace opt {
 constexpr size_t kFloat16Len = 2;  // size of float16;
+constexpr size_t kTopkIndexK = 1;
 namespace {
 tensor::TensorPtr CreateTensor(const AnfNodePtr &node) {
   // 1 create tensor
@@ -70,37 +74,68 @@ ValueNodePtr CreateValueNode(const AnfNodePtr &node) {
   AnfAlgo::SetSelectKernelBuildInfo(builder1.Build(), indices_const.get());
   return indices_const;
 }
+
+kernel::KernelBuildInfoPtr CreateKernelBuildInfo() {
+  kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
+  builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT});
+  builder.SetOutputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT});
+  builder.SetInputsDeviceType({kNumberTypeFloat16, kNumberTypeFloat16});
+  builder.SetOutputsDeviceType({kNumberTypeFloat16, kNumberTypeInt32});
+  return builder.Build();
+}
 }  // namespace
 
 const BaseRef TopKSplit::DefinePattern() const {
-  VarPtr X = std::make_shared<Var>();
-  MS_EXCEPTION_IF_NULL(X);
+  VarPtr X1 = std::make_shared<Var>();
+  VarPtr X2 = std::make_shared<Var>();
   auto prim = std::make_shared<Primitive>(kTopKOpName);
-  MS_EXCEPTION_IF_NULL(prim);
-  return VectorRef({prim, X});
+  return VectorRef({prim, X1, X2});
 }
 
 const AnfNodePtr TopKSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &) const {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(node);
   auto kernel_graph = func_graph->cast<KernelGraphPtr>();
-  auto indices_const = CreateValueNode(node);
   // set value node as topk's input
   auto cnode = node->cast<CNodePtr>();
   MS_EXCEPTION_IF_NULL(cnode);
-  MS_LOG(INFO) << "already has input size: " << cnode->inputs().size();
-  cnode->add_input(indices_const);
+  // Copy a new node to check supported.
+  std::vector<AnfNodePtr> new_inputs{NewValueNode(std::make_shared<Primitive>(kTopKOpName))};
+  new_inputs.insert(new_inputs.end(), cnode->inputs().begin() + 1, cnode->inputs().end());
+  CNodePtr new_cnode = func_graph->NewCNode(new_inputs);
+  MS_EXCEPTION_IF_NULL(new_cnode);
+  new_cnode->set_abstract(cnode->abstract());
+  new_cnode->set_scope(cnode->scope());
+  AnfAlgo::CopyNodeAttrs(cnode, new_cnode);
+  CheckCNodeInputSize(new_cnode, kTopkInputNum);
+  // Convert the tensor input to scalar and convert it to attr
+  auto input_k = new_cnode->input(kTopkIndexK + 1);
+  MS_EXCEPTION_IF_NULL(input_k);
+  if (!IsValueNode<tensor::Tensor>(input_k)) {
+    return nullptr;
+  }
+  ValuePtr value = GetValueNode(input_k);
+  MS_EXCEPTION_IF_NULL(value);
+  auto tensor = value->cast<tensor::TensorPtr>();
+  MS_EXCEPTION_IF_NULL(tensor);
+  int32_t *data = reinterpret_cast<int32_t *>(tensor->data_c());
+  MS_EXCEPTION_IF_NULL(data);
+  auto new_value_node = std::make_shared<ValueNode>(MakeValue(*data));
+  new_cnode->set_input(kTopkIndexK + 1, new_value_node);
+
+  std::unordered_set<size_t> attr_index{kTopkIndexK};
+  ConstInputToAttr(new_cnode, attr_index);
+  auto indices_const = CreateValueNode(new_cnode);
+  new_cnode->add_input(indices_const);
+  MS_EXCEPTION_IF_NULL(supported_checker_);
+  if (!supported_checker_->CheckSupported(new_cnode, CreateKernelBuildInfo())) {
+    return nullptr;
+  }
+
   if (kernel_graph != nullptr) {
     kernel_graph->AddValueNodeToGraph(indices_const);
   }
 
-  CNodePtr new_cnode = nullptr;
-  if (kernel_graph == nullptr) {
-    new_cnode = std::make_shared<CNode>(*cnode);
-  } else {
-    new_cnode = kernel_graph->NewCNode(cnode);
-  }
-  MS_EXCEPTION_IF_NULL(new_cnode);
   return new_cnode;
 }
 }  // namespace opt
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h
index 8fcbbac475..e7293e1fa3 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/topk_split.h
@@ -16,15 +16,22 @@
 #ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_
 #define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TOPK_SPLIT_H_
 
+#include <memory>
 #include "pre_activate/common/optimizer.h"
+#include "pre_activate/ascend/ascend_helper.h"
+
 namespace mindspore {
 namespace opt {
 class TopKSplit : public PatternProcessPass {
  public:
-  explicit TopKSplit(bool multigraph = true) : PatternProcessPass("topk_split", multigraph) {}
+  explicit TopKSplit(bool multigraph = true)
+      : PatternProcessPass("topk_split", multigraph), supported_checker_(std::make_shared<SupportedChecker>()) {}
   ~TopKSplit() override = default;
   const BaseRef DefinePattern() const override;
   const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+
+ private:
+  SupportedCheckerPtr supported_checker_;
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/pre_activate/common/helper.cc b/mindspore/ccsrc/pre_activate/common/helper.cc
index de45239268..9e8187ffb2 100644
--- a/mindspore/ccsrc/pre_activate/common/helper.cc
+++ b/mindspore/ccsrc/pre_activate/common/helper.cc
@@ -422,5 +422,47 @@ AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePt
   AnfAlgo::SetOutputInferTypeAndShape({origin_type}, {origin_shape}, tuple_getitem.get());
   return tuple_getitem;
 }
+
+void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs) {
+  MS_EXCEPTION_IF_NULL(cnode);
+  std::vector<AnfNodePtr> new_inputs;
+  std::vector<std::string> new_input_names;
+  auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
+  MS_EXCEPTION_IF_NULL(primitive);
+  auto input_names = primitive->GetAttr(kAttrInputNames);
+  if (input_names == nullptr) {
+    MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]";
+    return;
+  }
+  auto input_names_vec = GetValue<std::vector<std::string>>(input_names);
+  auto inputs = cnode->inputs();
+  new_inputs.push_back(inputs[0]);
+  bool need_update = false;
+  for (size_t i = 0; i < inputs.size() - 1; ++i) {
+    auto input_node = inputs[i + 1];
+    MS_EXCEPTION_IF_NULL(input_node);
+    if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) {
+      auto value_node = input_node->cast<ValueNodePtr>();
+      MS_EXCEPTION_IF_NULL(value_node);
+      MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]";
+      if (i >= input_names_vec.size()) {
+        MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]";
+      }
+      primitive->set_attr(input_names_vec[i], value_node->value());
+      need_update = true;
+    } else {
+      new_inputs.push_back(input_node);
+      if (i < input_names_vec.size()) {
+        new_input_names.push_back(input_names_vec[i]);
+      }
+    }
+  }
+  if (need_update) {
+    // Update cnode's inputs
+    cnode->set_inputs(new_inputs);
+    // Update cnode's input_names attr
+    primitive->set_attr(kAttrInputNames, MakeValue(new_input_names));
+  }
+}
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/pre_activate/common/helper.h b/mindspore/ccsrc/pre_activate/common/helper.h
index 04a4dd6c81..9ef57d8e7c 100644
--- a/mindspore/ccsrc/pre_activate/common/helper.h
+++ b/mindspore/ccsrc/pre_activate/common/helper.h
@@ -19,6 +19,7 @@
 #include <vector>
 #include <memory>
 #include <string>
+#include <unordered_set>
 #include "ir/func_graph.h"
 #include "session/kernel_graph.h"
 #include "common/utils.h"
@@ -86,6 +87,7 @@ constexpr size_t kAdamApplyOneOutputNum = 3;
 constexpr size_t kBackendTransDataInputNum = 2;
 constexpr size_t kApplyMomentumInputNum = 6;
 constexpr size_t kBiasAddInputNum = 3;
+constexpr size_t kTopkInputNum = 3;
 
 enum FusedBatchNormInput {
   kX = 1,
@@ -150,6 +152,8 @@ void RemoveNopNode(session::KernelGraph *const graph);
 AnfNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx);
 
 bool IsUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node);
+
+void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs);
 }  // namespace opt
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_COMMON_HELPER_H_
diff --git a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc b/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
index fb47c9fc2a..0b4263685b 100644
--- a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
+++ b/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
@@ -52,7 +52,6 @@ ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() {
   Register(kFlattenGradOpName, {1});
   Register(kExpandDimsOpName, {1});
   Register(kSplitOpName, {0});
-  Register(kTopKOpName, {1});
   Register(kErfOpName, {1});
   Register(kSparseApplyAdagradOpName, {2});
   Register(kResizeNearestNeighborGrad, {1});
diff --git a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc
index 15d62a164f..1f9e2712a6 100644
--- a/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc
+++ b/mindspore/ccsrc/pre_activate/pass/convert_const_input_to_attr.cc
@@ -18,10 +18,10 @@
 #include <vector>
 #include <string>
 #include <unordered_map>
-#include <unordered_set>
 #include <memory>
 
 #include "pre_activate/pass/const_input_to_attr_registry.h"
+#include "pre_activate/common/helper.h"
 #include "utils/utils.h"
 #include "utils/context/ms_context.h"
 #include "operator/ops.h"
@@ -29,50 +29,6 @@
 
 namespace mindspore {
 namespace opt {
-namespace {
-void ConstInputToAttr(const CNodePtr &cnode, const std::unordered_set<size_t> &input_attrs) {
-  MS_EXCEPTION_IF_NULL(cnode);
-  std::vector<AnfNodePtr> new_inputs;
-  std::vector<std::string> new_input_names;
-  auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
-  MS_EXCEPTION_IF_NULL(primitive);
-  auto input_names = primitive->GetAttr(kAttrInputNames);
-  if (input_names == nullptr) {
-    MS_LOG(DEBUG) << "input_names are nullptr in cnode[" + cnode->DebugString() + "]";
-    return;
-  }
-  auto input_names_vec = GetValue<std::vector<std::string>>(input_names);
-  auto inputs = cnode->inputs();
-  new_inputs.push_back(inputs[0]);
-  bool need_update = false;
-  for (size_t i = 0; i < inputs.size() - 1; ++i) {
-    auto input_node = inputs[i + 1];
-    MS_EXCEPTION_IF_NULL(input_node);
-    if (input_attrs.find(i) != input_attrs.end() && input_node->isa<ValueNode>()) {
-      auto value_node = input_node->cast<ValueNodePtr>();
-      MS_EXCEPTION_IF_NULL(value_node);
-      MS_LOG(DEBUG) << "start erase input[" << i << "] of cnode[" + cnode->DebugString() + "]";
-      if (i >= input_names_vec.size()) {
-        MS_LOG(EXCEPTION) << "index " << i << " is larger than input names size [" << input_names_vec.size() << "]";
-      }
-      primitive->set_attr(input_names_vec[i], value_node->value());
-      need_update = true;
-    } else {
-      new_inputs.push_back(input_node);
-      if (i < input_names_vec.size()) {
-        new_input_names.push_back(input_names_vec[i]);
-      }
-    }
-  }
-  if (need_update) {
-    // Update cnode's inputs
-    cnode->set_inputs(new_inputs);
-    // Update cnode's input_names attr
-    primitive->set_attr(kAttrInputNames, MakeValue(new_input_names));
-  }
-}
-}  // namespace
-
 const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const AnfNodePtr &node,
                                                   const EquivPtr &) const {
   if (node == nullptr || !AnfAlgo::IsRealCNodeKernel(node)) {
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc
index 94fa04ef7a..43ddc046b7 100644
--- a/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/topk_split_test.cc
@@ -17,8 +17,13 @@
 #include "common/backend_common_test.h"
 #include "common/py_func_graph_fetcher.h"
 #include "device/kernel_info.h"
-#include "pre_activate/ascend/ir_fission/topk_split.h"
+#include "pre_activate/pass/convert_const_input_to_attr.h"
 #include "debug/anf_ir_dump.h"
+#define private public
+#define protected public
+#include "pre_activate/ascend/ir_fission/topk_split.h"
+#undef private
+#undef protected
 
 namespace mindspore {
 namespace opt {
@@ -30,6 +35,15 @@ class TestHWTopKSplit : public BackendCommon {
   UT::PyFuncGraphFetcher get_py_fun_;
 };
 
+class MockSupportedChecker : public SupportedChecker {
+ public:
+  MockSupportedChecker() = default;
+  ~MockSupportedChecker() override = default;
+  bool CheckSupported(const AnfNodePtr &anf_node, const kernel::KernelBuildInfoPtr &select_kernel_build_info) override {
+    return true;  // always report the kernel as supported so TopKSplit does not bail out in the test
+  }
+};  // class MockSupportedChecker
+
 TEST_F(TestHWTopKSplit, test_topk_split) {
   /*
    * def before(input):
@@ -40,19 +54,25 @@ TEST_F(TestHWTopKSplit, test_topk_split) {
   FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_topk_split", "before");
   std::vector<int> shp{4, 4};
   auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
-  g->parameters()[0]->set_abstract(x_abstract);
-  auto ret = g->get_return();
-  EXPECT_NE(ret, nullptr);
-  auto tuple_getitem = ret->input(1);
-  EXPECT_NE(tuple_getitem, nullptr);
-  auto topk = tuple_getitem->cast<CNodePtr>()->input(1);
-  topk->set_abstract(x_abstract);
+  AbstractBasePtrList args_spec_list{x_abstract};
+  auto kernel_graph = GetKernelGraph(g, args_spec_list);
 
   auto optimizer = std::make_shared<opt::GraphOptimizer>();
   auto pm = std::make_shared<opt::PassManager>();
-  pm->AddPass(std::make_shared<opt::TopKSplit>());
+  pm->AddPass(std::make_shared<opt::ConvertConstInputToAttr>());
+  auto topk_split = std::make_shared<opt::TopKSplit>();
+  topk_split->supported_checker_ = std::make_shared<MockSupportedChecker>();
+  pm->AddPass(topk_split);
   optimizer->AddPassManager(pm);
-  FuncGraphPtr new_graph = optimizer->Optimize(g);
+  FuncGraphPtr new_graph = optimizer->Optimize(kernel_graph);
+
+  auto ret = new_graph->get_return();
+  EXPECT_NE(ret, nullptr);
+  auto make_tuple = ret->input(1);
+  EXPECT_NE(make_tuple, nullptr);
+  auto tuple_getitem = make_tuple->cast<CNodePtr>()->input(1);
+  EXPECT_NE(tuple_getitem, nullptr);
+  auto topk = tuple_getitem->cast<CNodePtr>()->input(1);
   auto topk_cnode = topk->cast<CNodePtr>();
   EXPECT_EQ(topk_cnode->inputs().size(), 3);
   EXPECT_TRUE(topk_cnode->input(2)->isa<ValueNode>());
diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/topk_split_test.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/topk_split_test.py
index 4cdbfa084e..c173419897 100644
--- a/tests/ut/cpp/python_input/gtest_input/pre_activate/topk_split_test.py
+++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/topk_split_test.py
@@ -35,7 +35,7 @@ def test_topk_split(tag):
 
     @fns
     def before(input):
-        topk = TopK(input)
+        topk = TopK(input, 2)
         output = tuple_getitem(topk, 0)
         return output
 

From 6d47036f959ba0ea4ad1ed5e54e24ebc6c60116f Mon Sep 17 00:00:00 2001
From: chenjianping <jpc.chen@huawei.com>
Date: Sun, 26 Apr 2020 10:52:53 +0000
Subject: [PATCH 095/242] erase datatype raise kernel

---
 .../device/ascend/kernel_select_ascend.cc     | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
index 549b97b61b..1efd3d6c22 100644
--- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
+++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
@@ -218,16 +218,7 @@ void AddNodeInputDataType(const CNodePtr &kernel_node, size_t input_index,
                           std::vector<TypeId> *node_mix_precision_datatype) {
   AnfNodePtr cur_input = AnfAlgo::GetInputNode(kernel_node, input_index);
   MS_EXCEPTION_IF_NULL(cur_input);
-  TypeId input_origin_type;
-  if (cur_input->isa<Parameter>() && AnfAlgo::IsParameterWeight(cur_input->cast<ParameterPtr>())) {
-    // weight
-    input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0);
-  } else if (cur_input->isa<ValueNode>()) {
-    input_origin_type = AnfAlgo::GetOutputDeviceDataType(cur_input, 0);
-  } else {
-    // feature map
-    input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index);
-  }
+  TypeId input_origin_type = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, input_index);
   AddSupportMixedPrecisionDataTypeIndex(input_origin_type, node_mix_precision_datatype_index);
   node_mix_precision_datatype->push_back(input_origin_type);
 }
@@ -297,6 +288,12 @@ bool RaiseDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_dat
   return !kernel_match_datatype_idx->empty();
 }
 
+bool CanDataTypeReduce(const std::vector<int> &datatype_indexes, int check_index,
+                       const std::vector<int> &node_mix_precision_datatype_index) {
+  return datatype_indexes[check_index] != kUnSupportMixedDataTypeIndex &&
+         datatype_indexes[check_index] <= node_mix_precision_datatype_index[check_index];
+}
+
 bool RaiseOrReduceDataTypePrecisionSelect(const std::vector<int> &node_mix_precision_datatype_index,
                                           const std::vector<TypeId> &node_mix_precision_datatype,
                                           const std::map<size_t, std::vector<TypeId>> &kernel_support_datatypes,
@@ -329,7 +326,7 @@ bool RaiseOrReduceDataTypePrecisionSelect(const std::vector<int> &node_mix_preci
       if (i >= datatype_indexes.size()) {
         MS_LOG(EXCEPTION) << "index " << i << "> kernel datatype indexes size " << datatype_indexes.size();
       }
-      if (datatype_indexes[i] == kUnSupportMixedDataTypeIndex) {
+      if (!CanDataTypeReduce(datatype_indexes, i, node_mix_precision_datatype_index)) {
         iter = kernel_match_datatype_idx->erase(iter);
       } else {
         ++iter;
@@ -376,6 +373,7 @@ void PrecisionReduce(const std::vector<int> &node_mix_precision_datatype_index,
   bool selected_ret = RaiseDataTypePrecisionSelect(node_mix_precision_datatype_index, node_mix_precision_datatype,
                                                    kernel_support_datatype, kernel_match_datatype_idx);
   if (selected_ret) {
+    *precision_reduce = false;
     return;
   }
   if (context_ptr->enable_reduce_precision()) {

From c098f1ede5caa3ab302b853009f6f7fc53f2af1d Mon Sep 17 00:00:00 2001
From: leonwanghui <leon.wanghui@huawei.com>
Date: Sun, 26 Apr 2020 20:08:58 +0800
Subject: [PATCH 096/242] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20?=
 =?UTF-8?q?!703=20:=20modify=20version=20number'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                   | 18 +++++++++---------
 RELEASE.md                                  |  2 +-
 build.sh                                    |  4 ++--
 docker/README.md                            |  4 ++--
 docker/mindspore-cpu/0.1.0-alpha/Dockerfile |  2 +-
 docker/mindspore-gpu/0.1.0-alpha/Dockerfile |  2 +-
 setup.py                                    |  2 +-
 7 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index d480644124..3de87d3fec 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem.
 
 <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/>
 
-For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html).
+For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.1.0-alpha/architecture.html).
 
 ### Automatic Differentiation
 
@@ -76,7 +76,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
 
     ```
-    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
+    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
     ```
 
 2. Run the following command to verify the install.
@@ -96,10 +96,10 @@ currently the containerized build options are supported as follows:
 
 | Hardware Platform | Docker Image Repository | Tag | Description |
 | :---------------- | :---------------------- | :-- | :---------- |
-| CPU | `mindspore/mindspore-cpu` | `0.2.0-alpha` | Production environment with pre-installed MindSpore `0.2.0-alpha` CPU release. |
+| CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. |
 |  |  | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
 |  |  | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. |
-| GPU | `mindspore/mindspore-gpu` | `0.2.0-alpha` | Production environment with pre-installed MindSpore `0.2.0-alpha` GPU release. |
+| GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. |
 |  |  | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
 |  |  | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. |
 | Ascend | <center>&mdash;</center> | <center>&mdash;</center> | Coming soon. |
@@ -108,8 +108,8 @@ currently the containerized build options are supported as follows:
 
     For `CPU` backend, you can directly pull and run the image using the below command:
     ```
-    docker pull mindspore/mindspore-cpu:0.2.0-alpha
-    docker run -it mindspore/mindspore-cpu:0.2.0-alpha python -c 'import mindspore'
+    docker pull mindspore/mindspore-cpu:0.1.0-alpha
+    docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore'
     ```
 
 * GPU
@@ -126,8 +126,8 @@ currently the containerized build options are supported as follows:
 
     Then you can pull and run the image using the below command:
     ```
-    docker pull mindspore/mindspore-gpu:0.2.0-alpha
-    docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash
+    docker pull mindspore/mindspore-gpu:0.1.0-alpha
+    docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash
     ```
 
     To test if the docker image works, please execute the python code below and check the output:
@@ -161,7 +161,7 @@ please check out `docker` folder for the details.
 
 ## Quickstart
 
-See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html)
+See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/quick_start/quick_start.html)
 to implement the image classification.
 
 ## Docs
diff --git a/RELEASE.md b/RELEASE.md
index 265282547c..ce9064e4b1 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,4 +1,4 @@
-# Release 0.2.0-alpha
+# Release 0.1.0-alpha
 
 ## Main Features
 
diff --git a/build.sh b/build.sh
index 0b60344980..b48014ed93 100755
--- a/build.sh
+++ b/build.sh
@@ -433,9 +433,9 @@ build_predict()
 
     cd "${BASEPATH}/predict/output/"
     if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
-      tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
+      tar -cf MSPredict-0.1.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
     elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
-      tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
+      tar -cf MSPredict-0.1.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
     fi
     echo "success to build predict project!"
 }
diff --git a/docker/README.md b/docker/README.md
index 891a9b0782..c6851fe531 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -7,11 +7,11 @@ This folder hosts all the `Dockerfile` to build MindSpore container images with
 * CPU
 
     ```
-    cd mindspore-cpu/0.2.0-alpha && docker build . -t mindspore/mindspore-cpu:0.2.0-alpha
+    cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha
     ```
 
 * GPU
 
     ```
-    cd mindspore-gpu/0.2.0-alpha && docker build . -t mindspore/mindspore-gpu:0.2.0-alpha
+    cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha
     ```
diff --git a/docker/mindspore-cpu/0.1.0-alpha/Dockerfile b/docker/mindspore-cpu/0.1.0-alpha/Dockerfile
index 9524cee745..c9fb7c2b88 100644
--- a/docker/mindspore-cpu/0.1.0-alpha/Dockerfile
+++ b/docker/mindspore-cpu/0.1.0-alpha/Dockerfile
@@ -64,4 +64,4 @@ RUN mkdir -pv /root/.pip \
     && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
 
 # Install MindSpore cpu whl package
-RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
diff --git a/docker/mindspore-gpu/0.1.0-alpha/Dockerfile b/docker/mindspore-gpu/0.1.0-alpha/Dockerfile
index 9b59f845f7..50ca2b9f08 100644
--- a/docker/mindspore-gpu/0.1.0-alpha/Dockerfile
+++ b/docker/mindspore-gpu/0.1.0-alpha/Dockerfile
@@ -80,4 +80,4 @@ RUN cd /tmp \
     && rm -f /tmp/openmpi-3.1.5.tar.gz
 
 # Install MindSpore cuda-10.1 whl package
-RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
diff --git a/setup.py b/setup.py
index d929d5d707..82e6d70fcc 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@ from setuptools import setup, find_packages
 from setuptools.command.egg_info import egg_info
 from setuptools.command.build_py import build_py
 
-version = '0.2.0'
+version = '0.1.0'
 
 backend_policy = os.getenv('BACKEND_POLICY')
 commit_id = os.getenv('COMMIT_ID').replace("\n", "")

From e2b0a2814250a9eb50500004b8a77d06478be97d Mon Sep 17 00:00:00 2001
From: chujinjin <chujinjin52@huawei.com>
Date: Wed, 22 Apr 2020 09:39:55 +0800
Subject: [PATCH 097/242] add pynative cache

---
 .../ccsrc/device/ascend/ascend_memory_manager.cc  |  2 +-
 mindspore/ccsrc/pipeline/pipeline.cc              |  2 ++
 mindspore/ccsrc/pynative/pynative_execute.cc      |  9 ++++++++-
 mindspore/ccsrc/pynative/pynative_execute.h       |  3 +++
 mindspore/ccsrc/session/ascend_session.cc         | 15 ++++++++++++++-
 mindspore/ccsrc/session/ascend_session.h          |  2 ++
 mindspore/ccsrc/session/session_basic.cc          |  2 +-
 7 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc
index 42830f54fa..c2373d3c7e 100644
--- a/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc
+++ b/mindspore/ccsrc/device/ascend/ascend_memory_manager.cc
@@ -22,7 +22,7 @@ namespace mindspore {
 namespace device {
 namespace ascend {
 const uint64_t kAscendDeviceMemGB = 20;
-const uint64_t kAscendMemPoolGB = 5;
+const uint64_t kAscendMemPoolGB = 10;
 const uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << 30);
 const uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << 30);
 
diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index 251a0c2d84..cf67efe6ee 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -38,6 +38,7 @@
 #include "parallel/graph_util/get_parallel_info.h"
 #include "device/kernel_runtime_manager.h"
 #include "debug/trace.h"
+#include "pynative/pynative_execute.h"
 
 #if (ENABLE_GE || ENABLE_D)
 #include "pipeline/pipeline_ge.h"
@@ -822,6 +823,7 @@ void FinalizeGe() {
 
 void ClearResAtexit() {
   MS_LOG(DEBUG) << "Pipeline clear all resource";
+  pynative::ClearPyNativeSession();
   device::KernelRuntimeManager::Instance().ClearRuntimeResource();
 
   ad::g_k_prims.clear();
diff --git a/mindspore/ccsrc/pynative/pynative_execute.cc b/mindspore/ccsrc/pynative/pynative_execute.cc
index 821a35d8fb..8d3fe4fbb7 100644
--- a/mindspore/ccsrc/pynative/pynative_execute.cc
+++ b/mindspore/ccsrc/pynative/pynative_execute.cc
@@ -44,6 +44,7 @@ const std::set<std::string> vm_operators = {"partial", "depend", "make_ref", "ze
 
 namespace mindspore {
 namespace pynative {
+static std::shared_ptr<session::SessionBasic> session = nullptr;
 inline ValuePtr PyAttrValue(const py::object &obj) {
   ValuePtr converted_ret = nullptr;
   bool converted = parse::ConvertData(obj, &converted_ret);
@@ -310,7 +311,11 @@ py::object RunOpInMs(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *stat
   if (device_target != kAscendDevice && device_target != kGPUDevice) {
     MS_EXCEPTION(ArgumentError) << "Device target [" << device_target << "] is not supported in Pynative mode";
   }
-  std::shared_ptr<session::SessionBasic> session = session::SessionFactory::Get().Create(device_target);
+
+  if (session == nullptr) {
+    session = session::SessionFactory::Get().Create(device_target);
+  }
+
   MS_EXCEPTION_IF_NULL(session);
   session->Init(ms_context->device_id());
 
@@ -407,5 +412,7 @@ py::tuple RunOp(const py::args &args) {
   MS_LOG(INFO) << "RunOp end";
   return result;
 }
+
+void ClearPyNativeSession() { session = nullptr; }
 }  // namespace pynative
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/pynative/pynative_execute.h b/mindspore/ccsrc/pynative/pynative_execute.h
index c64c6b4b25..65be3b2ab2 100644
--- a/mindspore/ccsrc/pynative/pynative_execute.h
+++ b/mindspore/ccsrc/pynative/pynative_execute.h
@@ -36,6 +36,9 @@ namespace py = pybind11;
 py::object RunOpInVM(const OpExecInfoPtr &op_exec_info, PynativeStatusCode *status);
 
 py::tuple RunOp(const py::args &args);
+
+void ClearPyNativeSession();
+
 }  // namespace pynative
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc
index 253d2d08ae..0b8c8bfa55 100755
--- a/mindspore/ccsrc/session/ascend_session.cc
+++ b/mindspore/ccsrc/session/ascend_session.cc
@@ -249,10 +249,23 @@ void AscendSession::RunOpExecTask(const std::shared_ptr<KernelGraph> &kernel_gra
   MS_LOG(INFO) << "Finish!";
 }
 
+bool AscendSession::GraphCacheExist(const GraphInfo &graph_info) const {
+  if (run_op_graphs_.find(graph_info) != run_op_graphs_.end()) {
+    return true;
+  }
+
+  return false;
+}
+
 void AscendSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
                             const std::vector<tensor::TensorPtr> &input_tensors,
                             const std::vector<bool> &tensors_mask) {
   MS_LOG(INFO) << "Build op " << op_run_info.op_name << " start !";
+  if (GraphCacheExist(graph_info)) {
+    MS_LOG(INFO) << "Build op " << op_run_info.op_name << " finish !";
+    return;
+  }
+
   // construct graph include one op
   auto graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask);
   MS_EXCEPTION_IF_NULL(graph);
@@ -267,6 +280,7 @@ void AscendSession::BuildOp(const OpRunInfo &op_run_info, const GraphInfo &graph
   RunOpAdjustKernel(graph);
   BuildKernel(graph);
   run_op_graphs_[graph_info] = graph;
+  MS_LOG(INFO) << "Build op " << op_run_info.op_name << " finish !";
 }
 
 py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
@@ -291,7 +305,6 @@ py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &gr
   }
   py::object tuple_obj = utils::cast<PyObjectRef>(output_tensors).object_;
   py::tuple tuple_tensors = py::cast<py::tuple>(tuple_obj);
-  run_op_graphs_.clear();
   MS_LOG(INFO) << "Run op " << op_run_info.op_name << " finish!";
   return tuple_tensors;
 }
diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/session/ascend_session.h
index 0b006256a1..a3f6aadd1d 100755
--- a/mindspore/ccsrc/session/ascend_session.h
+++ b/mindspore/ccsrc/session/ascend_session.h
@@ -107,6 +107,8 @@ class AscendSession : public SessionBasic {
   std::vector<GraphType> &GetGraphOrderType(GraphId final_graph_id);
   // copy output of if and else
   void CopyOutputOfIf(GraphId false_graph_id);
+  // check whether a graph for graph_info is already cached in run_op_graphs_
+  bool GraphCacheExist(const GraphInfo &graph_info) const;
 
   // member variables
   // key is final_graph_id,value is child graph execute order of final graph
diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc
index cb9e5c4dc9..3436d68b81 100755
--- a/mindspore/ccsrc/session/session_basic.cc
+++ b/mindspore/ccsrc/session/session_basic.cc
@@ -125,7 +125,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne
   // if in paynative mode,data only copyed to host when user want to print data
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
-  if (ms_context->enable_pynative_infer()) {
+  if (ms_context->execution_mode() == kPynativeMode) {
     tensor->set_device_address(AnfAlgo::GetMutableOutputAddr(node, output_index));
   } else if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, output_index),
                                         LongToSize(tensor->data().nbytes()), tensor->data_type(),

From b681cec8f2105e812a5b077d82e59c5064b1579b Mon Sep 17 00:00:00 2001
From: zhoufeng <zhoufeng54@huawei.com>
Date: Sun, 26 Apr 2020 20:51:18 +0800
Subject: [PATCH 098/242] cmake refactor

---
 cmake/external_libs/mkl_dnn.cmake             |   2 +-
 cmake/external_libs/protobuf.cmake            |  60 ++-
 mindspore/ccsrc/CMakeLists.txt                | 370 ++++--------------
 mindspore/ccsrc/common/CMakeLists.txt         |   4 +-
 .../engine/datasetops/source/tf_buffer.h      |   2 +-
 .../engine/datasetops/source/tf_client.cc     |   2 +-
 .../engine/datasetops/source/tf_client.h      |   2 +-
 .../engine/datasetops/source/tf_reader_op.cc  |   2 +-
 mindspore/ccsrc/debug/CMakeLists.txt          |  20 +-
 mindspore/ccsrc/debug/dump_proto.cc           |   2 +-
 mindspore/ccsrc/device/CMakeLists.txt         |  78 ++--
 mindspore/ccsrc/gvar/CMakeLists.txt           |   5 +
 mindspore/ccsrc/ir/CMakeLists.txt             |   7 +-
 mindspore/ccsrc/kernel/CMakeLists.txt         |  74 ++--
 mindspore/ccsrc/onnx/CMakeLists.txt           |   7 +-
 mindspore/ccsrc/onnx/onnx_exporter.cc         |   2 +-
 mindspore/ccsrc/operator/CMakeLists.txt       |   7 +-
 mindspore/ccsrc/optimizer/CMakeLists.txt      |  11 +-
 mindspore/ccsrc/parallel/CMakeLists.txt       |   6 +
 .../parallel_strategy_checkpoint.cc           |   2 +-
 mindspore/ccsrc/pipeline/CMakeLists.txt       |  27 +-
 mindspore/ccsrc/pre_activate/CMakeLists.txt   |  13 +
 mindspore/ccsrc/predict/CMakeLists.txt        |  20 +-
 .../ccsrc/predict/generator/ir/ir_task_info.h |   2 +-
 mindspore/ccsrc/pybind_api/CMakeLists.txt     |   7 +-
 mindspore/ccsrc/pynative/CMakeLists.txt       |  13 +-
 mindspore/ccsrc/session/CMakeLists.txt        |  38 +-
 mindspore/ccsrc/transform/CMakeLists.txt      |  11 +-
 mindspore/ccsrc/transform/convert.cc          |   4 +-
 mindspore/ccsrc/utils/CMakeLists.txt          |  11 +-
 mindspore/ccsrc/vm/CMakeLists.txt             |   7 +-
 31 files changed, 333 insertions(+), 485 deletions(-)
 create mode 100644 mindspore/ccsrc/gvar/CMakeLists.txt
 create mode 100644 mindspore/ccsrc/parallel/CMakeLists.txt
 create mode 100644 mindspore/ccsrc/pre_activate/CMakeLists.txt

diff --git a/cmake/external_libs/mkl_dnn.cmake b/cmake/external_libs/mkl_dnn.cmake
index 4b2c46670a..85a3132ba1 100644
--- a/cmake/external_libs/mkl_dnn.cmake
+++ b/cmake/external_libs/mkl_dnn.cmake
@@ -4,7 +4,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Windows")
     mindspore_add_pkg(onednn
         VER 1.1.1
         LIBS dnnl mkldnn
-        HEAD_ONLY ./
+        HEAD_ONLY ./include
         RELEASE on
         URL https://github.com/oneapi-src/oneDNN/releases/download/v1.1.1/dnnl_win_1.1.1_cpu_vcomp.zip
         MD5 ecaab9ed549643067699c80e5cea1c23)
diff --git a/cmake/external_libs/protobuf.cmake b/cmake/external_libs/protobuf.cmake
index a574e789db..6fe34577af 100644
--- a/cmake/external_libs/protobuf.cmake
+++ b/cmake/external_libs/protobuf.cmake
@@ -38,17 +38,17 @@ function(ms_protobuf_generate c_var h_var)
         get_filename_component(file_dir ${abs_file} PATH)
         file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir})
 
-        list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc")
-        list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h")
+        list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc")
+        list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h")
 
         add_custom_command(
-                OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc"
-                "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h"
+                OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc"
+                "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h"
                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-                COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}"
-                COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
+                COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto"
+                COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file}
                 DEPENDS protobuf::protoc ${abs_file}
-                COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
+                COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM)
     endforeach()
 
     set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
@@ -71,40 +71,38 @@ function(ms_protobuf_generate_py c_var h_var py_var)
         get_filename_component(abs_file ${file} ABSOLUTE)
         get_filename_component(file_name ${file} NAME_WE)
         get_filename_component(file_dir ${abs_file} PATH)
-        file(RELATIVE_PATH rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${file_dir})
-
 
-        list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc")
-        list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h")
-        list(APPEND ${py_var} "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py")
+        list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc")
+        list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h")
+        list(APPEND ${py_var} "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py")
         if (WIN32)
             add_custom_command(
-                    OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc"
-                    "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h"
-                    "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
+                    OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc"
+                    "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h"
+                    "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py"
                     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-                    COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}"
-                    COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
-                    COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
-                    COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
-                    COMMAND perl -pi.bak -e "s/import (.+_pb2.*)/from . import \\1/"  "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
-                    COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/"
+                    COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto"
+                    COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file}
+                    COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file}
+                    COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file}
+                    COMMAND perl -pi.bak -e "s/import (.+_pb2.*)/from . import \\1/"  "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py"
+                    COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/"
                     DEPENDS protobuf::protoc ${abs_file}
                     COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
         else()
             add_custom_command(
-                    OUTPUT "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.cc"
-                    "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}.pb.h"
-                    "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
+                    OUTPUT "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.cc"
+                    "${CMAKE_BINARY_DIR}/proto/${file_name}.pb.h"
+                    "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py"
                     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
-                    COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/${rel_path}"
-                    COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
-                    COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
-                    COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/${rel_path} ${abs_file}
-                    COMMAND perl -pi -e "s/import (.+_pb2.*)/from . import \\1/"  "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py"
-                    COMMAND cp "${CMAKE_BINARY_DIR}/${rel_path}/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/"
+                    COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto"
+                    COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto ${abs_file}
+                    COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file}
+                    COMMAND protobuf::protoc -I${file_dir} --python_out=${CMAKE_BINARY_DIR}/proto ${abs_file}
+                    COMMAND perl -pi -e "s/import (.+_pb2.*)/from . import \\1/"  "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py"
+                    COMMAND cp "${CMAKE_BINARY_DIR}/proto/${file_name}_pb2.py" "${PROJECT_SOURCE_DIR}/mindspore/train/"
                     DEPENDS protobuf::protoc ${abs_file}
-                    COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
+                    COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM)
         endif()
     endforeach()
     set_source_files_properties(${${c_var}} ${${h_var}} ${${py_var}} PROPERTIES GENERATED TRUE)
diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index eb33de1c4b..7fb42ab359 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -1,14 +1,11 @@
+## common setting
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-if(ENABLE_CPU)
-    include(ExternalProject)
-    add_compile_definitions(CPUSESSION)
-    file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-    "device/cpu/*.cc"
-    )
-    if (CMAKE_SYSTEM_NAME MATCHES "Windows")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF")
-        add_compile_definitions(BUILDING_DLL)
-    endif()
+include_directories(${CMAKE_BINARY_DIR})
+link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
+
+if (CMAKE_SYSTEM_NAME MATCHES "Windows")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes -DHAVE_SNPRINTF")
+    add_compile_definitions(BUILDING_DLL)
 endif()
 
 if(ENABLE_GPU)
@@ -20,7 +17,7 @@ if(ENABLE_GPU)
     enable_language(CUDA)
     if(NOT CUDA_PATH OR CUDA_PATH STREQUAL "")
         if(DEFINED ENV{CUDA_HOME})
-            set(CUDA_PATH $ENV{CUDA_HOME})                
+            set(CUDA_PATH $ENV{CUDA_HOME})
         else()
             set(CUDA_PATH ${CUDA_TOOLKIT_ROOT_DIR})
         endif()
@@ -41,261 +38,101 @@ if(ENABLE_GPU)
             "kernel/akg/akgkernelbuild.cc"
             "kernel/akg/akg_kernel_attrs_process.cc"
             )
-    file(GLOB_RECURSE GPU_KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "kernel/gpu/*.cc"
-            )
+
     list(APPEND CUDA_NVCC_FLAGS -arch=sm_53)
     list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc")
-    add_library(gpu_queue SHARED "device/gpu/blocking_queue.cc" "device/gpu/gpu_buffer_mgr.cc")
-    target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so)
-
-
-    file(GLOB_RECURSE MS_STEPS_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "session/gpu_session.cc"
-        )
     list(REMOVE_ITEM GPU_SRC_LIST "device/gpu/mpi/mpi_initializer.cc"
                                   "device/gpu/distribution/collective_wrapper.cc"
                                   "device/gpu/distribution/mpi_wrapper.cc"
                                   "device/gpu/distribution/nccl_wrapper.cc"
                                   )
-    list(REMOVE_ITEM GPU_KERNEL_SRC_LIST "device/gpu/mpi/mpi_initializer.cc"
-            "kernel/gpu/nccl/nccl_gpu_kernel.cc"
-            )
 
     set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
     string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
     cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
     set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})
+endif ()
 
-    if(ENABLE_MPI)
-      include(ExternalProject)
-
-      file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-              "kernel/gpu/nccl/*.cc"
-              )
-      file(GLOB_RECURSE GPU_MPI_PYTHON_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-              "device/gpu/mpi/mpi_initializer.cc"
-              )
-      add_library(gpu_collective SHARED "device/gpu/distribution/collective_wrapper.cc"
-                                        "device/gpu/distribution/mpi_wrapper.cc"
-                                        "device/gpu/distribution/nccl_wrapper.cc"
-                                        )
-    endif()
-endif()
-
+## make flatbuffer files
 include_directories("${CMAKE_BINARY_DIR}/predict/schema/inner")
 file(GLOB_RECURSE FLATBUFFER_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/schema/*.fbs")
 set(FLATBUFFER_OU "${CMAKE_BINARY_DIR}/predict/schema/inner")
-ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" GENERATED_OUTPUT_DIR "${FLATBUFFER_OU}")
+ms_build_flatbuffers("${FLATBUFFER_IN}" "${FLATBUFFER_IN}" flat_input "${FLATBUFFER_OU}")
 
-file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "ir/*.cc"
-        "ir/dtype/*.cc"
-        "utils/context/ms_context.cc"
-        "utils/symbolic.cc"
-        "utils/tensorprint_utils.cc"
-        "utils/convert_utils.cc"
-        "utils/graph_utils.cc"
-        "utils/misc.cc"
-        "utils/callbacks.cc"
-        "utils/profile.cc"
-        "utils/base_ref.cc"
-        "utils/summary/event_writer.cc"
-        "utils/log_adapter.cc"
-        "utils/comm_manager.cc"
-        "utils/any.cc"
-        "utils/config_manager.cc"
-        "utils/system/file_system.cc"
-        "utils/system/crc32c.cc"
-        "common/*.cc"
-        "parallel/*.cc"
-        "pipeline/pipeline.cc"
-        "pipeline/resource.cc"
-        "pipeline/pass.cc"
-        "pipeline/action.cc"
-        "pipeline/validator.cc"
-        "pipeline/remove_value_node_dup.cc"
-        "pipeline/parse/*.cc"
-        "pipeline/static_analysis/*.cc"
-        "optimizer/*.cc"
-        "debug/*.cc"
-        "onnx/onnx_exporter.cc"
-        "operator/*.cc"
-        "session/kernel_graph.cc"
-        "utils/node_utils.cc"
-        "session/session_basic.cc"
-        "session/session_factory.cc"
-        "session/anf_runtime_algorithm.cc"
-        "vm/*.cc"
-        "pynative/base.cc"
-        "pynative/pynative_execute.cc"
-        "pybind_api/*.cc"
-        "device/common/*.cc"
-        "kernel/kernel_query.cc"
-        "kernel/kernel_build_info.cc"
-        "kernel/kash/*.cc"
-        "device/kernel_info.cc"
-        "device/kernel_runtime.cc"
-        "device/memory_manager.cc"
-        "device/kernel_runtime_manager.cc"
-        "device/convert_tensor_utils.cc"
-        "pre_activate/common/*.cc"
-        "pre_activate/pass/*.cc"
-        "pre_activate/gpu/*.cc"
-        "pre_activate/mem_reuse/*.cc"
-        "predict/predict.cc"
-        "predict/generator/utils/ir_model_util.cc"
-        "predict/converter/*.cc"
-        "predict/converter/attr_utils/*.cc"
-        "predict/converter/lite_model/*.cc"
-        "predict/converter/lite_model/operations/*.cc"
-        "kernel/common_utils.cc"
-        "kernel/oplib/*.cc"
-        "kernel/kash/*.cc"
-        "device/gpu/distribution/collective_init.cc"
-        )
-if (ENABLE_CPU)
-    list(REMOVE_ITEM MINDSPORE_SRC_LIST "device/gpu/distribution/collective_init.cc")
-    if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
-        list(REMOVE_ITEM MINDSPORE_SRC_LIST "kernel/kernel_query.cc")
-    endif()
-endif()
-if (NOT ENABLE_GPU)
-    list(APPEND MINDSPORE_SRC_LIST "device/gpu/distribution/collective_fake_init.cc")
-endif()
-file(GLOB_RECURSE MEM_REUSE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "pre_activate/mem_reuse/*.cc"
-        )
-if(NOT ENABLE_DUMP_E2E)
-    list(REMOVE_ITEM MINDSPORE_SRC_LIST "debug/e2e_dump.cc")
-endif()
-file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_CURRENT_SOURCE_DIR})
-file(GLOB_RECURSE ONNX_PROTO RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/onnx.proto")
-message("onnx proto path is : ${ONNX_PROTO}")
+## make protobuf files
+file(COPY "${ms_onnx_INC}/onnx/onnx.proto" DESTINATION ${CMAKE_BINARY_DIR}/proto)
+file(GLOB ONNX_PROTO "" ${CMAKE_BINARY_DIR}/proto/onnx.proto)
+message("onnx proto path is :" ${ONNX_PROTO})
 ms_protobuf_generate(ONNX_PROTO_SRCS ONNX_PROTO_HDRS ${ONNX_PROTO})
 list(APPEND MINDSPORE_PROTO_LIST ${ONNX_PROTO_SRCS})
 
-if(ENABLE_DUMP_PROTO)
+if (ENABLE_DUMP_PROTO)
     include_directories(${CMAKE_BINARY_DIR})
-    file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "utils/node_strategy.proto"
-            )
+    file(GLOB_RECURSE PROTO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "utils/node_strategy.proto")
     ms_protobuf_generate(PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
 
     file(GLOB_RECURSE PROTO_PY RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "utils/anf_ir.proto"
-            "utils/summary.proto"
-            "utils/checkpoint.proto"
-            )
+        "utils/anf_ir.proto"
+        "utils/summary.proto"
+        "utils/checkpoint.proto"
+    )
     ms_protobuf_generate_py(PY_SRCS PY_HDRS PY_PYS ${PROTO_PY})
 
-    list(APPEND MINDSPORE_PROTO_DUMP_LIST ${PROTO_SRCS})
-    list(APPEND MINDSPORE_PROTO_DUMP_LIST ${PY_SRCS})
-    list(APPEND MINDSPORE_SRC_LIST "debug/dump_proto.cc")
-    list(APPEND MINDSPORE_SRC_LIST "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
-    add_compile_definitions(ENABLE_DUMP_PROTO)
-endif()
-
-if(ENABLE_GE)
-    file(GLOB_RECURSE GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "transform/*.cc"
-            "pynative/pynative_execute_ge.cc"
-            "utils/callbacks_ge.cc"
-            "pipeline/pipeline_ge.cc"
-            )
-    list(APPEND MINDSPORE_SRC_LIST ${GE_SRC_LIST})
-endif()
+    list(APPEND MINDSPORE_PROTO_LIST ${PROTO_SRCS})
+    list(APPEND MINDSPORE_PROTO_LIST ${PY_SRCS})
+endif ()
 
-if(ENABLE_D)
+if (ENABLE_D)
     include_directories("${CMAKE_BINARY_DIR}/kernel/aicpu")
-    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "kernel/aicpu/proto/*.proto"
-            )
-    ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
-
     include_directories("${CMAKE_BINARY_DIR}/predict/generator/ir")
-    file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "predict/proto/*.proto"
-            )
+    file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "kernel/aicpu/proto/*.proto")
+    ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
+    
+    file(GLOB_RECURSE PROTO_INNER RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "predict/proto/*.proto")
     ms_protobuf_generate(PREDICT_PROTOSRCS PREDICT_PROTOHDRS ${PROTO_INNER})
 
-    file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "device/ascend/*.cc"
-            "device/ascend/profiling/*.cc"
-            "device/ascend/tasksink/*.cc"
-            "device/kernel_adjust.cc"
-            "kernel/kernel_fusion.cc"
-            "kernel/tbe/*.cc"
-            "pre_activate/ascend/*.cc"
-            "transform/*.cc"
-            "pipeline/pipeline_ge.cc"
-            )
-    list(APPEND MINDSPORE_SRC_LIST ${D_SRC_LIST})
-    list(APPEND MINDSPORE_PROTO_AICPU_LIST ${PROTOSRCS})
-    list(APPEND MINDSPORE_PROTO_PREDICT_LIST ${PREDICT_PROTOSRCS})
+    list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
+    list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
 
-    file(GLOB_RECURSE MS_STEPS_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "session/ascend_session.cc"
-        )
-    file(GLOB_RECURSE MS_TASKINFO_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "device/ascend/tasksink/taskinfo/*.cc")
-    file(GLOB_RECURSE MS_AICPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "kernel/aicpu/*.cc"
-        )
-    file(GLOB_RECURSE MS_RT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "kernel/mng/*.cc"
-        )
-    file(GLOB_RECURSE MS_HCCL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "kernel/hccl/*.cc"
-        )
-    file(GLOB_RECURSE MS_PREDICT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "predict/generator/ir/*.cc"
-        )
     add_compile_definitions(ENABLE_D)
-endif()
-
-file(GLOB_RECURSE MS_GVAR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "gvar/*.cc"
-        )
-
-add_library(mindspore_gvar SHARED ${MS_GVAR_SRC_LIST})
-add_library(mindspore STATIC ${MINDSPORE_SRC_LIST})
-add_dependencies(mindspore GENERATED_OUTPUT_DIR)
+endif ()
 
-if(ENABLE_D)
-    list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_AICPU_LIST})
-endif()
-if(ENABLE_DUMP_PROTO)
-    list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_DUMP_LIST})
-endif()
-list(APPEND MINDSPORE_PROTO_LIST ${MINDSPORE_PROTO_PREDICT_LIST})
-if(MINDSPORE_PROTO_LIST)
+if (MINDSPORE_PROTO_LIST)
     add_library(proto_input STATIC ${MINDSPORE_PROTO_LIST})
     set_target_properties(proto_input PROPERTIES COMPILE_FLAGS "-Wno-unused-variable")
-    target_link_libraries(mindspore proto_input)
 endif()
 
-if(APPLE)
-    set_target_properties(mindspore_gvar PROPERTIES MACOSX_RPATH ON)
-endif()
+## make sub objects
+set(SUB_COMP 
+    transform pre_activate parallel pipeline device kernel common debug gvar ir onnx operator optimizer predict
+    pybind_api pynative session utils vm
+)
 
-link_directories(${CMAKE_SOURCE_DIR}/build/mindspore/graphengine)
+foreach (_comp ${SUB_COMP})
+    add_subdirectory(${_comp})
+    if (TARGET _mindspore_${_comp}_obj)
+        list(APPEND SUB_OBJECTS_SRC $<TARGET_OBJECTS:_mindspore_${_comp}_obj>)
+        add_dependencies(_mindspore_${_comp}_obj proto_input flat_input)
+    endif ()
+endforeach ()
+
+add_library(mindspore STATIC ${SUB_OBJECTS_SRC})
+target_link_libraries(mindspore proto_input)
+target_link_libraries(mindspore securec mindspore::flatbuffers)
+if (NOT WIN32)
+  target_link_libraries(mindspore dl)
+endif()
 
 if (ENABLE_GE)
     if(ENABLE_TRAIN)
-        target_link_libraries(mindspore graph ge_client_train)
-    else()
-        target_link_libraries(mindspore graph ge_client)
-    endif()
-    target_link_libraries(mindspore tsdclient)
-elseif(ENABLE_D)
-    add_compile_definitions(NO_GE_CLIENT)
-    target_link_libraries(mindspore graph)
-else()
-    add_compile_definitions(NO_GE_CLIENT)
+        target_link_libraries(mindspore ge_client_train)
+    else ()
+        target_link_libraries(mindspore ge_client)
+    endif ()
+    target_link_libraries(mindspore graph tsdclient)
 endif()
 
-if(ENABLE_D)
+if (ENABLE_D)
     if (DEFINED ENV{D_LINK_PATH})
         if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
             MESSAGE("system processor matches aarch64")
@@ -306,13 +143,13 @@ if(ENABLE_D)
         else ()
             MESSAGE("system ${CMAKE_HOST_SYSTEM_PROCESSOR} not support")
         endif()
-    else()
+    else ()
         MESSAGE("use system default lib")
-        if(DEFINED ENV{ASCEND_CUSTOM_PATH})
+        if (DEFINED ENV{ASCEND_CUSTOM_PATH})
             set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH})
-        else()
+        else ()
             set(ASCEND_PATH /usr/local/Ascend)
-        endif()
+        endif ()
         set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
         set(ASCEND_DRIVER_BACK_PATH ${ASCEND_PATH}/driver/lib64/driver)
         set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
@@ -327,37 +164,14 @@ if(ENABLE_D)
     target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${PROFILING} ${HCCL} ${TSDCLIENT})
 endif()
 
-target_link_libraries(mindspore securec)
-if (NOT WIN32)
-  target_link_libraries(mindspore dl)
-endif()
-target_link_libraries(mindspore mindspore::flatbuffers)
 # link protobuf
 if (ENABLE_D)
     target_link_libraries(mindspore mindspore::protobuf)
 endif()
 
-if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
-    target_link_libraries(mindspore ${PYTHON_LIBRARIES} mindspore_gvar)
-endif()
-
 # set c_expression building
-if (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
-    set(PYTHON_MODULE_SOURCE ${MS_GVAR_SRC_LIST}
-        pipeline/init.cc
-        kernel/oplib/oplib.cc
-        ${MINDSPORE_SRC_LIST} ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST}
-        ${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST})
-else()
-    set(PYTHON_MODULE_SOURCE
-        pipeline/init.cc
-        kernel/oplib/oplib.cc
-        ${MS_STEPS_SRC_LIST} ${MS_CCE_SRC_LIST} ${MS_AICPU_SRC_LIST} ${MS_TASKINFO_LIST} ${MS_RT_SRC_LIST}
-        ${GPU_NCCL_LIST} ${MS_HCCL_SRC_LIST} ${MS_PREDICT_SRC_LIST} ${CPU_SRC_LIST} ${MEM_REUSE_SRC_LIST} ${GPU_KERNEL_SRC_LIST})
-endif()
-
 set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
-pybind11_add_module(_c_expression ${PYTHON_MODULE_SOURCE})
+pybind11_add_module(_c_expression "pipeline/init.cc")
 
 MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
 if (CMAKE_SYSTEM_NAME MATCHES "Linux")
@@ -372,55 +186,41 @@ else ()
     MESSAGE(FATAL_ERROR "other platform: ${CMAKE_SYSTEM_NAME}")
 endif ()
 
-
 set(ORIGIN_PATH ${ORIGIN_PATH}/lib)
 set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
-if (WIN32)
-    target_link_libraries(_c_expression PRIVATE
-            mindspore::pybind11_module
-            securec
-            proto_input
-            mindspore::flatbuffers
-            )
-else()
-    target_link_libraries(_c_expression PRIVATE
-            mindspore::pybind11_module
-            mindspore
-            mindspore_gvar
-            )
-endif()
 
-if(USE_GLOG)
+if (CMAKE_SYSTEM_NAME MATCHES "Windows")
+    target_link_libraries(mindspore mindspore::pybind11_module)
+    target_link_libraries(mindspore mindspore_gvar)
+else ()
+    target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
+    target_link_libraries(_c_expression PRIVATE mindspore_gvar)
+endif ()
+
+target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
+
+if (USE_GLOG)
     target_link_libraries(_c_expression PRIVATE mindspore::glog)
-endif()
+endif ()
 
-if(ENABLE_DUMP_PROTO)
+if (ENABLE_DUMP_PROTO)
     target_link_libraries(_c_expression PRIVATE mindspore::protobuf)
-endif()
+endif ()
 
-if(ENABLE_GPU)
+if (ENABLE_GPU)
     message("add gpu lib to c_expression")
-    target_link_libraries(_c_expression PRIVATE
-                          gpu_cuda_lib
-                          gpu_queue
-                          cublas
+    target_link_libraries(_c_expression PRIVATE gpu_cuda_lib gpu_queue cublas
                           ${CUDA_PATH}/lib64/libcurand.so
                           ${CUDNN_PATH}/lib64/libcudnn.so
                           ${CUDA_PATH}/lib64/libcudart.so
                           ${CUDA_PATH}/lib64/stubs/libcuda.so)
-    if(ENABLE_MPI)
-        pybind11_add_module(_ms_mpi ${GPU_MPI_PYTHON_LIST})
-        target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
-        target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
-    endif()
-endif()
-
+endif ()
 
-if(ENABLE_CPU)
+if (ENABLE_CPU)
     target_link_libraries(_c_expression PRIVATE mindspore::dnnl mindspore::mkldnn)
-endif()
+endif ()
 
-if(ENABLE_MINDDATA)
+if (ENABLE_MINDDATA)
     add_subdirectory(mindrecord)
     add_subdirectory(dataset)
-endif()
+endif ()
diff --git a/mindspore/ccsrc/common/CMakeLists.txt b/mindspore/ccsrc/common/CMakeLists.txt
index 1a1a5ae9e6..3d9634280d 100644
--- a/mindspore/ccsrc/common/CMakeLists.txt
+++ b/mindspore/ccsrc/common/CMakeLists.txt
@@ -1,2 +1,2 @@
-
-add_library(_mindspore_common_obj OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/*.cc)
\ No newline at end of file
+file(GLOB_RECURSE _COMMON_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+add_library(_mindspore_common_obj OBJECT ${_COMMON_ALL_SRC_FILES})
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h
index 5745ff8071..389f4a76d9 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_buffer.h
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 #include "dataset/engine/data_buffer.h"
-#include "./example.pb.h"
+#include "proto/example.pb.h"
 #include "dataset/engine/datasetops/source/tf_client.h"
 
 namespace mindspore {
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc
index d41ff121af..b6e68aafb9 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.cc
@@ -24,7 +24,7 @@
 #include <algorithm>
 
 #include "common/utils.h"
-#include "./example.pb.h"
+#include "proto/example.pb.h"
 #include "dataset/engine/datasetops/source/storage_client.h"
 #include "dataset/util/path.h"
 #include "dataset/util/status.h"
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h
index 6ff76e202a..3602f93351 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_client.h
@@ -25,7 +25,7 @@
 #include <utility>
 #include <vector>
 #include <map>
-#include "./example.pb.h"
+#include "proto/example.pb.h"
 #include "dataset/engine/datasetops/source/storage_client.h"
 #include "dataset/util/status.h"
 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
index 6132f628d7..a2f23aac75 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
@@ -23,7 +23,7 @@
 #include <utility>
 #include <unordered_map>
 
-#include "./example.pb.h"
+#include "proto/example.pb.h"
 #include "./securec.h"
 #include "common/utils.h"
 #include "dataset/core/config_manager.h"
diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt
index a88745b864..c8c50dd471 100644
--- a/mindspore/ccsrc/debug/CMakeLists.txt
+++ b/mindspore/ccsrc/debug/CMakeLists.txt
@@ -1,12 +1,16 @@
-
 set(_DEBUG_SRC_LIST
-        "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_dump.cc"
-        "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_utils.cc"
-        "${CMAKE_CURRENT_SOURCE_DIR}/draw.cc"
-        "${CMAKE_CURRENT_SOURCE_DIR}/dump_proto.cc")
+    "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_dump.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/anf_ir_utils.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/draw.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/dump_proto.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/info.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/label.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/trace_info.cc"
+    "${CMAKE_CURRENT_SOURCE_DIR}/trace.cc"
+)
 
-if(ENABLE_DUMP_E2E)
+if (ENABLE_DUMP_E2E)
     list(APPEND _DEBUG_SRC_LIST "${CMAKE_CURRENT_SOURCE_DIR}/e2e_dump.cc")
-endif(ENABLE_DUMP_E2E)
+endif (ENABLE_DUMP_E2E)
 
-add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST})
\ No newline at end of file
+add_library(_mindspore_debug_obj OBJECT ${_DEBUG_SRC_LIST})
diff --git a/mindspore/ccsrc/debug/dump_proto.cc b/mindspore/ccsrc/debug/dump_proto.cc
index 83ab1e4505..ab2ce1322a 100644
--- a/mindspore/ccsrc/debug/dump_proto.cc
+++ b/mindspore/ccsrc/debug/dump_proto.cc
@@ -23,7 +23,7 @@
 #include <algorithm>
 
 #include "debug/anf_ir_utils.h"
-#include "utils/anf_ir.pb.h"
+#include "proto/anf_ir.pb.h"
 #include "utils/graph_utils.h"
 #include "utils/symbolic.h"
 
diff --git a/mindspore/ccsrc/device/CMakeLists.txt b/mindspore/ccsrc/device/CMakeLists.txt
index 93ef7adc84..0a6514f65a 100644
--- a/mindspore/ccsrc/device/CMakeLists.txt
+++ b/mindspore/ccsrc/device/CMakeLists.txt
@@ -1,34 +1,50 @@
-file(GLOB_RECURSE _DEVICE_ALL_SRC_FILES *.cc)
-add_library(_mindspore_device_obj OBJECT ${_DEVICE_ALL_SRC_FILES})
-
-if(ENABLE_CPU)
-    target_compile_definitions(_mindspore_device_obj PRIVATE CPUSESSION)
-    file(GLOB_RECURSE _CPU_SRC_LIST  cpu/*.cc)
-    add_library(_c_expression_cpu_device_obj  OBJECT ${_CPU_SRC_LIST})
-endif()
-
-if(ENABLE_GPU)
-    file(GLOB_RECURSE _GPU_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "gpu/*.cc"
-            "gpu/*.cu"
-            )
-    list(REMOVE_ITEM _GPU_SRC_LIST "gpu/blocking_queue.cc"
-                                   "gpu/gpu_buffer_mgr.cc"
-                                   "gpu/mpi/mpi_initializer.cc"
+file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/*.cc"
+    "kernel_info.cc" "kernel_runtime.cc" "memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc"
+)
+
+if (ENABLE_GPU)
+    list(APPEND DEVICE_SRC_LIST "gpu/distribution/collective_init.cc")
+else ()
+    list(APPEND DEVICE_SRC_LIST "gpu/distribution/collective_fake_init.cc")
+endif ()
+
+if (ENABLE_D)
+    file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ascend/*.cc" "kernel_adjust.cc")
+endif ()
+
+if (ENABLE_CPU)
+    file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "cpu/*.cc")
+endif ()
+
+# gpu
+if (ENABLE_GPU)
+    file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc" "gpu/*.cu")
+
+    # gpu_queue
+    list(REMOVE_ITEM CUDA_SRC_LIST "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc")
+    add_library(gpu_queue SHARED "gpu/blocking_queue.cc" "gpu/gpu_buffer_mgr.cc")
+    target_link_libraries(gpu_queue ${CMAKE_THREAD_LIBS_INIT} ${CUDA_PATH}/lib64/libcudart.so)
+
+    list(REMOVE_ITEM CUDA_SRC_LIST "gpu/mpi/mpi_initializer.cc"
                                    "gpu/distribution/collective_wrapper.cc"
                                    "gpu/distribution/mpi_wrapper.cc"
-                                   "gpu/distribution/nccl_wrapper.cc")
-    add_library(_cuda_gpu_device_obj  OBJECT ${_GPU_SRC_LIST})
-endif()
-
-if(ENABLE_D)
-    file(GLOB_RECURSE _D_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "ascend/*.cc"
-            "ascend/profiling/*.cc"
-            "ascend/tasksink/*.cc"
-            "kernel_adjust.cc"
-            "ascend/tasksink/taskinfo/*.cc"
-            )
-    target_sources(_mindspore_device_obj  PRIVATE ${_D_SRC_LIST})
-endif()
+                                   "gpu/distribution/nccl_wrapper.cc"
+    )
+
+    if (ENABLE_MPI)
+        include(ExternalProject)
+        # gpu_collective
+        add_library(gpu_collective SHARED "gpu/distribution/collective_wrapper.cc"
+                                          "gpu/distribution/mpi_wrapper.cc"
+                                          "gpu/distribution/nccl_wrapper.cc"
+        )
+        # _ms_mpi
+        pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc")
+        target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
+        target_link_libraries(gpu_collective PRIVATE mindspore::ompi mindspore::nccl)
+    endif ()
+
+    # add_library(_mindspore_device_cuda_obj OBJECT ${CUDA_SRC_LIST})
+endif ()
 
+add_library(_mindspore_device_obj OBJECT ${DEVICE_SRC_LIST} ${D_SRC_LIST} ${CPU_SRC_LIST})
diff --git a/mindspore/ccsrc/gvar/CMakeLists.txt b/mindspore/ccsrc/gvar/CMakeLists.txt
new file mode 100644
index 0000000000..552ba742f1
--- /dev/null
+++ b/mindspore/ccsrc/gvar/CMakeLists.txt
@@ -0,0 +1,5 @@
+file(GLOB_RECURSE MS_GVAR_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cc)  # every .cc file below this directory
+add_library(mindspore_gvar SHARED ${MS_GVAR_SRC_LIST})  # a SHARED library of its own, unlike the OBJECT libraries of the sibling modules
+if (APPLE)
+    set_target_properties(mindspore_gvar PROPERTIES MACOSX_RPATH ON)  # macOS only: use @rpath in the library's install name
+endif ()
diff --git a/mindspore/ccsrc/ir/CMakeLists.txt b/mindspore/ccsrc/ir/CMakeLists.txt
index 278ad492e2..2b17eecbed 100644
--- a/mindspore/ccsrc/ir/CMakeLists.txt
+++ b/mindspore/ccsrc/ir/CMakeLists.txt
@@ -1,5 +1,2 @@
-file(GLOB_RECURSE _IR_ALL_SRC_FILES
-      ./*.cc
-      dtype/*.cc)
-
-add_library(_mindspore_ir_obj OBJECT ${_IR_ALL_SRC_FILES})
\ No newline at end of file
+file(GLOB_RECURSE _IR_SRC_LIST ./*.cc dtype/*.cc)
+add_library(_mindspore_ir_obj OBJECT ${_IR_SRC_LIST})
diff --git a/mindspore/ccsrc/kernel/CMakeLists.txt b/mindspore/ccsrc/kernel/CMakeLists.txt
index 9c5e2c1890..f3ac99283a 100644
--- a/mindspore/ccsrc/kernel/CMakeLists.txt
+++ b/mindspore/ccsrc/kernel/CMakeLists.txt
@@ -1,34 +1,40 @@
-file(GLOB_RECURSE _SESSION_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "kernel_query.cc"
-        "kernel_fusion.cc"
-        "kernel_build_info.cc"
-        "kash/*.cc"
-        "common_utils.cc"
-        "oplib/*.cc"
-        )
-
-add_library(_mindspore_kernel_obj OBJECT ${_SESSION_ALL_SRC_FILES})
-
-if(ENABLE_GPU)
-		file(GLOB_RECURSE _CUDA_GPU_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-						"gpu/*.cu"
-						"akg/gpu/*.cc"
-						)
-		add_library(_cuda_gpu_kernel_obj  OBJECT ${_CUDA_GPU_SRC_LIST})
-
-		file(GLOB_RECURSE _C_EXPRESSION_GPU_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-						"gpu/*.cc"
-						)
-		list(REMOVE_ITEM _C_EXPRESSION_GPU_SRC_LIST "gpu/nccl/nccl_gpu_kernel.cc")
-		add_library(_c_expression_gpu_device_obj OBJECT ${_C_EXPRESSION_GPU_SRC_LIST})				
-endif()
-
-if(ENABLE_D)
-    file(GLOB_RECURSE _D_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-						"tbe/*.cc"
-						"aicpu/*.cc"
-						"mng/*.cc"
-						"hccl/*.cc"
-            )
-    target_sources(_mindspore_kernel_obj  PRIVATE ${_D_SRC_LIST})
-endif()
+file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}  # sources compiled for every backend
+    "kernel_build_info.cc"
+    "kash/*.cc"
+    "common_utils.cc"
+    "oplib/*.cc"
+)
+
+if (ENABLE_D)  # sources that are only compiled when ENABLE_D is on
+    file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+        "kernel_query.cc"
+        "kernel_fusion.cc"
+        "tbe/*.cc"
+        "aicpu/*.cc"
+        "mng/*.cc"
+        "hccl/*.cc"
+    )
+    add_compile_definitions(ENABLE_D)  # requires CMake >= 3.12
+endif ()
+
+if (ENABLE_GPU)
+    file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}  # NOTE(review): only referenced by the commented-out add_library below; confirm it is consumed elsewhere
+        "gpu/*.cu"
+        "akg/gpu/*.cc"
+        "akg/akgkernelbuild.cc"
+        "akg/akg_kernel_attrs_process.cc"
+    )
+
+    file(GLOB_RECURSE GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/*.cc")
+    list(REMOVE_ITEM GPU_SRC_LIST "gpu/nccl/nccl_gpu_kernel.cc")  # excluded by default; the gpu/nccl glob below re-adds it when ENABLE_MPI is on
+
+    if (ENABLE_MPI)
+        include(ExternalProject)  # NOTE(review): no ExternalProject_* call is visible in this file; confirm it is still needed
+        file(GLOB_RECURSE GPU_NCCL_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "gpu/nccl/*.cc")
+        list(APPEND GPU_SRC_LIST ${GPU_NCCL_LIST})
+    endif ()
+
+    # add_library(_mindspore_kernel_cuda_obj OBJECT ${CUDA_SRC_LIST})
+endif ()
+
+add_library(_mindspore_kernel_obj OBJECT ${KERNEL_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})  # lists of disabled backends are unset and expand to nothing
diff --git a/mindspore/ccsrc/onnx/CMakeLists.txt b/mindspore/ccsrc/onnx/CMakeLists.txt
index 2a25e67634..78884b5d05 100644
--- a/mindspore/ccsrc/onnx/CMakeLists.txt
+++ b/mindspore/ccsrc/onnx/CMakeLists.txt
@@ -1,5 +1,2 @@
-file(GLOB_RECURSE _ONNX_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "*.cc"
-        )
-
-add_library(_mindspore_onnx_obj OBJECT ${_ONNX_ALL_SRC_FILES})
+file(GLOB_RECURSE _ONNX_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+add_library(_mindspore_onnx_obj OBJECT ${_ONNX_SRC_FILES})
diff --git a/mindspore/ccsrc/onnx/onnx_exporter.cc b/mindspore/ccsrc/onnx/onnx_exporter.cc
index 772986d714..168e625a89 100644
--- a/mindspore/ccsrc/onnx/onnx_exporter.cc
+++ b/mindspore/ccsrc/onnx/onnx_exporter.cc
@@ -24,7 +24,7 @@
 #include <functional>
 
 #include "debug/anf_ir_utils.h"
-#include "./onnx.pb.h"
+#include "proto/onnx.pb.h"
 #include "operator/ops.h"
 
 namespace mindspore {
diff --git a/mindspore/ccsrc/operator/CMakeLists.txt b/mindspore/ccsrc/operator/CMakeLists.txt
index 328b4cf787..9a54ec047c 100644
--- a/mindspore/ccsrc/operator/CMakeLists.txt
+++ b/mindspore/ccsrc/operator/CMakeLists.txt
@@ -1,5 +1,2 @@
-file(GLOB_RECURSE _OPERATOR_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "*.cc"
-        )
-
-add_library(_mindspore_operator_obj OBJECT ${_OPERATOR_ALL_SRC_FILES})
+file(GLOB_RECURSE _OPERATOR_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+add_library(_mindspore_operator_obj OBJECT ${_OPERATOR_SRC_FILES})
diff --git a/mindspore/ccsrc/optimizer/CMakeLists.txt b/mindspore/ccsrc/optimizer/CMakeLists.txt
index 48cbeb41dd..197ece8505 100644
--- a/mindspore/ccsrc/optimizer/CMakeLists.txt
+++ b/mindspore/ccsrc/optimizer/CMakeLists.txt
@@ -1,9 +1,2 @@
-file(GLOB_RECURSE _OPTIMIZER_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "*.cc"
-        )
-
-add_library(_mindspore_optimizer_obj OBJECT ${_OPTIMIZER_ALL_SRC_FILES})
-if(ENABLE_DUMP_PROTO)
-    file(GLOB_RECURSE _PROTO_SRC_LIST  "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")
-    target_sources(_mindspore_optimizer_obj  PRIVATE ${_PROTO_SRC_LIST})
-endif()
\ No newline at end of file
+file(GLOB_RECURSE _OPTIMIZER_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+add_library(_mindspore_optimizer_obj OBJECT ${_OPTIMIZER_SRC_FILES})
diff --git a/mindspore/ccsrc/parallel/CMakeLists.txt b/mindspore/ccsrc/parallel/CMakeLists.txt
new file mode 100644
index 0000000000..0280266e40
--- /dev/null
+++ b/mindspore/ccsrc/parallel/CMakeLists.txt
@@ -0,0 +1,6 @@
+file(GLOB_RECURSE _PARALLEL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")  # every .cc below this directory, as paths relative to it
+if (ENABLE_DUMP_PROTO)
+    list(REMOVE_ITEM _PARALLEL_SRC_FILES "parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc")  # NOTE(review): glob results are relative to this directory, so this "parallel/"-prefixed entry looks like it never matches; confirm the intent before changing it
+endif ()
+
+add_library(_mindspore_parallel_obj OBJECT ${_PARALLEL_SRC_FILES})
diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc
index dd518dc76c..981cf8a115 100644
--- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc
+++ b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc
@@ -23,7 +23,7 @@
 #include "common/utils.h"
 #include "utils/convert_utils.h"
 #include "utils/log_adapter.h"
-#include "utils/node_strategy.pb.h"
+#include "proto/node_strategy.pb.h"
 
 namespace mindspore {
 namespace parallel {
diff --git a/mindspore/ccsrc/pipeline/CMakeLists.txt b/mindspore/ccsrc/pipeline/CMakeLists.txt
index 4aadbcce58..630eb510fe 100644
--- a/mindspore/ccsrc/pipeline/CMakeLists.txt
+++ b/mindspore/ccsrc/pipeline/CMakeLists.txt
@@ -1,12 +1,17 @@
-file(GLOB_RECURSE _PIPELINE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "pipeline.cc"
-        "resource.cc"
-        "pass.cc"
-        "action.cc"
-        "validator.cc"
-        "remove_value_node_dup.cc"
-        "parse/*.cc"
-        "static_analysis/*.cc"
-        )
+file(GLOB_RECURSE _PIPELINE_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+    "pipeline.cc"
+    "resource.cc"
+    "pass.cc"
+    "action.cc"
+    "validator.cc"
+    "remove_value_node_dup.cc"
+    "parse/*.cc"
+    "static_analysis/*.cc"
+)
 
-add_library(_mindspore_pipeline_obj OBJECT ${_PIPELINE_ALL_SRC_FILES})
\ No newline at end of file
+if (ENABLE_GE OR ENABLE_D)
+    file(GLOB_RECURSE _PIPELINE_GE_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pipeline_ge.cc")
+    list(APPEND _PIPELINE_SRC_FILES ${_PIPELINE_GE_SRC_FILES})
+endif ()
+
+add_library(_mindspore_pipeline_obj OBJECT ${_PIPELINE_SRC_FILES})
diff --git a/mindspore/ccsrc/pre_activate/CMakeLists.txt b/mindspore/ccsrc/pre_activate/CMakeLists.txt
new file mode 100644
index 0000000000..611b5de4e2
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/CMakeLists.txt
@@ -0,0 +1,13 @@
+file(GLOB_RECURSE _PREACTIVATE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}  # sources compiled for every build configuration
+    "common/*.cc"
+    "mem_reuse/*.cc"
+    "pass/*.cc"
+    "gpu/*.cc"
+)
+
+if (ENABLE_D)  # ascend/ sources are only added when ENABLE_D is on
+    file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ascend/*.cc")
+    list(APPEND _PREACTIVATE_SRC_LIST ${_D_SRC_LIST})
+endif ()
+
+add_library(_mindspore_pre_activate_obj OBJECT ${_PREACTIVATE_SRC_LIST})
diff --git a/mindspore/ccsrc/predict/CMakeLists.txt b/mindspore/ccsrc/predict/CMakeLists.txt
index d88cf5cd83..a8cca431e7 100644
--- a/mindspore/ccsrc/predict/CMakeLists.txt
+++ b/mindspore/ccsrc/predict/CMakeLists.txt
@@ -1,8 +1,14 @@
-file(GLOB_RECURSE _PRE_ACTIVE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "ascend/*.cc"
-        "common/*.cc"
-        "pass/*.cc"
-        "gpu/*.cc"
-        )
+file(GLOB_RECURSE _PREDICT_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+    "predict.cc"
+    "generator/utils/ir_model_util.cc"
+    "converter/*.cc"
+    "converter/attr_utils/*.cc"
+    "converter/lite_model/*.cc"
+    "converter/lite_model/operations/*.cc"
+)
 
-add_library(_mindspore_pre_active_obj OBJECT ${_PRE_ACTIVE_ALL_SRC_FILES})
\ No newline at end of file
+if (ENABLE_D)
+    file(GLOB_RECURSE _D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "generator/ir/*.cc")
+    list(APPEND _PREDICT_SRC_LIST ${_D_SRC_LIST})
+endif ()
+add_library(_mindspore_predict_obj OBJECT ${_PREDICT_SRC_LIST})
\ No newline at end of file
diff --git a/mindspore/ccsrc/predict/generator/ir/ir_task_info.h b/mindspore/ccsrc/predict/generator/ir/ir_task_info.h
index 8e80cdddbe..4b3ac85ea6 100644
--- a/mindspore/ccsrc/predict/generator/ir/ir_task_info.h
+++ b/mindspore/ccsrc/predict/generator/ir/ir_task_info.h
@@ -21,7 +21,7 @@
 #include <memory>
 #include <string>
 #include <vector>
-#include "predict/proto/ge_runtime_taskinfo.pb.h"
+#include "proto/ge_runtime_taskinfo.pb.h"
 
 namespace mindspore {
 namespace generator {
diff --git a/mindspore/ccsrc/pybind_api/CMakeLists.txt b/mindspore/ccsrc/pybind_api/CMakeLists.txt
index adcb5ddda1..d04d173f60 100644
--- a/mindspore/ccsrc/pybind_api/CMakeLists.txt
+++ b/mindspore/ccsrc/pybind_api/CMakeLists.txt
@@ -1,5 +1,2 @@
-file(GLOB_RECURSE _PYNATIVE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "*.cc"
-        )
-
-add_library(_mindspore_pynative_obj OBJECT ${_PYNATIVE_ALL_SRC_FILES})
\ No newline at end of file
+file(GLOB_RECURSE _PYBIND_API_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+add_library(_mindspore_pybind_api_obj OBJECT ${_PYBIND_API_SRC_LIST})
\ No newline at end of file
diff --git a/mindspore/ccsrc/pynative/CMakeLists.txt b/mindspore/ccsrc/pynative/CMakeLists.txt
index adcb5ddda1..9536986147 100644
--- a/mindspore/ccsrc/pynative/CMakeLists.txt
+++ b/mindspore/ccsrc/pynative/CMakeLists.txt
@@ -1,5 +1,8 @@
-file(GLOB_RECURSE _PYNATIVE_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "*.cc"
-        )
-
-add_library(_mindspore_pynative_obj OBJECT ${_PYNATIVE_ALL_SRC_FILES})
\ No newline at end of file
+file(GLOB_RECURSE _PYNATIVE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "base.cc" "pynative_execute.cc")
+
+if (ENABLE_GE)
+    file(GLOB_RECURSE _GE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "pynative_execute_ge.cc")
+    list(APPEND _PYNATIVE_SRC_LIST ${_GE_SRC_LIST})
+endif ()
+
+add_library(_mindspore_pynative_obj OBJECT ${_PYNATIVE_SRC_LIST})
diff --git a/mindspore/ccsrc/session/CMakeLists.txt b/mindspore/ccsrc/session/CMakeLists.txt
index 2e685b04f4..66495626eb 100644
--- a/mindspore/ccsrc/session/CMakeLists.txt
+++ b/mindspore/ccsrc/session/CMakeLists.txt
@@ -1,22 +1,22 @@
-file(GLOB_RECURSE _SESSION_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "kernel_graph.cc"
-        "session_basic.cc"
-        "session_factory.cc"
-        "anf_runtime_algorithm.cc"
+file(GLOB_RECURSE _SESSION_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+    "kernel_graph.cc"
+    "session_basic.cc"
+    "session_factory.cc"
+    "anf_runtime_algorithm.cc"
+)
+
+if (ENABLE_GPU)
+    file(GLOB_RECURSE _GPU_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+        "gpu_session.cc"
         )
-#TODO :  Not include session_context.cc
-add_library(_mindspore_session_obj OBJECT ${_SESSION_ALL_SRC_FILES})
+    list(APPEND _SESSION_SRC_LIST ${_GPU_SRC_LIST})
+endif ()
 
-if(ENABLE_D)
-        file(GLOB_RECURSE _D_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "ascend_session.cc"
-            )
-        add_library(_mindspore_session_obj  OBJECT ${_D_SRC_LIST})
-endif()
+if (ENABLE_D)
+    file(GLOB_RECURSE _D_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+        "ascend_session.cc"
+        )
+    list(APPEND _SESSION_SRC_LIST ${_D_SRC_LIST})
+endif ()
 
-if(ENABLE_GPU)
-        file(GLOB_RECURSE _C_EXPRESSION_GPU_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-            "gpu_session.cc"
-            )
-        add_library(_c_expression_gpu_session_obj OBJECT ${_C_EXPRESSION_GPU_SRC_LIST})
-endif()
\ No newline at end of file
+add_library(_mindspore_session_obj OBJECT ${_SESSION_SRC_LIST})
\ No newline at end of file
diff --git a/mindspore/ccsrc/transform/CMakeLists.txt b/mindspore/ccsrc/transform/CMakeLists.txt
index 718f53f627..d1b70c000d 100644
--- a/mindspore/ccsrc/transform/CMakeLists.txt
+++ b/mindspore/ccsrc/transform/CMakeLists.txt
@@ -1,5 +1,8 @@
-file(GLOB_RECURSE _TRANSFORM_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "*.cc"
-        )
+if (ENABLE_GE OR ENABLE_D)
+    file(GLOB_RECURSE _TRANSFORM_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+    add_library(_mindspore_transform_obj OBJECT ${_TRANSFORM_SRC_LIST})
 
-add_library(_mindspore_transform_obj OBJECT ${_TRANSFORM_ALL_SRC_FILES})
+    if (NOT ENABLE_GE)
+        target_compile_definitions(_mindspore_transform_obj PRIVATE NO_GE_CLIENT)
+    endif()
+endif ()
diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc
index e057b26f02..c9a1b21941 100644
--- a/mindspore/ccsrc/transform/convert.cc
+++ b/mindspore/ccsrc/transform/convert.cc
@@ -125,6 +125,7 @@ const char kNameSplitD[] = "Split";
 const char kNameBatchToSpaceNd[] = "BatchToSpaceNd";
 const char kNameFloor[] = "Floor";
 const char kNameNPUGetFloatStatus[] = "NPUGetFloatStatus";
+const char kNameAssign[] = "Assign";
 const char kNameAssignAdd[] = "AssignAdd";
 const char kNameAssignSub[] = "AssignSub";
 const char kNameNPUAllocFloatStatus[] = "NPUAllocFloatStatus";
@@ -1155,8 +1156,7 @@ void DfGraphConvertor::SetOpControlInput(const AnfNodePtr node) {
   }
 }
 
-const std::vector<std::string> trans_var_list = {prim::kPrimAssign->name(), string(kNameAssignAdd),
-                                                 string(kNameAssignSub)};
+const std::vector<std::string> trans_var_list = {string(kNameAssign), string(kNameAssignAdd), string(kNameAssignSub)};
 
 void DfGraphConvertor::SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node) {
   OperatorPtr src = Convert(node);
diff --git a/mindspore/ccsrc/utils/CMakeLists.txt b/mindspore/ccsrc/utils/CMakeLists.txt
index 1fcf5e0944..52d03c3723 100644
--- a/mindspore/ccsrc/utils/CMakeLists.txt
+++ b/mindspore/ccsrc/utils/CMakeLists.txt
@@ -1,3 +1,8 @@
-file(GLOB_RECURSE _UTILS_ALL_SRC_FILES *.cc)
-#TODO :  "utils/node_utils.cc" 
-add_library(_mindspore_utils_obj OBJECT ${_UTILS_ALL_SRC_FILES})
+file(GLOB_RECURSE _UTILS_SRC_LIST ./*.cc)  # no RELATIVE argument, so the entries are full paths
+
+if (NOT ENABLE_GE)  # callbacks_ge.cc is only compiled when ENABLE_GE is on
+    file(GLOB_RECURSE _UTILS_GE_SRC_FILES ./callbacks_ge.cc)  # globbed the same way so the entries match those in _UTILS_SRC_LIST
+    list(REMOVE_ITEM _UTILS_SRC_LIST ${_UTILS_GE_SRC_FILES})
+endif ()
+
+add_library(_mindspore_utils_obj OBJECT ${_UTILS_SRC_LIST})
diff --git a/mindspore/ccsrc/vm/CMakeLists.txt b/mindspore/ccsrc/vm/CMakeLists.txt
index 2df984a29f..c5408e683e 100644
--- a/mindspore/ccsrc/vm/CMakeLists.txt
+++ b/mindspore/ccsrc/vm/CMakeLists.txt
@@ -1,5 +1,2 @@
-file(GLOB_RECURSE _VM_ALL_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "*.cc"
-        )
-
-add_library(_mindspore_vm_obj OBJECT ${_VM_ALL_SRC_FILES})
\ No newline at end of file
+file(GLOB_RECURSE _VM_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
+add_library(_mindspore_vm_obj OBJECT ${_VM_SRC_LIST})

From 3cb692bea10e477deffdbe631bc88fc30f93622e Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Sun, 26 Apr 2020 17:57:12 +0800
Subject: [PATCH 099/242] modify resnet101 scripts for pylint

---
 example/resnet101_imagenet/README.md       |  3 -
 example/resnet101_imagenet/config.py       |  3 -
 example/resnet101_imagenet/lr_generator.py | 60 -----------------
 example/resnet101_imagenet/train.py        | 17 ++---
 example/resnet101_imagenet/var_init.py     | 76 ++++++++++++----------
 mindspore/model_zoo/resnet.py              |  2 +-
 6 files changed, 49 insertions(+), 112 deletions(-)

diff --git a/example/resnet101_imagenet/README.md b/example/resnet101_imagenet/README.md
index bc653675f2..d5729b70db 100644
--- a/example/resnet101_imagenet/README.md
+++ b/example/resnet101_imagenet/README.md
@@ -54,9 +54,6 @@ Parameters for both training and evaluating can be set in config.py.
 "save_checkpoint_steps": 500,     # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
 "keep_checkpoint_max": 40,        # only keep the last keep_checkpoint_max checkpoint
 "save_checkpoint_path": "./",     # path to save checkpoint relative to the executed path
-"lr_init": 0.01,                  # initial learning rate
-"lr_end": 0.00001,                # final learning rate
-"lr_max": 0.1,                    # maximum learning rate
 "warmup_epochs": 0,               # number of warmup epoch
 "lr_decay_mode": "cosine"         # decay mode for generating learning rate
 "label_smooth": 1,                # label_smooth
diff --git a/example/resnet101_imagenet/config.py b/example/resnet101_imagenet/config.py
index 0ad37c8678..ca58f24da3 100755
--- a/example/resnet101_imagenet/config.py
+++ b/example/resnet101_imagenet/config.py
@@ -31,9 +31,6 @@ config = ed({
     "save_checkpoint_steps": 500,
     "keep_checkpoint_max": 40,
     "save_checkpoint_path": "./",
-    "lr_init": 0.01,
-    "lr_end": 0.00001,
-    "lr_max": 0.1,
     "warmup_epochs": 0,
     "lr_decay_mode": "cosine",
     "label_smooth": 1,
diff --git a/example/resnet101_imagenet/lr_generator.py b/example/resnet101_imagenet/lr_generator.py
index 67ff1fef25..88cb85cc5b 100755
--- a/example/resnet101_imagenet/lr_generator.py
+++ b/example/resnet101_imagenet/lr_generator.py
@@ -50,63 +50,3 @@ def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch):
             lr = base_lr * decayed
         lr_each_step.append(lr)
     return np.array(lr_each_step).astype(np.float32)
-
-def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
-    """
-    generate learning rate array
-
-    Args:
-       global_step(int): total steps of the training
-       lr_init(float): init learning rate
-       lr_end(float): end learning rate
-       lr_max(float): max learning rate
-       warmup_epochs(int): number of warmup epochs
-       total_epochs(int): total epoch of training
-       steps_per_epoch(int): steps of one epoch
-       lr_decay_mode(string): learning rate decay mode, including steps, poly or default
-
-    Returns:
-       np.array, learning rate array
-    """
-    lr_each_step = []
-    total_steps = steps_per_epoch * total_epochs
-    warmup_steps = steps_per_epoch * warmup_epochs
-    if lr_decay_mode == 'steps':
-        decay_epoch_index = [0.3 * total_steps, 0.6 * total_steps, 0.8 * total_steps]
-        for i in range(total_steps):
-            if i < decay_epoch_index[0]:
-                lr = lr_max
-            elif i < decay_epoch_index[1]:
-                lr = lr_max * 0.1
-            elif i < decay_epoch_index[2]:
-                lr = lr_max * 0.01
-            else:
-                lr = lr_max * 0.001
-            lr_each_step.append(lr)
-    elif lr_decay_mode == 'poly':
-        if warmup_steps != 0:
-            inc_each_step = (float(lr_max) - float(lr_init)) / float(warmup_steps)
-        else:
-            inc_each_step = 0
-        for i in range(total_steps):
-            if i < warmup_steps:
-                lr = float(lr_init) + inc_each_step * float(i)
-            else:
-                base = (1.0 - (float(i) - float(warmup_steps)) / (float(total_steps) - float(warmup_steps)))
-                lr = float(lr_max) * base * base
-                if lr < 0.0:
-                    lr = 0.0
-            lr_each_step.append(lr)
-    else:
-        for i in range(total_steps):
-            if i < warmup_steps:
-                lr = lr_init + (lr_max - lr_init) * i / warmup_steps
-            else:
-                lr = lr_max - (lr_max - lr_end) * (i - warmup_steps) / (total_steps - warmup_steps)
-            lr_each_step.append(lr)
-
-    current_step = global_step
-    lr_each_step = np.array(lr_each_step).astype(np.float32)
-    learning_rate = lr_each_step[current_step:]
-
-    return learning_rate
diff --git a/example/resnet101_imagenet/train.py b/example/resnet101_imagenet/train.py
index 37f49ec3d7..0f20637595 100755
--- a/example/resnet101_imagenet/train.py
+++ b/example/resnet101_imagenet/train.py
@@ -19,7 +19,7 @@ import argparse
 import random
 import numpy as np
 from dataset import create_dataset
-from lr_generator import get_lr, warmup_cosine_annealing_lr
+from lr_generator import warmup_cosine_annealing_lr
 from config import config
 from mindspore import context
 from mindspore import Tensor
@@ -32,9 +32,9 @@ from mindspore.train.loss_scale_manager import FixedLossScaleManager
 import mindspore.dataset.engine as de
 from mindspore.communication.management import init
 import mindspore.nn as nn
+import mindspore.common.initializer as weight_init
 from crossentropy import CrossEntropy
 from var_init import default_recurisive_init, KaimingNormal
-import mindspore.common.initializer as weight_init
 
 random.seed(1)
 np.random.seed(1)
@@ -72,7 +72,7 @@ if __name__ == '__main__':
     net = resnet101(class_num=config.class_num)
     # weight init
     default_recurisive_init(net)
-    for name, cell in net.cells_and_names():
+    for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
             cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
                                                                               mode='fan_out', nonlinearity='relu'),
@@ -83,17 +83,12 @@ if __name__ == '__main__':
     loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     if args_opt.do_train:
         dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
-                repeat_num=epoch_size, batch_size=config.batch_size)
+                                 repeat_num=epoch_size, batch_size=config.batch_size)
         step_size = dataset.get_dataset_size()
         loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
 
-        # learning rate strategy
-        if config.lr_decay_mode == 'cosine':
-            lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size))
-        else:
-            lr = Tensor(get_lr(global_step=0, lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
-                               warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size,
-                               lr_decay_mode='poly'))
+        # learning rate strategy with cosine
+        lr = Tensor(warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size))
         opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                        config.weight_decay, config.loss_scale)
         model = Model(net, loss_fn=loss, optimizer=opt, amp_level='O2', keep_batchnorm_fp32=False,
diff --git a/example/resnet101_imagenet/var_init.py b/example/resnet101_imagenet/var_init.py
index 061ec94fbf..34d8664a49 100755
--- a/example/resnet101_imagenet/var_init.py
+++ b/example/resnet101_imagenet/var_init.py
@@ -18,10 +18,10 @@ import numpy as np
 from mindspore.common import initializer as init
 import mindspore.nn as nn
 from mindspore import Tensor
- 
+
 def calculate_gain(nonlinearity, param=None):
     r"""Return the recommended gain value for the given nonlinearity function.
-    The values are as follows: 
+    The values are as follows:
     ================= ====================================================
     nonlinearity      gain
     ================= ====================================================
@@ -37,12 +37,13 @@ def calculate_gain(nonlinearity, param=None):
         param: optional parameter for the non-linear function
     """
     linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
+    gain = 0
     if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
-        return 1
+        gain = 1
     elif nonlinearity == 'tanh':
-        return 5.0 / 3
+        gain = 5.0 / 3
     elif nonlinearity == 'relu':
-        return math.sqrt(2.0)
+        gain = math.sqrt(2.0)
     elif nonlinearity == 'leaky_relu':
         if param is None:
             negative_slope = 0.01
@@ -51,15 +52,16 @@ def calculate_gain(nonlinearity, param=None):
             negative_slope = param
         else:
             raise ValueError("negative_slope {} not a valid number".format(param))
-        return math.sqrt(2.0 / (1 + negative_slope ** 2))
+        gain = math.sqrt(2.0 / (1 + negative_slope ** 2))
     else:
         raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
-    
+    return gain
+
 def _calculate_correct_fan(array, mode):
     mode = mode.lower()
     valid_modes = ['fan_in', 'fan_out']
     if mode not in valid_modes:
-        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes)) 
+        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
     fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
     return fan_in if mode == 'fan_in' else fan_out
 
@@ -83,13 +85,12 @@ def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
             backwards pass.
         nonlinearity: the non-linear function (`nn.functional` name),
             recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """ 
+    """
     fan = _calculate_correct_fan(array, mode)
     gain = calculate_gain(nonlinearity, a)
     std = gain / math.sqrt(fan)
     bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
     return np.random.uniform(-bound, bound, array.shape)
- 
 
 def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     r"""Fills the input `Tensor` with values according to the method
@@ -97,12 +98,10 @@ def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     performance on ImageNet classification` - He, K. et al. (2015), using a
     normal distribution. The resulting tensor will have values sampled from
     :math:`\mathcal{N}(0, \text{std}^2)` where
- 
     .. math::
         \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
-
     Also known as He initialization.
- 
+
     Args:
         array: an n-dimensional `tensor`
         a: the negative slope of the rectifier used after this layer (only
@@ -118,13 +117,12 @@ def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
     gain = calculate_gain(nonlinearity, a)
     std = gain / math.sqrt(fan)
     return np.random.normal(0, std, array.shape)
- 
+
 def _calculate_fan_in_and_fan_out(array):
     """calculate the fan_in and fan_out for input array"""
     dimensions = len(array.shape)
     if dimensions < 2:
         raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
- 
     num_input_fmaps = array.shape[1]
     num_output_fmaps = array.shape[0]
     receptive_field_size = 1
@@ -132,19 +130,30 @@ def _calculate_fan_in_and_fan_out(array):
         receptive_field_size = array[0][0].size
     fan_in = num_input_fmaps * receptive_field_size
     fan_out = num_output_fmaps * receptive_field_size
- 
     return fan_in, fan_out
- 
+
+def assignment(arr, num):
+    """Assign the value of num to arr"""
+    if arr.shape == ():  # 0-d array: the write goes through a 1-element reshape (presumably because `arr[:]` is not valid on a 0-d array - confirm)
+        arr = arr.reshape((1))  # `(1)` is the plain int 1, i.e. a one-element 1-d shape
+        arr[:] = num  # callers in this file discard the return value, so this must write into arr's own buffer (assumes reshape returned a view - TODO confirm)
+        arr = arr.reshape(())  # restore the 0-d shape for the returned array
+    else:
+        if isinstance(num, np.ndarray):
+            arr[:] = num[:]  # array source: element-wise copy into the existing buffer
+        else:
+            arr[:] = num  # non-ndarray source: assigned through the same full slice
+    return arr
+
 class KaimingUniform(init.Initializer):
     def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
         super(KaimingUniform, self).__init__()
         self.a = a
         self.mode = mode
         self.nonlinearity = nonlinearity
- 
     def _initialize(self, arr):
         tmp = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
-        init._assignment(arr, tmp) 
+        assignment(arr, tmp)
 
 class KaimingNormal(init.Initializer):
     def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
@@ -152,33 +161,32 @@ class KaimingNormal(init.Initializer):
         self.a = a
         self.mode = mode
         self.nonlinearity = nonlinearity
- 
     def _initialize(self, arr):
         tmp = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
-        init._assignment(arr, tmp)
+        assignment(arr, tmp)
 
 def default_recurisive_init(custom_cell):
     """weight init for conv2d and dense"""
-    for name, cell in custom_cell.cells_and_names():
+    for _, cell in custom_cell.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), 
-                    cell.weight.default_input.shape(), 
-                    cell.weight.default_input.dtype())
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                         cell.weight.default_input.shape(),
+                                                         cell.weight.default_input.dtype())
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, 
-                    cell.bias.default_input.shape()), 
-                    cell.bias.default_input.dtype())
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
+                                                                   cell.bias.default_input.shape()),
+                                                 cell.bias.default_input.dtype())
         elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)), 
-                    cell.weight.default_input.shape(), 
-                    cell.weight.default_input.dtype())
+            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                         cell.weight.default_input.shape(),
+                                                         cell.weight.default_input.dtype())
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, 
-                    cell.bias.default_input.shape()), 
-                    cell.bias.default_input.dtype())
+                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
+                                                                   cell.bias.default_input.shape()),
+                                                 cell.bias.default_input.dtype())
         elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
             pass
diff --git a/mindspore/model_zoo/resnet.py b/mindspore/model_zoo/resnet.py
index d67f26814c..3055026718 100755
--- a/mindspore/model_zoo/resnet.py
+++ b/mindspore/model_zoo/resnet.py
@@ -279,4 +279,4 @@ def resnet101(class_num=1001):
                   [64, 256, 512, 1024],
                   [256, 512, 1024, 2048],
                   [1, 2, 2, 2],
-                  class_num)
\ No newline at end of file
+                  class_num)

From 8c3931cf1d2a9e40a21319d5fd7e4c88905c9b3b Mon Sep 17 00:00:00 2001
From: Zirui Wu <zirui.wu@huawei.com>
Date: Sun, 26 Apr 2020 10:07:30 -0400
Subject: [PATCH 100/242] fix first epoch always shuffle with default seed in
 random sampler

fix ut
---
 .../dataset/engine/datasetops/source/sampler/random_sampler.cc  | 2 +-
 tests/ut/python/dataset/test_datasets_celeba.py                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc
index de8cde409f..e0efda6e53 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc
@@ -53,6 +53,7 @@ Status RandomSampler::InitSampler() {
   num_samples_ = (user_num_samples_ < num_samples_) ? user_num_samples_ : num_samples_;
   CHECK_FAIL_RETURN_UNEXPECTED(num_samples_ > 0 && num_rows_ > 0, "both num_samples & num_rows need to be positive");
   samples_per_buffer_ = samples_per_buffer_ > num_samples_ ? num_samples_ : samples_per_buffer_;
+  rnd_.seed(seed_++);
   if (replacement_ == false) {
     shuffled_ids_.reserve(num_rows_);
     for (int64_t i = 0; i < num_rows_; i++) {
@@ -62,7 +63,6 @@ Status RandomSampler::InitSampler() {
   } else {
     dist = std::make_unique<std::uniform_int_distribution<int64_t>>(0, num_rows_ - 1);
   }
-  rnd_.seed(seed_++);
   return Status::OK();
 }
 
diff --git a/tests/ut/python/dataset/test_datasets_celeba.py b/tests/ut/python/dataset/test_datasets_celeba.py
index 6b8859f433..11c5fcb67a 100644
--- a/tests/ut/python/dataset/test_datasets_celeba.py
+++ b/tests/ut/python/dataset/test_datasets_celeba.py
@@ -20,7 +20,7 @@ DATA_DIR = "../data/dataset/testCelebAData/"
 
 
 def test_celeba_dataset_label():
-    data = ds.CelebADataset(DATA_DIR, decode=True)
+    data = ds.CelebADataset(DATA_DIR, decode=True, shuffle=False)
     expect_labels = [
         [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
          0, 0, 1],

From e227415673330bf49cda36870d413e2e5f417bdb Mon Sep 17 00:00:00 2001
From: Xiaoda Zhang <zhangxiaoda@huawei.com>
Date: Sun, 26 Apr 2020 15:33:17 +0800
Subject: [PATCH 101/242] support-the-multiple-subgraphs-in-the-ANF

---
 .../parallel/auto_parallel/graph_costmodel.h  |   1 +
 mindspore/ccsrc/parallel/costmodel_context.cc |   2 +
 mindspore/ccsrc/parallel/costmodel_context.h  |   5 +
 .../ccsrc/parallel/step_auto_parallel.cc      | 104 ++++++++++++++----
 mindspore/ccsrc/parallel/step_auto_parallel.h |   4 +-
 mindspore/ccsrc/pipeline/action.cc            |   6 +-
 mindspore/ccsrc/pipeline/init.cc              |   2 +
 mindspore/parallel/_cost_model_context.py     |  29 +++++
 .../test_auto_parallel_double_subgraphs.py    | 101 +++++++++++++++++
 .../parallel/test_auto_parallel_two_bn.py     |  70 ++++++++++++
 10 files changed, 298 insertions(+), 26 deletions(-)
 create mode 100644 tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py
 create mode 100644 tests/ut/python/parallel/test_auto_parallel_two_bn.py

diff --git a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h
index 530f67ba45..31de9f4456 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h
@@ -44,6 +44,7 @@ namespace parallel {
 #define DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE 16
 #define DEFAULT_FULLY_USE_DEVICES true
 #define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false
+#define DEFAULT_IS_MULTI_SUBGRAPHS false
 
 class CostGraph;
 using CostGraphPtr = std::shared_ptr<CostGraph>;
diff --git a/mindspore/ccsrc/parallel/costmodel_context.cc b/mindspore/ccsrc/parallel/costmodel_context.cc
index 82b260f967..591fa737aa 100644
--- a/mindspore/ccsrc/parallel/costmodel_context.cc
+++ b/mindspore/ccsrc/parallel/costmodel_context.cc
@@ -46,6 +46,7 @@ void CostModelContext::ResetCostModel() {
   costmodel_communi_threshold_ = DEFAULT_COST_MODEL_COMMUNI_THRESHOLD;
   costmodel_communi_const_ = DEFAULT_COST_MODEL_COMMUNI_CONST;
   costmodel_communi_bias_ = DEFAULT_COST_MODEL_COMMUNI_BIAS;
+  is_multi_subgraphs_ = DEFAULT_IS_MULTI_SUBGRAPHS;
   costmodel_allreduce_fusion_algorithm_ = DEFAULT_COST_MODEL_ALLREDUCE_FUSION_ALGORITHM;
   costmodel_allreduce_fusion_times_ = DEFAULT_COST_MODEL_ALLREDUCE_FUSION_TIMES;
   costmodel_allreduce_fusion_tail_percent_ = DEFAULT_COST_MODEL_ALLREDUCE_FUSION_TAIL_PERCENT;
@@ -84,6 +85,7 @@ void CostModelContext::set_costmodel_communi_const(double cm_communi_const) {
 
 void CostModelContext::set_costmodel_communi_bias(double cm_communi_bias) { costmodel_communi_bias_ = cm_communi_bias; }
 
+void CostModelContext::set_multi_subgraphs(bool multi_graphs) { is_multi_subgraphs_ = multi_graphs; }
 void CostModelContext::set_costmodel_allreduce_fusion_algorithm(int32_t algorithm) {
   costmodel_allreduce_fusion_algorithm_ = algorithm;
 }
diff --git a/mindspore/ccsrc/parallel/costmodel_context.h b/mindspore/ccsrc/parallel/costmodel_context.h
index 9937483051..ebb0d00008 100644
--- a/mindspore/ccsrc/parallel/costmodel_context.h
+++ b/mindspore/ccsrc/parallel/costmodel_context.h
@@ -67,6 +67,9 @@ class CostModelContext {
   void set_costmodel_communi_bias(double);
   double costmodel_communi_bias() const { return costmodel_communi_bias_; }
 
+  void set_multi_subgraphs(bool);
+  bool is_multi_subgraphs() const { return is_multi_subgraphs_; }
+
   void set_costmodel_allreduce_fusion_algorithm(int32_t);
   int32_t costmodel_allreduce_fusion_algorithm() const { return costmodel_allreduce_fusion_algorithm_; }
 
@@ -138,6 +141,8 @@ class CostModelContext {
   // COST_MODEL_COMMUNI_BIAS
   double costmodel_communi_bias_;
 
+  bool is_multi_subgraphs_;
+
   int32_t costmodel_allreduce_fusion_algorithm_;
 
   int32_t costmodel_allreduce_fusion_times_;
diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc
index 7d37bafe98..269e624efa 100644
--- a/mindspore/ccsrc/parallel/step_auto_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc
@@ -426,13 +426,13 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &
   return operator_info;
 }
 
-Status ConstructCostGraphNodes(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &) {
+// Using CNode's UniqueIds to construct nodes
+Status ConstructCostGraphNodesByUniqueId(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &) {
   MS_LOG(INFO) << "Constructing nodes for cost graph begins.";
   entire_costgraph = std::make_shared<CostGraph>();
   entire_costgraph->SetDeviceMemoryAndCostParameter();
-  bool new_operator = true, first_operator = true;
-  std::string first_operator_cnode;
-  size_t current_op_index = 0;
+  // The map from CNode's UniqueId to its operatorInfo
+  std::map<std::string, OperatorInfoPtr> from_cnode_to_info;
 
   // Step 1
   for (auto &node : all_nodes) {
@@ -449,12 +449,8 @@ Status ConstructCostGraphNodes(const std::vector<AnfNodePtr> &all_nodes, const F
     PrimitivePtr prim = GetValueNode<PrimitivePtr>(prim_anf_node);
     MS_EXCEPTION_IF_NULL(prim);
 
-    // When visiting the second subgraph, use the corresponding operatorInfo which already created
-    bool modify_new_operator = (new_operator) && (!first_operator) && (cnode->UniqueId() == first_operator_cnode);
-    if (modify_new_operator) {
-      new_operator = false;
-    }
-    if (new_operator) {
+    auto search_cnode = from_cnode_to_info.find(cnode->UniqueId());
+    if (search_cnode == from_cnode_to_info.end()) {
       auto operator_info = CreateTheOperatorInfo(prim, cnode);
       if (operator_info == nullptr) {
         return FAILED;
@@ -465,14 +461,67 @@ Status ConstructCostGraphNodes(const std::vector<AnfNodePtr> &all_nodes, const F
 
       entire_costgraph->AddOperator(operator_info);
       (void)cnode->set_operator_info(operator_info);
-      if (first_operator) {
-        first_operator_cnode = cnode->UniqueId();
-        first_operator = false;
+      MS_LOG(INFO) << "The CNode with UniqueId: " << cnode->UniqueId()
+                   << " and UniqueIdThroughCopy: " << cnode->UniqueIdThroughCopy()
+                   << " is set OperatorInfo: " << operator_info->name() << ", Primitive: " << prim->name();
+      (void)from_cnode_to_info.emplace(std::make_pair(cnode->UniqueId(), operator_info));  // same key as find()
+      // Needed by rec_parser
+      entire_costgraph->add_inputs_tensor_name(inputs_tensor_name);
+    } else {
+      // Two CNODEs' UniqueIds should not be equal
+      MS_LOG(EXCEPTION) << "The CNode with UniqueId: " << cnode->UniqueId()
+                        << " and UniqueIdThroughCopy: " << cnode->UniqueIdThroughCopy()
+                        << " is set OperatorInfo: " << search_cnode->second->name() << ", Primitive: " << prim->name();
+    }
+  }
+
+  MS_LOG(INFO) << "Constructing nodes for cost graph ends.";
+  return SUCCESS;
+}
+
+// Using CNode's UniqueIdThroughCopys to construct nodes
+Status ConstructCostGraphNodesByUniqueIdTC(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &) {
+  MS_LOG(INFO) << "Constructing nodes for cost graph begins.";
+  entire_costgraph = std::make_shared<CostGraph>();
+  entire_costgraph->SetDeviceMemoryAndCostParameter();
+  // The map from CNode's UniqueIdThroughCopy to its operatorInfo
+  std::map<std::string, OperatorInfoPtr> from_cnode_to_info;
+
+  for (auto &node : all_nodes) {
+    // NOTE: we only care about splittable Primitive operators
+    auto cnode = node->cast<CNodePtr>();
+    bool bool_result = (cnode == nullptr) || (!IsValueNode<Primitive>(cnode->input(0)));
+    if (bool_result) {
+      continue;
+    }
+    ValueNodePtr prim_anf_node = cnode->input(0)->cast<ValueNodePtr>();
+    if (!IsAutoParallelCareNode(cnode)) {
+      continue;
+    }
+    PrimitivePtr prim = GetValueNode<PrimitivePtr>(prim_anf_node);
+
+    // Find the operatorInfo if it exists
+    auto search_cnode = from_cnode_to_info.find(cnode->UniqueIdThroughCopy());
+    if (search_cnode == from_cnode_to_info.end()) {
+      // In this case, the corresponding OperatorInfo is not created, create the new one.
+      auto operator_info = CreateTheOperatorInfo(prim, cnode);
+      if (operator_info == nullptr) {
+        return FAILED;
       }
       // Needed by rec_parser
+      operator_info->set_type(prim->name());
+      std::vector<std::string> inputs_tensor_name = ExtractInputsTensorName(cnode);
+
+      entire_costgraph->AddOperator(operator_info);
+      (void)cnode->set_operator_info(operator_info);
+      MS_LOG(INFO) << "The CNode with UniqueId: " << cnode->UniqueId()
+                   << " and UniqueIdThroughCopy: " << cnode->UniqueIdThroughCopy()
+                   << " is set OperatorInfo: " << operator_info->name() << ", Primitive: " << prim->name();
+      (void)from_cnode_to_info.emplace(std::make_pair(cnode->UniqueIdThroughCopy(), operator_info));
+      // Needed by rec_parser
       entire_costgraph->add_inputs_tensor_name(inputs_tensor_name);
     } else {
-      auto current_op_ptr = entire_costgraph->FindOperatorByIndex(current_op_index);
+      auto current_op_ptr = search_cnode->second;
       if (current_op_ptr == nullptr) {
         MS_LOG(EXCEPTION) << "Find " << prim->name() << " from CostGraph failed.";
       } else {
@@ -484,14 +533,12 @@ Status ConstructCostGraphNodes(const std::vector<AnfNodePtr> &all_nodes, const F
                             << " does not match the Prim: " << prim->name();
         }
         (void)cnode->set_operator_info(current_op_ptr);
-        current_op_index++;
+        MS_LOG(INFO) << "The CNode with UniqueId: " << cnode->UniqueId()
+                     << " and UniqueIdThroughCopy: " << cnode->UniqueIdThroughCopy()
+                     << " is set OperatorInfo: " << current_op_ptr->name() << ", Primitive: " << prim->name();
       }
     }
   }
-  if ((!new_operator) && (current_op_index != entire_costgraph->GetOperators().size())) {
-    MS_LOG(EXCEPTION) << "The second subgraph's operator number: " << current_op_index
-                      << " does not match the first ones: " << entire_costgraph->GetOperators().size();
-  }
 
   MS_LOG(INFO) << "Constructing nodes for cost graph ends.";
   return SUCCESS;
@@ -844,11 +891,20 @@ Status ParallelStrategySearch(const std::vector<AnfNodePtr> &all_nodes, const Fu
   // OUTPUT: the determined strategy for each operator.
 
   // Step 1
-  if (ConstructCostGraphNodes(all_nodes, root) == SUCCESS) {
-    MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " << entire_costgraph->GetOperators().size()
-                 << " operators.";
+  if (CostModelContext::GetInstance()->is_multi_subgraphs()) {
+    if (ConstructCostGraphNodesByUniqueIdTC(all_nodes, root) == SUCCESS) {
+      MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are "
+                   << entire_costgraph->GetOperators().size() << " operators.";
+    } else {
+      MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed.";
+    }
   } else {
-    MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed.";
+    if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) {
+      MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are "
+                   << entire_costgraph->GetOperators().size() << " operators.";
+    } else {
+      MS_LOG(EXCEPTION) << "Constructing nodes for cost graph failed.";
+    }
   }
 
   // Step 2
@@ -916,7 +972,7 @@ std::vector<std::vector<std::string>> RecInputTensorNames(const std::map<std::st
 }
 
 Status ParallelStrategyRecSearch(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &root) {
-  if (ConstructCostGraphNodes(all_nodes, root) == SUCCESS) {
+  if (ConstructCostGraphNodesByUniqueId(all_nodes, root) == SUCCESS) {
     MS_LOG(INFO) << "Constructing nodes for cost graph succeeded. There are " << entire_costgraph->GetOperators().size()
                  << " operators.";
   } else {
diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.h b/mindspore/ccsrc/parallel/step_auto_parallel.h
index f120edcc61..fff9dfa4c3 100644
--- a/mindspore/ccsrc/parallel/step_auto_parallel.h
+++ b/mindspore/ccsrc/parallel/step_auto_parallel.h
@@ -43,7 +43,9 @@ std::vector<size_t> ExtractInputTypeLengthByNode(const CNodePtr &node);
 
 std::vector<TypePtr> ExtractOutputTypeByNode(const CNodePtr &node);
 
-Status ConstructCostGraphNodes(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &root);
+Status ConstructCostGraphNodesByUniqueId(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &root);
+
+Status ConstructCostGraphNodesByUniqueIdTC(const std::vector<AnfNodePtr> &all_nodes, const FuncGraphPtr &root);
 
 void ConstructCostGraphEdges(const std::vector<AnfNodePtr> &all_nodes);
 
diff --git a/mindspore/ccsrc/pipeline/action.cc b/mindspore/ccsrc/pipeline/action.cc
index e8723e66a4..778600dc0a 100644
--- a/mindspore/ccsrc/pipeline/action.cc
+++ b/mindspore/ccsrc/pipeline/action.cc
@@ -24,6 +24,7 @@
 #include <functional>
 
 #include "ir/func_graph_cloner.h"
+#include "parallel/costmodel_context.h"
 #include "pipeline/pass.h"
 #include "pipeline/parse/parse_base.h"
 #include "pipeline/parse/data_converter.h"
@@ -341,7 +342,10 @@ static std::vector<ActionItem> CommonPipeline() {
 
   // Resolve the python func
   actions.emplace_back(std::make_pair("symbol_resolve", SymbolResolveAction));
-  actions.emplace_back(std::make_pair("combine_like_graphs", CombineLikeGraphs));
+  auto multi_graphs = parallel::CostModelContext::GetInstance()->is_multi_subgraphs();
+  if (!multi_graphs) {
+    actions.emplace_back(std::make_pair("combine_like_graphs", CombineLikeGraphs));
+  }
   actions.emplace_back(std::make_pair("inference_opt_prepare", InferenceOptPrepareAction));
   // Evaluate type and shape, and specialize
   actions.emplace_back(std::make_pair("abstract_specialize", AbstractSpecializeAction));
diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc
index 04e6edc5c8..868255a359 100644
--- a/mindspore/ccsrc/pipeline/init.cc
+++ b/mindspore/ccsrc/pipeline/init.cc
@@ -222,6 +222,8 @@ PYBIND11_MODULE(_c_expression, m) {
          "Set the parameter cost_model_communi_bias of the DP algorithm.")
     .def("get_costmodel_communi_bias", &CostModelContext::costmodel_communi_bias,
          "Get the parameter cost_model_communi_bias of the DP algorithm.")
+    .def("set_multi_subgraphs", &CostModelContext::set_multi_subgraphs, "Set the parameter is_multi_subgraphs.")
+    .def("get_multi_subgraphs", &CostModelContext::is_multi_subgraphs, "Get the parameter is_multi_subgraphs.")
     .def("set_costmodel_allreduce_fusion_algorithm", &CostModelContext::set_costmodel_allreduce_fusion_algorithm,
          "Set the parameter gradient AllReduce fusion algorithm.")
     .def("get_costmodel_allreduce_fusion_algorithm", &CostModelContext::costmodel_allreduce_fusion_algorithm,
diff --git a/mindspore/parallel/_cost_model_context.py b/mindspore/parallel/_cost_model_context.py
index 54cca5516b..2790aed855 100644
--- a/mindspore/parallel/_cost_model_context.py
+++ b/mindspore/parallel/_cost_model_context.py
@@ -214,6 +214,31 @@ class _CostModelContext:
             raise ValueError("Context handle is none in context!!!")
         return self._context_handle.get_costmodel_communi_bias()
 
+    def set_multi_subgraphs(self, multi_subgraph):
+        """
+        Set the flag of ANF graph containing multiple subgraphs.
+
+        Args:
+            multi_subgraph (bool): A parameter used in marking the multi-subgraphs flag.
+
+        Raises:
+            ValueError: If context handle is none.
+        """
+        if self._context_handle is None:
+            raise ValueError("Context handle is none in context!!!")
+        self._context_handle.set_multi_subgraphs(multi_subgraph)
+
+    def get_multi_subgraphs(self):
+        """
+        Get the flag of ANF graph containing multiple subgraphs.
+
+        Raises:
+            ValueError: If context handle is none.
+        """
+        if self._context_handle is None:
+            raise ValueError("Context handle is none in context!!!")
+        return self._context_handle.get_multi_subgraphs()
+
     def set_costmodel_allreduce_fusion_algorithm(self, algorithm):
         """
         Set costmodel allreduce fusion algorithm.
@@ -427,6 +452,7 @@ set_cost_model_context_func_map = {
     "costmodel_communi_threshold": cost_model_context().set_costmodel_communi_threshold,
     "costmodel_communi_const": cost_model_context().set_costmodel_communi_const,
     "costmodel_communi_bias": cost_model_context().set_costmodel_communi_bias,
+    "multi_subgraphs": cost_model_context().set_multi_subgraphs,
     "costmodel_allreduce_fusion_algorithm": cost_model_context().set_costmodel_allreduce_fusion_algorithm,
     "costmodel_allreduce_fusion_times": cost_model_context().set_costmodel_allreduce_fusion_times,
     "costmodel_allreduce_fusion_tail_percent": cost_model_context().set_costmodel_allreduce_fusion_tail_percent,
@@ -447,6 +473,7 @@ get_cost_model_context_func_map = {
     "costmodel_communi_threshold": cost_model_context().get_costmodel_communi_threshold,
     "costmodel_communi_const": cost_model_context().get_costmodel_communi_const,
     "costmodel_communi_bias": cost_model_context().get_costmodel_communi_bias,
+    "multi_subgraphs": cost_model_context().get_multi_subgraphs,
     "costmodel_allreduce_fusion_algorithm": cost_model_context().get_costmodel_allreduce_fusion_algorithm,
     "costmodel_allreduce_fusion_times": cost_model_context().get_costmodel_allreduce_fusion_times,
     "costmodel_allreduce_fusion_tail_percent": cost_model_context().get_costmodel_allreduce_fusion_tail_percent,
@@ -461,6 +488,7 @@ get_cost_model_context_func_map = {
 
 @args_type_check(device_memory_capacity=float, costmodel_alpha=float, costmodel_beta=float, costmodel_gamma=float,
                  costmodel_communi_threshold=float, costmodel_communi_const=float, costmodel_communi_bias=float,
+                 multi_subgraphs=bool,
                  costmodel_allreduce_fusion_algorithm=int, costmodel_allreduce_fusion_times=int,
                  costmodel_allreduce_fusion_tail_percent=float, costmodel_allreduce_fusion_tail_time=float,
                  costmodel_allreduce_fusion_allreduce_inherent_time=float,
@@ -481,6 +509,7 @@ def set_cost_model_context(**kwargs):
         costmodel_communi_threshold (float): A parameter used in adjusting communication calculation for practice.
         costmodel_communi_const (float): A parameter used in adjusting communication calculation for practice.
         costmodel_communi_bias (float): A parameter used in adjusting communication calculation for practice.
+        multi_subgraphs (bool): A parameter used in marking the flag of ANF graph containing multiple subgraphs.
         costmodel_allreduce_fusion_algorithm (int): The allreduce fusion algorithm.
             0: bypass allreduce fusion;
             1: only use backward computation time to group allreduce;
diff --git a/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py
new file mode 100644
index 0000000000..5e8c89de25
--- /dev/null
+++ b/tests/ut/python/parallel/test_auto_parallel_double_subgraphs.py
@@ -0,0 +1,101 @@
+import numpy as np
+from mindspore import context
+import mindspore as ms
+import mindspore.nn as nn
+from mindspore.nn.optim import Adam, FTRL
+from mindspore.ops import operations as P
+from mindspore.ops import functional as F
+from mindspore import Tensor, Parameter, ParameterTuple
+from mindspore.ops import composite as C
+from mindspore.parallel import _cost_model_context as cost_model_context
+from mindspore.common.api import _executor
+from mindspore.parallel import set_algo_parameters, get_algo_parameters, reset_algo_parameters
+from mindspore.parallel._utils import _reset_op_id as reset_op_id
+
+class Net(nn.Cell):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.mul = P.Mul()
+        self.relu = P.ReLU()
+        self.wd = Parameter(Tensor(np.ones([8, 8, 8, 8]).astype(np.float32)), name="wide")
+        self.wt = Parameter(Tensor(np.ones([8, 8, 8, 8]).astype(np.float32)), name="l")
+    def construct(self, x):
+        out = self.mul(x, self.wd)
+        out = self.mul(out, self.wt)
+        out = self.relu(out)
+        return out
+
+class NetWithLoss(nn.Cell):
+    def __init__(self, network):
+        super(NetWithLoss, self).__init__()
+        self.sum = P.ReduceSum()
+        self.mean = P.ReduceMean()
+        self.net = network
+
+    def construct(self, x):
+        predict = self.net(x)
+        loss1 = self.sum(predict, -1)
+        loss2 = self.mean(predict, -1)
+        return loss1, loss2
+
+class IthOutputCell(nn.Cell):
+    def __init__(self, network, output_index):
+        super(IthOutputCell, self).__init__()
+        self.network = network
+        self.output_index = output_index
+
+    def construct(self, x):
+        predict = self.network(x)[self.output_index]
+        return predict
+
+class TrainStepWarp(nn.Cell):
+    def __init__(self, network, sens=1000.0):
+        super(TrainStepWarp, self).__init__()
+        self.network = network
+        self.network.set_train()
+        self.trainable_params = network.trainable_params()
+        weights_w = []
+        weights_d = []
+        for params in self.trainable_params:
+            weights_w.append(params)
+            weights_d.append(params)
+        self.weights_w = ParameterTuple(weights_w)
+        self.weights_d = ParameterTuple(weights_d)
+        self.optimizer_w = FTRL(learning_rate=1e-2, params=self.weights_w, l1=1e-8,
+                                l2=1e-8, initial_accum=1.0)
+        self.optimizer_d = Adam(self.weights_d, learning_rate=3.5e-4, eps=1e-8,
+                                loss_scale=sens)
+        self.hyper_map = C.HyperMap()
+        self.grad_w = C.GradOperation('grad_w', get_by_list=True, sens_param=True)
+        self.grad_d = C.GradOperation('grad_d', get_by_list=True, sens_param=True)
+        self.sens = sens
+        self.loss_net_w = IthOutputCell(network, output_index=0)
+        self.loss_net_d = IthOutputCell(network, output_index=1)
+
+    def construct(self, x):
+        weights_w = self.weights_w
+        weights_d = self.weights_d
+        loss_w, loss_d = self.network(x)
+        sens_w = P.Fill()(P.DType()(loss_w), P.Shape()(loss_w), self.sens)
+        sens_d = P.Fill()(P.DType()(loss_d), P.Shape()(loss_d), self.sens)
+        grads_w = self.grad_w(self.loss_net_w, weights_w)(x, sens_w)
+        grads_d = self.grad_d(self.loss_net_d, weights_d)(x, sens_d)
+        return F.depend(loss_w, self.optimizer_w(grads_w)), F.depend(loss_d, self.optimizer_d(grads_d))
+
+def test_double_subgraphs():
+    cost_model_context.set_cost_model_context(multi_subgraphs=True)
+    context.set_context(save_graphs=True)
+    context.set_auto_parallel_context(device_num=8, global_rank=0)
+    net = TrainStepWarp(NetWithLoss(Net()))
+    context.set_auto_parallel_context(parallel_mode="auto_parallel")
+
+    x = Tensor(np.ones([8, 8, 8, 8]), dtype=ms.float32)
+    reset_op_id()
+    _executor.compile(net, x, phase='train')
+    strategies = _executor._get_strategy(net)
+    expected_strategies = {'Default/network-NetWithLoss/ReduceMean-op0': [[8, 1, 1, 1]],
+                           'Default/network-NetWithLoss/net-Net/ReLU-op1': [[8, 1, 1, 1]],
+                           'Default/network-NetWithLoss/net-Net/Mul-op2': [[8, 1, 1, 1], [8, 1, 1, 1]],
+                           'Default/network-NetWithLoss/net-Net/Mul-op3': [[8, 1, 1, 1], [8, 1, 1, 1]],
+                           'Default/network-NetWithLoss/ReduceSum-op4': [[8, 1, 1, 1]]}
+    assert strategies == expected_strategies
diff --git a/tests/ut/python/parallel/test_auto_parallel_two_bn.py b/tests/ut/python/parallel/test_auto_parallel_two_bn.py
new file mode 100644
index 0000000000..8eb6074f9f
--- /dev/null
+++ b/tests/ut/python/parallel/test_auto_parallel_two_bn.py
@@ -0,0 +1,70 @@
+import numpy as np
+from mindspore import context
+import mindspore as ms
+import mindspore.nn as nn
+from mindspore.ops import operations as P
+from mindspore import Tensor
+from mindspore.common.api import _executor
+from tests.ut.python.ops.test_math_ops import VirtualLoss
+from mindspore.parallel import set_algo_parameters
+from mindspore.parallel._utils import _reset_op_id as reset_op_id
+import re
+
+class NetWithLoss(nn.Cell):
+    def __init__(self, network):
+        super(NetWithLoss, self).__init__()
+        self.loss = VirtualLoss()
+        self.network = network
+
+    def construct(self, x):
+        predict = self.network(x)
+        return self.loss(predict)
+
+class Blockcell(nn.Cell):
+    def __init__(self):
+        super(Blockcell, self).__init__()
+        self.bn = nn.BatchNorm2d(64, momentum=0.9)
+
+    def construct(self, x):
+        out = self.bn(x)
+        return out
+
+def getBlock():
+    return Blockcell()
+
+def test_two_bn():
+    class Net(nn.Cell):
+        def __init__(self):
+            super().__init__()
+            self.block1 = getBlock()
+            self.block2 = getBlock()
+            self.relu = P.ReLU()
+            self.add = P.TensorAdd()
+            self.bias = Tensor(np.ones([64, 64]), dtype=ms.float32)
+
+        def construct(self, x):
+            out = self.block1(x)
+            out = self.relu(out)
+            out = self.add(out, self.bias)
+            out = self.block2(out)
+            return out
+
+    net = NetWithLoss(Net())
+    x = Tensor(np.ones([64, 64]), dtype=ms.float32)
+    context.set_context(save_graphs=True)
+    context.set_auto_parallel_context(device_num=8, global_rank=0)
+    context.set_auto_parallel_context(parallel_mode="auto_parallel")
+    set_algo_parameters(elementwise_op_strategy_follow=True)
+    reset_op_id()
+
+    _executor.compile(net, x, phase='train')
+    strategies = _executor._get_strategy(net)
+    assert len(strategies) == 4
+
+    for (k, v) in strategies.items():
+        if re.search('BatchNorm-op', k) is not None:
+            assert v == [[8, 1], [1], [1], [1], [1]]
+        elif re.search('TensorAdd-op', k) is not None:
+            assert v == [[8, 1], [8, 1]]
+        elif re.search('ReLU-op', k) is not None:
+            assert v == [[8, 1]]

From aacc85caecc53db7d4ff61ea76545f958ff47b96 Mon Sep 17 00:00:00 2001
From: WeibiaoYu <weibiao.yu@huawei.com>
Date: Sun, 26 Apr 2020 22:01:27 -0400
Subject: [PATCH 102/242] do not support auto saving integrated checkpoint
 files in manual parallel mode

---
 mindspore/train/callback.py      |  4 ++--
 mindspore/train/serialization.py | 37 --------------------------------
 2 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/mindspore/train/callback.py b/mindspore/train/callback.py
index b9635acc62..d14a1fab28 100644
--- a/mindspore/train/callback.py
+++ b/mindspore/train/callback.py
@@ -150,8 +150,8 @@ class CheckpointConfig:
         keep_checkpoint_max (int): Maximum step to save checkpoint. Default: 5.
         keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. Default: 0.
             Can't be used with keep_checkpoint_max at the same time.
-        integrated_save (bool): Whether to intergrated save in automatic model parall scene. Default: True.
-            Integrated save function is only supported in automatic parall scene, not supported in manual parallel.
+        integrated_save (bool): Whether to do integrated save in automatic model parallel scene. Default: True.
+            Integrated save function is only supported in automatic parallel scene, not supported in manual parallel.
 
     Raises:
         ValueError: If the input_param is None or 0.
diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py
index ae17bf8116..74aa2c2253 100644
--- a/mindspore/train/serialization.py
+++ b/mindspore/train/serialization.py
@@ -225,15 +225,6 @@ def load_param_into_net(net, parameter_dict):
         raise TypeError(msg)
 
     logger.info("Execute load parameter into net process.")
-    for name in parameter_dict:
-        for _, param in net.parameters_and_names():
-            if name == param.name and param.layerwise_parallel:
-                # layerwise parallel parameter data loaded from checkpoint file,
-                # was a complete(merged) data, need to be splited
-                new_param = parameter_dict[param.name]
-                _load_tensor_for_layerwise(new_param, param)
-                break
-
     param_not_load = []
     for _, param in net.parameters_and_names():
         if param.name in parameter_dict:
@@ -363,34 +354,6 @@ def _get_merged_param_data(net, param_name, param_data):
     return param_data
 
 
-def _load_tensor_for_layerwise(new_param, old_param):
-    """
-    Replaces parameters with sliced tensors by layerwise parallel strategies.
-
-    Args:
-        new_param (Parameter): The new layerwise parallel parameter, will be loaded into net.
-        old_param(Parameter): The current parameter in the net.
-    """
-    if not isinstance(new_param.data, Tensor) or not isinstance(old_param.data, Tensor):
-        logger.error("Failed to combine the net and the parameters.")
-        msg = ("layerwise parallel parameter should be a Tensor, but got {}.".format(type(new_param.data)))
-        raise TypeError(msg)
-
-    if old_param.data.shape() == new_param.data.shape():
-        return
-
-    from mindspore.parallel._tensor import _load_tensor
-    from mindspore.communication.management import get_group_size
-    dev_mat = [get_group_size()]
-    shape = new_param.data.shape()
-    for x in range(len(shape)):  # dim 0 set 0, others set -1
-        if x:
-            tensor_map.append(-1)
-
-    new_tensor = _load_tensor(new_param.data, dev_mat, tensor_map)
-    new_param.set_parameter_data(new_tensor)
-
-
 def _fill_param_into_net(net, parameter_list):
     """
     Fills parameter_list into net.

From ddc558fd722b48d4ded2c1a7a2722546513c926e Mon Sep 17 00:00:00 2001
From: "wangnan39@huawei.com" <wangnan39@huawei.com>
Date: Sun, 26 Apr 2020 20:59:15 +0800
Subject: [PATCH 103/242] fix weight decay error in optimizer AdamWeightDecay

---
 mindspore/nn/optim/adam.py | 60 +++++++++++++++++++++++++++++---------
 mindspore/nn/optim/lamb.py | 14 +++++----
 mindspore/nn/optim/sgd.py  |  8 ++++-
 3 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 9ae1431247..055eaae7c6 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -31,8 +31,8 @@ _learning_rate_update_func = ['linear', 'cos', 'sin']
 adam_opt = C.MultitypeFuncGraph("adam_opt")
 
 
-@adam_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor")
-def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, gradient):
+@adam_opt.register("Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Bool")
+def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, gradient, decay_flag):
     """
     Update parameters.
 
@@ -67,7 +67,8 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, grad
     next_v = op_mul(beta2, v) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient))
 
     update = next_m / (op_sqrt(next_v) + eps)
-    update = update + op_mul(weight_decay_tensor, param)
+    if decay_flag:
+        update = update + op_mul(weight_decay_tensor, param)
 
     update_with_lr = op_mul(lr, update)
     next_param = param - op_reshape(update_with_lr, op_shape(param))
@@ -90,6 +91,17 @@ def _check_param_value(beta1, beta2, eps, weight_decay, prim_name):
     validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, prim_name)
 
 
+def _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, prim_name):
+    """Check that the learning rate schedule inputs have legal values."""
+    validator.check_float_positive('learning_rate', learning_rate, prim_name)
+    validator.check_float_legal_value('learning_rate', learning_rate, prim_name)
+    validator.check_float_positive('end_learning_rate', end_learning_rate, prim_name)
+    validator.check_float_legal_value('end_learning_rate', end_learning_rate, prim_name)
+    validator.check_float_positive('power', power, prim_name)
+    validator.check_float_legal_value('power', power, prim_name)
+    validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name)
+
+
 @adam_opt.register("Function", "Tensor", "Tensor", "Tensor", "Tensor", "Tensor", "Number", "Tensor", "Tensor", "Tensor",
                    "Tensor")
 def _run_opt_with_one_number(opt, lr, beta1_power, beta2_power, beta1, beta2, eps, gradient, params, moment1,
@@ -126,8 +138,13 @@ class Adam(Optimizer):
     Args:
         params (list[Parameter]): A list of parameter, which will be updated. The element in `params`
                                   should be class mindspore.Parameter.
-        learning_rate (Union[float, Tensor, Iterable]): The Learning rate.
-            Iterable type is used for the dynamic learning rate.
+        learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
+                                                        Iterable or a Tensor and the dims of the Tensor is 1,
+                                                        use dynamic learning rate, then the i-th step will
+                                                        take the i-th value as the learning rate.
+                                                        When the learning_rate is float or learning_rate is a Tensor
+                                                        but the dims of the Tensor is 0, use fixed learning rate.
+                                                        Other cases are not supported. Default: 1e-3.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0).
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0).
         eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
@@ -140,6 +157,8 @@ class Adam(Optimizer):
         weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
         loss_scale (float): A floating point value for the loss scale. Default: 1.0.
             Should be equal to or greater than 1.
+        decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
+                                 lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
@@ -207,7 +226,13 @@ class AdamWeightDecay(Optimizer):
     Args:
         params (list[Parameter]): A list of parameter, which will be updated. The element in `params`
                                   should be class mindspore.Parameter.
-        learning_rate (float): A floating point value for the learning rate. Default: 1e-3.
+        learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
+                                                        Iterable or a Tensor and the dims of the Tensor is 1,
+                                                        use dynamic learning rate, then the i-th step will
+                                                        take the i-th value as the learning rate.
+                                                        When the learning_rate is float or learning_rate is a Tensor
+                                                        but the dims of the Tensor is 0, use fixed learning rate.
+                                                        Other cases are not supported. Default: 1e-3.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9.
             Should be in range (0.0, 1.0).
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999.
@@ -215,6 +240,8 @@ class AdamWeightDecay(Optimizer):
         eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6.
             Should be greater than 0.
         weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
+        decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
+                                 lambda x: 'beta' not in x.name and 'gamma' not in x.name.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
@@ -228,10 +255,10 @@ class AdamWeightDecay(Optimizer):
         >>> optim = nn.AdamWeightDecay(params=net.trainable_params())
         >>> model = Model(net, loss_fn=loss, optimizer=optim, metrics=None)
    """
-    def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0):
+    def __init__(self, params, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-6, weight_decay=0.0,
+                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
         super(AdamWeightDecay, self).__init__(learning_rate, params)
         _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
-        self.lr = Tensor(np.array([learning_rate]).astype(np.float32))
         self.beta1 = Tensor(np.array([beta1]).astype(np.float32))
         self.beta2 = Tensor(np.array([beta2]).astype(np.float32))
         self.eps = Tensor(np.array([eps]).astype(np.float32))
@@ -240,13 +267,15 @@ class AdamWeightDecay(Optimizer):
         self.params = self.parameters
         self.moments1 = self.params.clone(prefix="adam_m", init='zeros')
         self.moments2 = self.params.clone(prefix="adam_v", init='zeros')
+        self.decay_flag = tuple(decay_filter(x) for x in self.params)
 
         self.hyper_map = C.HyperMap()
 
     def construct(self, gradients):
-        updated_velocity = self.hyper_map(F.partial(adam_opt, self.beta1, self.beta2, self.eps, self.lr,
+        lr = self.get_lr()
+        updated_velocity = self.hyper_map(F.partial(adam_opt, self.beta1, self.beta2, self.eps, lr,
                                                     self.weight_decay_tensor),
-                                          self.params, self.moments1, self.moments2, gradients)
+                                          self.params, self.moments1, self.moments2, gradients, self.decay_flag)
 
         return updated_velocity
 
@@ -269,6 +298,8 @@ class AdamWeightDecayDynamicLR(Optimizer):
         eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6.
             Should be greater than 0.
         weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
+        decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
+                                 lambda x: 'beta' not in x.name and 'gamma' not in x.name.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
@@ -291,10 +322,11 @@ class AdamWeightDecayDynamicLR(Optimizer):
                  beta1=0.9,
                  beta2=0.999,
                  eps=1e-6,
-                 weight_decay=0.0):
+                 weight_decay=0.0,
+                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
         super(AdamWeightDecayDynamicLR, self).__init__(learning_rate, params)
         _check_param_value(beta1, beta2, eps, weight_decay, self.cls_name)
-
+        _check_learning_rate_value(learning_rate, end_learning_rate, decay_steps, power, self.cls_name)
         # turn them to scalar when me support scalar/tensor mix operations
         self.global_step = Parameter(initializer(0, [1]), name="global_step")
         self.decay_steps = Tensor(np.array([decay_steps]).astype(np.float32))
@@ -308,7 +340,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
         self.params = self.parameters
         self.moments1 = self.params.clone(prefix="adam_m", init='zeros')
         self.moments2 = self.params.clone(prefix="adam_v", init='zeros')
-
+        self.decay_flag = tuple(decay_filter(x) for x in self.params)
         self.hyper_map = C.HyperMap()
         self.min = P.Minimum()
         self.pow = P.Pow()
@@ -320,7 +352,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
         lr = self.diff_learning_rate * self.pow(self.one - p, self.power) + self.end_learning_rate
         updated_velocity = self.hyper_map(F.partial(adam_opt, self.beta1, self.beta2, self.eps, lr,
                                                     self.weight_decay_tensor),
-                                          self.params, self.moments1, self.moments2, gradients)
+                                          self.params, self.moments1, self.moments2, gradients, self.decay_flag)
 
         added_global_step = self.global_step + self.one
         F.control_depend(lr, added_global_step)
diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py
index afcbf8cda4..01ec984453 100755
--- a/mindspore/nn/optim/lamb.py
+++ b/mindspore/nn/optim/lamb.py
@@ -112,16 +112,18 @@ def _check_param_value(decay_steps, warmup_steps, start_learning_rate,
                        end_learning_rate, power, beta1, beta2, eps, weight_decay, prim_name):
 
     """Check the type of inputs."""
-    validator.check_value_type("decay_steps", decay_steps, [int], prim_name)
-    validator.check_value_type("warmup_steps", warmup_steps, [int], prim_name)
-    validator.check_value_type("start_learning_rate", start_learning_rate, [float], prim_name)
-    validator.check_value_type("end_learning_rate", end_learning_rate, [float], prim_name)
-    validator.check_value_type("power", power, [float], prim_name)
+    validator.check_float_positive('start_learning_rate', start_learning_rate, prim_name)
+    validator.check_float_legal_value('start_learning_rate', start_learning_rate, prim_name)
+    validator.check_float_positive('end_learning_rate', end_learning_rate, prim_name)
+    validator.check_float_legal_value('end_learning_rate', end_learning_rate, prim_name)
+    validator.check_float_positive('power', power, prim_name)
+    validator.check_float_legal_value('power', power, prim_name)
+    validator.check_integer('decay_steps', decay_steps, 0, Rel.GT, prim_name)
+    validator.check_integer('warmup_steps', warmup_steps, 0, Rel.GE, prim_name)  # 0 is allowed
     validator.check_value_type("beta1", beta1, [float], prim_name)
     validator.check_value_type("beta2", beta2, [float], prim_name)
     validator.check_value_type("eps", eps, [float], prim_name)
     validator.check_value_type("weight_dacay", weight_decay, [float], prim_name)
-    validator.check_number_range("decay_steps", decay_steps, 1, float("inf"), Rel.INC_LEFT, prim_name)
     validator.check_number_range("beta1", beta1, 0.0, 1.0, Rel.INC_NEITHER, prim_name)
     validator.check_number_range("beta2", beta2, 0.0, 1.0, Rel.INC_NEITHER, prim_name)
     validator.check_number_range("eps", eps, 0.0, float("inf"), Rel.INC_NEITHER, prim_name)
diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py
index cda5aa904a..bf2ed21d50 100755
--- a/mindspore/nn/optim/sgd.py
+++ b/mindspore/nn/optim/sgd.py
@@ -42,7 +42,13 @@ class SGD(Optimizer):
     Args:
         params (list[Parameter]): A list of parameter, which will be updated. The element in `params`
                                   should be class mindspore.Parameter.
-        learning_rate (float): A floating point value for the learning rate. Default: 0.1.
+        learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
+                                                        Iterable or a Tensor and the dims of the Tensor is 1,
+                                                        use dynamic learning rate, then the i-th step will
+                                                        take the i-th value as the learning rate.
+                                                        When the learning_rate is float or learning_rate is a Tensor
+                                                        but the dims of the Tensor is 0, use fixed learning rate.
+                                                        Other cases are not supported. Default: 0.1.
         momentum (float): A floating point value the momentum. Default: 0.
         dampening (float): A floating point value of dampening for momentum. Default: 0.
         weight_decay (float): Weight decay (L2 penalty). Default: 0.

From 81644a95dfa25cd5eebf5ac2b2478aaaf67a180a Mon Sep 17 00:00:00 2001
From: caifubi <caifubi1@huawei.com>
Date: Tue, 21 Apr 2020 10:11:20 +0800
Subject: [PATCH 104/242] change enable_loop_sink default value to True

---
 mindspore/ccsrc/utils/context/ms_context.cc | 3 ++-
 mindspore/context.py                        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc
index 3a2de9ba0c..5e8fc48216 100644
--- a/mindspore/ccsrc/utils/context/ms_context.cc
+++ b/mindspore/ccsrc/utils/context/ms_context.cc
@@ -69,7 +69,6 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
   enable_task_sink_ = true;
   ir_fusion_flag_ = true;
   enable_hccl_ = false;
-  enable_loop_sink_ = false;
   enable_mem_reuse_ = true;
   enable_gpu_summary_ = true;
   precompile_only_ = false;
@@ -78,6 +77,7 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
   enable_dynamic_mem_pool_ = true;
   graph_memory_max_size_ = "0";
   variable_memory_max_size_ = "0";
+  enable_loop_sink_ = target == kAscendDevice || target == kDavinciDevice;
   MS_LOG(DEBUG) << "Create context with backend policy:" << policy << ", device target:" << target << ".";
 }
 
@@ -134,6 +134,7 @@ bool MsContext::set_device_target(const std::string &target) {
   } else {
     device_target_ = target;
   }
+  enable_loop_sink_ = device_target_ == kAscendDevice;
   MS_LOG(INFO) << "ms set context device target:" << target;
   return true;
 }
diff --git a/mindspore/context.py b/mindspore/context.py
index 311937fb9b..237b2143ed 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -516,7 +516,7 @@ def set_context(**kwargs):
         enable_ir_fusion (bool): Whether to enable ir fusion. Default: True.
         save_graphs (bool): Whether to save graphs. Default: False.
         enable_hccl (bool): Whether to enable hccl. Default: False.
-        enable_loop_sink (bool): Whether to enable loop sink. Default: False.
+        enable_loop_sink (bool): Whether to enable loop sink. Default: True.
         enable_task_sink (bool): Whether to enable task sink. Default: True.
         enable_mem_reuse (bool): Whether to enable memory reuse. Default: True.
         save_ms_model (bool): Whether to save lite model converted by graph. Default: False.

From 0f64f63ce26df4d146a28ac039333d1b82d1f46b Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Mon, 27 Apr 2020 10:17:05 +0800
Subject: [PATCH 105/242] Fix error type of inputs of ge graph is not tensor

---
 mindspore/ccsrc/pipeline/pipeline_ge.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/pipeline/pipeline_ge.cc b/mindspore/ccsrc/pipeline/pipeline_ge.cc
index 4a7328d325..c442fba931 100644
--- a/mindspore/ccsrc/pipeline/pipeline_ge.cc
+++ b/mindspore/ccsrc/pipeline/pipeline_ge.cc
@@ -465,7 +465,7 @@ void ProcessGeArg(const std::map<std::string, ExecutorInfoPtr> &info, const py::
       if (converted->isa<tensor::Tensor>()) {
         inputs->push_back(converted->cast<tensor::TensorPtr>());
       } else {
-        MS_LOG(EXCEPTION) << "Args " << converted->ToString() << " is not tensor";
+        MS_EXCEPTION(TypeError) << "Args " << converted->ToString() << " is not tensor";
       }
     }
   }

From d51cbb6e33b27524d9dfa62bc1705656f4d8ab8b Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Sun, 26 Apr 2020 22:25:48 -0400
Subject: [PATCH 106/242] fix doc problems

---
 mindspore/nn/layer/normalization.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 09a0b4bb27..644604d604 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -324,7 +324,7 @@ class GlobalBatchNorm(_BatchNorm):
 
     Args:
         num_features (int): `C` from an expected input of size (N, C, H, W).
-        device_num_each_group (int): The number of device in each group.
+        device_num_each_group (int): The number of devices in each group.
         eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
         momentum (float): A floating hyperparameter of the momentum for the
             running_mean and running_var computation. Default: 0.9.
@@ -350,7 +350,7 @@ class GlobalBatchNorm(_BatchNorm):
         Tensor, the normalized, scaled, offset tensor, of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
 
     Examples:
-        >>> global_bn_op = nn.GlobalBatchNorm(num_features=3, group=4)
+        >>> global_bn_op = nn.GlobalBatchNorm(num_features=3, device_num_each_group=4)
         >>> input = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32)
         >>> global_bn_op(input)
     """

From fd511d0729f5c3e6636884c5680767e7020cc5d7 Mon Sep 17 00:00:00 2001
From: caojian05 <caojian5@huawei.com>
Date: Fri, 24 Apr 2020 18:46:09 +0800
Subject: [PATCH 107/242] add distribute train for vgg16

---
 example/vgg16_cifar10/dataset.py              |  6 ++-
 example/vgg16_cifar10/run_distribute_train.sh | 53 +++++++++++++++++++
 example/vgg16_cifar10/train.py                | 29 +++++++---
 3 files changed, 80 insertions(+), 8 deletions(-)
 create mode 100755 example/vgg16_cifar10/run_distribute_train.sh

diff --git a/example/vgg16_cifar10/dataset.py b/example/vgg16_cifar10/dataset.py
index 4e82beb2e3..e8dfd777e6 100644
--- a/example/vgg16_cifar10/dataset.py
+++ b/example/vgg16_cifar10/dataset.py
@@ -28,7 +28,11 @@ def create_dataset(data_home, repeat_num=1, training=True):
     data_dir = os.path.join(data_home, "cifar-10-batches-bin")
     if not training:
         data_dir = os.path.join(data_home, "cifar-10-verify-bin")
-    data_set = ds.Cifar10Dataset(data_dir)
+
+    rank_size = int(os.environ.get("RANK_SIZE")) if os.environ.get("RANK_SIZE") else None
+    rank_id = int(os.environ.get("RANK_ID")) if os.environ.get("RANK_ID") else None
+    data_set = ds.Cifar10Dataset(data_dir, num_shards=rank_size, shard_id=rank_id)
+
     resize_height = cfg.image_height
     resize_width = cfg.image_width
     rescale = 1.0 / 255.0
diff --git a/example/vgg16_cifar10/run_distribute_train.sh b/example/vgg16_cifar10/run_distribute_train.sh
new file mode 100755
index 0000000000..c9b8dfc48f
--- /dev/null
+++ b/example/vgg16_cifar10/run_distribute_train.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# -ne 2 ]  # expects exactly two arguments: HCCL config file and dataset directory
+then
+    echo "Usage: bash run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]"
+    exit 1
+fi
+
+if [ ! -f "$1" ]
+then
+    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
+    exit 1
+fi
+
+if [ ! -d "$2" ]
+then
+    echo "error: DATA_PATH=$2 is not a directory"
+    exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=8
+export RANK_SIZE=8
+export MINDSPORE_HCCL_CONFIG_PATH=$(realpath "$1")
+DATA_PATH=$(realpath "$2")  # absolute paths: each worker runs from its own train_parallel<i> directory
+for((i=0; i<${DEVICE_NUM}; i++))  # start one background training process per device
+do
+    export DEVICE_ID=$i
+    export RANK_ID=$i
+    rm -rf ./train_parallel$i
+    mkdir ./train_parallel$i
+    cp *.py ./train_parallel$i
+    cp *.sh ./train_parallel$i
+    cd ./train_parallel$i || exit
+    echo "start training for rank $RANK_ID, device $DEVICE_ID"
+    env > env.log
+    python train.py --data_path="$DATA_PATH" --device_id=$i &> log &
+    cd ..
+done
diff --git a/example/vgg16_cifar10/train.py b/example/vgg16_cifar10/train.py
index 87cea2af03..234e3f7c7e 100644
--- a/example/vgg16_cifar10/train.py
+++ b/example/vgg16_cifar10/train.py
@@ -17,16 +17,18 @@
 python train.py --data_path=$DATA_HOME --device_id=$DEVICE_ID
 """
 import argparse
+import os
 import random
 import numpy as np
 import mindspore.nn as nn
 from mindspore import Tensor
+from mindspore.communication.management import init
 from mindspore.nn.optim.momentum import Momentum
-from mindspore.train.model import Model
+from mindspore.train.model import Model, ParallelMode
 from mindspore import context
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
 from mindspore.model_zoo.vgg import vgg16
-import dataset
+from dataset import create_dataset
 from config import cifar_cfg as cfg
 random.seed(1)
 np.random.seed(1)
@@ -62,18 +64,31 @@ if __name__ == '__main__':
 
     context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
     context.set_context(device_id=args_opt.device_id)
+    context.set_context(enable_task_sink=True)
+    context.set_context(enable_loop_sink=True)
     context.set_context(enable_mem_reuse=True, enable_hccl=False)
 
+    device_num = int(os.environ.get("DEVICE_NUM", 1))
+    if device_num > 1:
+        context.reset_auto_parallel_context()
+        context.set_context(enable_hccl=True)
+        context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          mirror_mean=True)
+        init()
+
+    dataset = create_dataset(args_opt.data_path, cfg.epoch_size)
+    batch_num = dataset.get_dataset_size()
+
     net = vgg16(num_classes=cfg.num_classes)
-    lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=50000 // cfg.batch_size)
+    lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum, weight_decay=cfg.weight_decay)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
                   amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)
 
-    dataset = dataset.create_dataset(args_opt.data_path, cfg.epoch_size)
-    batch_num = dataset.get_dataset_size()
     config_ck = CheckpointConfig(save_checkpoint_steps=batch_num * 5, keep_checkpoint_max=cfg.keep_checkpoint_max)
+    time_cb = TimeMonitor(data_size=batch_num)
     ckpoint_cb = ModelCheckpoint(prefix="train_vgg_cifar10", directory="./", config=config_ck)
     loss_cb = LossMonitor()
-    model.train(cfg.epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])
+    model.train(cfg.epoch_size, dataset, callbacks=[time_cb, ckpoint_cb, loss_cb])
+    print("train success")

From 356262547404d3731ba3086972e4fe3c1bf47519 Mon Sep 17 00:00:00 2001
From: leonwanghui <wanghui71leon@gmail.com>
Date: Mon, 27 Apr 2020 11:24:13 +0800
Subject: [PATCH 108/242] Bump the version number to 0.2.0-alpha

---
 README.md                                   | 31 ++++----
 RELEASE.md                                  | 72 ++++++++++++++++++
 build.bat                                   | 12 +--
 build.sh                                    |  4 +-
 docker/README.md                            | 19 +++--
 docker/mindspore-cpu/0.2.0-alpha/Dockerfile | 67 +++++++++++++++++
 docker/mindspore-gpu/0.2.0-alpha/Dockerfile | 83 +++++++++++++++++++++
 setup.py                                    |  2 +-
 8 files changed, 257 insertions(+), 33 deletions(-)
 create mode 100644 docker/mindspore-cpu/0.2.0-alpha/Dockerfile
 create mode 100644 docker/mindspore-gpu/0.2.0-alpha/Dockerfile

diff --git a/README.md b/README.md
index 3de87d3fec..e0ca8a9417 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 ![MindSpore Logo](docs/MindSpore-logo.png "MindSpore logo")
 ============================================================
 
-- [What is MindSpore?](#what-is-mindspore)
+- [What Is MindSpore?](#what-is-mindspore)
     - [Automatic Differentiation](#automatic-differentiation)
     - [Automatic Parallel](#automatic-parallel)
 - [Installation](#installation)
@@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem.
 
 <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/>
 
-For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.1.0-alpha/architecture.html).
+For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html).
 
 ### Automatic Differentiation
 
@@ -76,7 +76,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
 
     ```
-    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
+    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
     ```
 
 2. Run the following command to verify the install.
@@ -96,20 +96,22 @@ currently the containerized build options are supported as follows:
 
 | Hardware Platform | Docker Image Repository | Tag | Description |
 | :---------------- | :---------------------- | :-- | :---------- |
-| CPU | `mindspore/mindspore-cpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` CPU release. |
+| CPU | `mindspore/mindspore-cpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` CPU release. |
 |  |  | `devel` | Development environment provided to build MindSpore (with `CPU` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
 |  |  | `runtime` | Runtime environment provided to install MindSpore binary package with `CPU` backend. |
-| GPU | `mindspore/mindspore-gpu` | `0.1.0-alpha` | Production environment with pre-installed MindSpore `0.1.0-alpha` GPU release. |
+| GPU | `mindspore/mindspore-gpu` | `x.y.z` | Production environment with pre-installed MindSpore `x.y.z` GPU release. |
 |  |  | `devel` | Development environment provided to build MindSpore (with `GPU CUDA10.1` backend) from the source, refer to https://www.mindspore.cn/install/en for installation details. |
-|  |  | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU` backend. |
+|  |  | `runtime` | Runtime environment provided to install MindSpore binary package with `GPU CUDA10.1` backend. |
 | Ascend | <center>&mdash;</center> | <center>&mdash;</center> | Coming soon. |
 
+> **NOTICE:** For the GPU `devel` docker image, it is NOT suggested to install the whl package directly after building from source; instead, we strongly RECOMMEND transferring the whl package to the GPU `runtime` docker image and installing it there.
+
 * CPU
 
-    For `CPU` backend, you can directly pull and run the image using the below command:
+    For `CPU` backend, you can directly pull and run the latest stable image using the below command:
     ```
-    docker pull mindspore/mindspore-cpu:0.1.0-alpha
-    docker run -it mindspore/mindspore-cpu:0.1.0-alpha python -c 'import mindspore'
+    docker pull mindspore/mindspore-cpu:0.2.0-alpha
+    docker run -it mindspore/mindspore-cpu:0.2.0-alpha python -c 'import mindspore'
     ```
 
 * GPU
@@ -124,20 +126,21 @@ currently the containerized build options are supported as follows:
     sudo systemctl restart docker
     ```
 
-    Then you can pull and run the image using the below command:
+    Then you can pull and run the latest stable image using the below command:
     ```
-    docker pull mindspore/mindspore-gpu:0.1.0-alpha
-    docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.1.0-alpha /bin/bash
+    docker pull mindspore/mindspore-gpu:0.2.0-alpha
+    docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash
     ```
 
     To test if the docker image works, please execute the python code below and check the output:
     ```python
     import numpy as np
+    import mindspore.context as context
     from mindspore import Tensor
     from mindspore.ops import functional as F
-    import mindspore.context as context
 
     context.set_context(device_target="GPU")
+
     x = Tensor(np.ones([1,3,3,4]).astype(np.float32))
     y = Tensor(np.ones([1,3,3,4]).astype(np.float32))
     print(F.tensor_add(x, y))
@@ -161,7 +164,7 @@ please check out `docker` folder for the details.
 
 ## Quickstart
 
-See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.1.0-alpha/quick_start/quick_start.html)
+See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html)
 to implement the image classification.
 
 ## Docs
diff --git a/RELEASE.md b/RELEASE.md
index ce9064e4b1..416efd824a 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,75 @@
+# Release 0.2.0-alpha
+
+## Major Features and Improvements
+
+### Ascend 910 Training and Inference Framework
+* New models
+    * MobileNetV2: Inverted Residuals and Linear Bottlenecks.
+    * ResNet101: Deep Residual Learning for Image Recognition.
+
+* Frontend and User Interface
+   * Support for all python comparison operators.
+   * Support for math operators `**`, `//`, `%`. Support for other Python operators such as `and`/`or`/`not`/`is`/`is not`/`in`/`not in`.
+   * Support for the gradients of function with variable arguments.
+   * Support for tensor indexing assignment for certain indexing types.
+   * Support for dynamic learning rate.
+   * User interfaces change log
+     * DepthwiseConv2dNative, DepthwiseConv2dNativeBackpropFilter, DepthwiseConv2dNativeBackpropInput([!424](https://gitee.com/mindspore/mindspore/pulls/424))
+     * ReLU6, ReLU6Grad([!224](https://gitee.com/mindspore/mindspore/pulls/224))
+     * GeneratorDataset([!183](https://gitee.com/mindspore/mindspore/pulls/183))
+     * VOCDataset([!477](https://gitee.com/mindspore/mindspore/pulls/477))
+     * MindDataset, PKSampler([!514](https://gitee.com/mindspore/mindspore/pulls/514))
+     * map([!506](https://gitee.com/mindspore/mindspore/pulls/506))
+     * Conv([!226](https://gitee.com/mindspore/mindspore/pulls/226))
+     * Adam([!253](https://gitee.com/mindspore/mindspore/pulls/253))
+     * _set_fusion_strategy_by_idx, _set_fusion_strategy_by_size([!189](https://gitee.com/mindspore/mindspore/pulls/189))
+     * CheckpointConfig([!122](https://gitee.com/mindspore/mindspore/pulls/122))
+     * Constant([!54](https://gitee.com/mindspore/mindspore/pulls/54))
+* Executor and Performance Optimization
+    * Support parallel execution of data prefetching and forward/backward computing.
+    * Support parallel execution of gradient aggregation and forward/backward computing in distributed training scenarios.
+    * Support operator fusion optimization.
+    * Optimize compilation process and improve the performance.
+* Data processing, augmentation, and save format
+    * Support multi-process execution of GeneratorDataset/PyFunc for high performance.
+    * Support variable batch size.
+    * Support new Dataset operators, such as filter, skip, take, and TextLineDataset.
+
+### Other Hardware Support
+* GPU platform
+    * Use dynamic memory pool by default on GPU.
+    * Support parallel execution of computation and communication.
+    * Support continuous address allocation by memory pool.
+* CPU platform
+    * Support for windows 10 OS.
+
+## Bugfixes
+* Models
+    * Fix mixed precision bug for VGG16 model ([!629](https://gitee.com/mindspore/mindspore/pulls/629)).
+* Python API
+    * Fix ControlDepend operator bugs on CPU and GPU ([!396](https://gitee.com/mindspore/mindspore/pulls/396)).
+    * Fix ArgMinWithValue operator bugs ([!338](https://gitee.com/mindspore/mindspore/pulls/338)).
+    * Fix Dense operator bugs on PyNative mode ([!276](https://gitee.com/mindspore/mindspore/pulls/276)).
+    * Fix MatMul operator bugs on PyNative mode ([!288](https://gitee.com/mindspore/mindspore/pulls/288)).
+* Executor
+    * Fix operator selection bugs and make it general ([!300](https://gitee.com/mindspore/mindspore/pulls/300)).
+    * Fix memory reuse bug for GetNext op ([!291](https://gitee.com/mindspore/mindspore/pulls/291)).
+* GPU platform
+    * Fix memory allocation in multi-graph scenarios ([!444](https://gitee.com/mindspore/mindspore/pulls/444)).
+    * Fix bias_add_grad under fp16 precision ([!598](https://gitee.com/mindspore/mindspore/pulls/598)).
+    * Fix support for fp16 kernels on NVIDIA 1080Ti ([!571](https://gitee.com/mindspore/mindspore/pulls/571)).
+    * Fix parsing of tuple type parameters ([!316](https://gitee.com/mindspore/mindspore/pulls/316)).
+* Data processing
+    * Fix TypeError "can't pickle mindspore._c_dataengine.DEPipeline objects" ([!434](https://gitee.com/mindspore/mindspore/pulls/434)).
+    * Add TFRecord file verification ([!406](https://gitee.com/mindspore/mindspore/pulls/406)).
+
+## Contributors
+Thanks goes to these wonderful people:
+
+Alexey_Shevlyakov, Cathy, Chong, Hoai, Jonathan, Junhan, JunhanHu, Peilin, SanjayChan, StrawNoBerry, VectorSL, Wei, WeibiaoYu, Xiaoda, Yanjun, YuJianfeng, ZPaC, Zhang, ZhangQinghua, ZiruiWu, amongo, anthonyaje, anzhengqi, biffex, caifubi, candanzg, caojian05, casgj, cathwong, ch-l, chang, changzherui, chenfei, chengang, chenhaozhe, chenjianping, chentingting, chenzomi, chujinjin, dengwentao, dinghao, fanglei, fary86, flywind, gaojing, geekun, gengdongjie, ghzl, gong, gongchen, gukecai, guohongzilong, guozhijian, gziyan, h.farahat, hesham, huangdongrun, huanghui, jiangzhiwen, jinyaohui, jjfeing, jojobugfree, jonathan_yan, jonyguo, jzw, kingfo, kisnwang, laiyongqiang, leonwanghui, lianliguang, lichen, lichenever, limingqi107, liubuyu, liuxiao, liyong, liyong126, lizhenyu, lupengcheng, lvliang, maoweiyong, ms_yan, mxm, ougongchang, panfengfeng, panyifeng, pengyanjun, penn, qianlong, seatea, simson, suteng, thlinh, vlne-v1, wangchengke, wanghua, wangnan39, wangqiuliang, wenchunjiang, wenkai, wukesong, xiefangqi, xulei, yanghaitao, yanghaoran, yangjie159, yangzhenzhang, yankai10, yanzhenxiang2020, yao_yf, yoonlee666, zhangbuxue, zhangz0911gm, zhangzheng, zhaojichen, zhaoting, zhaozhenlong, zhongligeng, zhoufeng, zhousiyi, zjun, zyli2020, yuhuijun, limingqi107, lizhenyu, chenweifeng.
+
+Contributions of any kind are welcome!
+
 # Release 0.1.0-alpha
 
 ## Main Features
diff --git a/build.bat b/build.bat
index ddb2e8affe..4e875fa11a 100644
--- a/build.bat
+++ b/build.bat
@@ -14,27 +14,27 @@
 @rem ============================================================================
 @echo off
 @title mindspore_build
- 
+
 SET BASEPATH=%CD%
 IF NOT EXIST %BASEPATH%/build (
          md "build"
          )
- 
+
 cd %BASEPATH%/build
 SET BUILD_PATH=%CD%
- 
+
 IF NOT EXIST %BUILD_PATH%/mindspore (
          md "mindspore"
          )
- 
+
 cd %CD%/mindspore
- 
+
 cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_CPU=ON -DENABLE_MINDDATA=ON -DUSE_GLOG=ON -G "CodeBlocks - MinGW Makefiles" ../..
 IF NOT %errorlevel% == 0 (
     echo "cmake fail."
     goto run_fail
     )
- 
+
 IF "%1%" == "" (
     cmake --build . --target package -- -j6
     ) ELSE (
diff --git a/build.sh b/build.sh
index b48014ed93..0b60344980 100755
--- a/build.sh
+++ b/build.sh
@@ -433,9 +433,9 @@ build_predict()
 
     cd "${BASEPATH}/predict/output/"
     if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
-      tar -cf MSPredict-0.1.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
+      tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
     elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
-      tar -cf MSPredict-0.1.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
+      tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
     fi
     echo "success to build predict project!"
 }
diff --git a/docker/README.md b/docker/README.md
index c6851fe531..bceeef0cae 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -4,14 +4,13 @@ This folder hosts all the `Dockerfile` to build MindSpore container images with
 
 ### MindSpore docker build command
 
-* CPU
+| Hardware Platform | Version | Build Command |
+| :---------------- | :------ | :------------ |
+| CPU | `x.y.z` | cd mindspore-cpu/x.y.z && docker build . -t mindspore/mindspore-cpu:x.y.z |
+|  | `devel` | cd mindspore-cpu/devel && docker build . -t mindspore/mindspore-cpu:devel |
+|  | `runtime` | cd mindspore-cpu/runtime && docker build . -t mindspore/mindspore-cpu:runtime |
+| GPU | `x.y.z` | cd mindspore-gpu/x.y.z  && docker build . -t mindspore/mindspore-gpu:x.y.z  |
+|  | `devel` | cd mindspore-gpu/devel && docker build . -t mindspore/mindspore-gpu:devel |
+|  | `runtime` | cd mindspore-gpu/runtime && docker build . -t mindspore/mindspore-gpu:runtime |
 
-    ```
-    cd mindspore-cpu/0.1.0-alpha && docker build . -t mindspore/mindspore-cpu:0.1.0-alpha
-    ```
-
-* GPU
-
-    ```
-    cd mindspore-gpu/0.1.0-alpha && docker build . -t mindspore/mindspore-gpu:0.1.0-alpha
-    ```
+> **NOTICE:** The `x.y.z` version shown above should be replaced with the real version number.
diff --git a/docker/mindspore-cpu/0.2.0-alpha/Dockerfile b/docker/mindspore-cpu/0.2.0-alpha/Dockerfile
new file mode 100644
index 0000000000..9524cee745
--- /dev/null
+++ b/docker/mindspore-cpu/0.2.0-alpha/Dockerfile
@@ -0,0 +1,67 @@
+FROM ubuntu:18.04
+
+MAINTAINER leonwanghui <leon.wanghui@huawei.com>
+
+# Set env
+ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
+ENV PATH /usr/local/bin:$PATH
+
+# Install base tools
+RUN apt update \
+    && DEBIAN_FRONTEND=noninteractive apt install -y \
+    vim \
+    wget \
+    curl \
+    xz-utils \
+    net-tools \
+    openssh-client \
+    git \
+    ntpdate \
+    tzdata \
+    tcl \
+    sudo \
+    bash-completion
+
+# Install compile tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y \
+    gcc \
+    g++ \
+    zlibc \
+    make \
+    libgmp-dev \
+    patch \
+    autoconf \
+    libtool \
+    automake \
+    flex
+
+# Set bash
+RUN echo "dash dash/sh boolean false" | debconf-set-selections
+RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
+
+# Install python (v3.7.5)
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
+    && cd /tmp \
+    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
+    && tar -xvf v3.7.5.tar.gz \
+    && cd /tmp/cpython-3.7.5 \
+    && mkdir -p ${PYTHON_ROOT_PATH} \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -f /usr/local/bin/python \
+    && rm -f /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && rm -rf /tmp/cpython-3.7.5 \
+    && rm -f /tmp/v3.7.5.tar.gz
+
+# Set pip source
+RUN mkdir -pv /root/.pip \
+    && echo "[global]" > /root/.pip/pip.conf \
+    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
+    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
+
+# Install MindSpore cpu whl package
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
diff --git a/docker/mindspore-gpu/0.2.0-alpha/Dockerfile b/docker/mindspore-gpu/0.2.0-alpha/Dockerfile
new file mode 100644
index 0000000000..9b59f845f7
--- /dev/null
+++ b/docker/mindspore-gpu/0.2.0-alpha/Dockerfile
@@ -0,0 +1,83 @@
+FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
+
+MAINTAINER leonwanghui <leon.wanghui@huawei.com>
+
+# Set env
+ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
+ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
+ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
+ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH
+
+# Install base tools
+RUN apt update \
+    && DEBIAN_FRONTEND=noninteractive apt install -y \
+    vim \
+    wget \
+    curl \
+    xz-utils \
+    net-tools \
+    openssh-client \
+    git \
+    ntpdate \
+    tzdata \
+    tcl \
+    sudo \
+    bash-completion
+
+# Install compile tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y \
+    gcc \
+    g++ \
+    zlibc \
+    make \
+    libgmp-dev \
+    patch \
+    autoconf \
+    libtool \
+    automake \
+    flex \
+    libnccl2=2.4.8-1+cuda10.1 \
+    libnccl-dev=2.4.8-1+cuda10.1
+
+# Set bash
+RUN echo "dash dash/sh boolean false" | debconf-set-selections
+RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
+
+# Install python (v3.7.5)
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
+    && cd /tmp \
+    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
+    && tar -xvf v3.7.5.tar.gz \
+    && cd /tmp/cpython-3.7.5 \
+    && mkdir -p ${PYTHON_ROOT_PATH} \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -f /usr/local/bin/python \
+    && rm -f /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && rm -rf /tmp/cpython-3.7.5 \
+    && rm -f /tmp/v3.7.5.tar.gz
+
+# Set pip source
+RUN mkdir -pv /root/.pip \
+    && echo "[global]" > /root/.pip/pip.conf \
+    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
+    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
+
+# Install openmpi (v3.1.5)
+RUN cd /tmp \
+    && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
+    && tar -xvf openmpi-3.1.5.tar.gz \
+    && cd /tmp/openmpi-3.1.5 \
+    && mkdir -p ${OMPI_ROOT_PATH} \
+    && ./configure --prefix=${OMPI_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -rf /tmp/openmpi-3.1.5 \
+    && rm -f /tmp/openmpi-3.1.5.tar.gz
+
+# Install MindSpore cuda-10.1 whl package
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
diff --git a/setup.py b/setup.py
index 82e6d70fcc..d929d5d707 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@ from setuptools import setup, find_packages
 from setuptools.command.egg_info import egg_info
 from setuptools.command.build_py import build_py
 
-version = '0.1.0'
+version = '0.2.0'
 
 backend_policy = os.getenv('BACKEND_POLICY')
 commit_id = os.getenv('COMMIT_ID').replace("\n", "")

From 4a79dde736f5323d4c13d84199e80cc08d7af7b4 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Sun, 26 Apr 2020 23:33:24 -0400
Subject: [PATCH 109/242] fix doc problems

---
 mindspore/nn/layer/normalization.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 644604d604..5faf046d18 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -17,6 +17,7 @@ from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.common.parameter import Parameter
 from mindspore.common.initializer import initializer
+from mindspore.ops.primitive import constexpr
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
 import mindspore.context as context
@@ -165,7 +166,9 @@ class _BatchNorm(Cell):
     def extend_repr(self):
         return 'num_features={}, eps={}, momentum={}, gamma={}, beta={}, moving_mean={}, moving_variance={}'.format(
             self.num_features, self.eps, self.momentum, self.gamma, self.beta, self.moving_mean, self.moving_variance)
-
+def _channel_check(channel, num_channel):
+    if channel != num_channel:
+        raise ValueError("the input channel is not equal with num_channels")
 
 class BatchNorm1d(_BatchNorm):
     r"""
@@ -508,6 +511,7 @@ class GroupNorm(Cell):
 
     def construct(self, x):
         batch, channel, height, width = self.shape(x)
+        _channel_check(channel, self.num_channels)
         x = self.reshape(x, (batch, self.num_groups, channel*height*width/self.num_groups))
         mean = self.reduce_mean(x, 2)
         var = self.reduce_sum(self.square(x - mean), 2) / (channel * height * width / self.num_groups - 1)

From e49cd12d925cc0c15228b031919034cf34006099 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Sun, 26 Apr 2020 23:49:17 -0400
Subject: [PATCH 110/242] fix doc problems

---
 mindspore/nn/layer/normalization.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 5faf046d18..ffde5cecec 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -166,6 +166,8 @@ class _BatchNorm(Cell):
     def extend_repr(self):
         return 'num_features={}, eps={}, momentum={}, gamma={}, beta={}, moving_mean={}, moving_variance={}'.format(
             self.num_features, self.eps, self.momentum, self.gamma, self.beta, self.moving_mean, self.moving_variance)
+
+@constexpr
 def _channel_check(channel, num_channel):
     if channel != num_channel:
         raise ValueError("the input channel is not equal with num_channels")

From 9e633b6c12db05357c709b1e0941425c0c789356 Mon Sep 17 00:00:00 2001
From: panyifeng <panyifeng@huawei.com>
Date: Thu, 23 Apr 2020 15:42:11 +0800
Subject: [PATCH 111/242] validate bprop rules

---
 mindspore/ccsrc/ir/dtype.cc                   |  1 +
 mindspore/ccsrc/operator/ops.cc               |  1 +
 mindspore/ccsrc/operator/ops.h                |  1 +
 mindspore/ccsrc/optimizer/ad/dfunctor.cc      |  8 --
 mindspore/ccsrc/optimizer/ad/dfunctor.h       |  7 +-
 mindspore/ccsrc/optimizer/ad/kprim.cc         | 46 +++++----
 mindspore/ccsrc/optimizer/irpass.cc           |  1 +
 mindspore/ccsrc/optimizer/irpass.h            |  1 +
 .../optimizer/irpass/special_op_eliminate.h   | 19 ++++
 mindspore/ccsrc/pipeline/pass.cc              |  1 +
 .../ccsrc/pipeline/static_analysis/prim.cc    |  7 ++
 mindspore/common/dtype.py                     |  3 +
 mindspore/ops/_grad/grad_array_ops.py         |  6 +-
 mindspore/ops/_grad/grad_math_ops.py          |  4 +-
 mindspore/ops/_grad/grad_nn_ops.py            |  2 +-
 .../multitype_ops/zeros_like_impl.py          |  4 +
 mindspore/ops/functional.py                   |  1 +
 mindspore/ops/operations/__init__.py          |  3 +-
 mindspore/ops/operations/other_ops.py         | 63 +++++++++++++
 tests/ut/python/model/test_bert_cell.py       |  8 +-
 tests/ut/python/model/test_mix_precision.py   |  2 +-
 tests/ut/python/ops/test_ops.py               | 22 ++---
 .../python/pynative_mode/test_cell_bprop.py   | 39 ++++++--
 .../python/pynative_mode/test_framstruct.py   | 93 +++++++++++++++++++
 .../pynative_mode/test_insert_grad_of.py      |  2 +-
 25 files changed, 275 insertions(+), 70 deletions(-)

diff --git a/mindspore/ccsrc/ir/dtype.cc b/mindspore/ccsrc/ir/dtype.cc
index 97291a3dc0..0ba25f2f66 100644
--- a/mindspore/ccsrc/ir/dtype.cc
+++ b/mindspore/ccsrc/ir/dtype.cc
@@ -695,6 +695,7 @@ REGISTER_PYBIND_DEFINE(
     (void)py::class_<String, Type, std::shared_ptr<String>>(m_sub, "String").def(py::init());
     (void)py::class_<RefKeyType, Type, std::shared_ptr<RefKeyType>>(m_sub, "RefKeyType").def(py::init());
     (void)py::class_<RefType, Type, std::shared_ptr<RefType>>(m_sub, "RefType").def(py::init());
+    (void)py::class_<TypeAnything, Type, std::shared_ptr<TypeAnything>>(m_sub, "TypeAnything").def(py::init());
   }));
 
 const TypePtr kTypeExternal = std::make_shared<External>();
diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/operator/ops.cc
index 91a54e1fdb..407efe5689 100755
--- a/mindspore/ccsrc/operator/ops.cc
+++ b/mindspore/ccsrc/operator/ops.cc
@@ -213,6 +213,7 @@ const PrimitivePtr kPrimGetRefOrigin = std::make_shared<Primitive>("get_ref_orig
 const PrimitivePtr kPrimInsertGradientOf = std::make_shared<Primitive>("InsertGradientOf");
 const PrimitivePtr kPrimPrintShapeType = std::make_shared<Primitive>("PrintShapeType");
 const PrimitivePtr kPrimSameTypeShape = std::make_shared<Primitive>("SameTypeShape");
+const PrimitivePtr kPrimCheckBprop = std::make_shared<Primitive>("CheckBprop");
 const PrimitivePtr kPrimPrint = std::make_shared<Primitive>("Print");
 
 const PrimitivePtr kPrimMakeRef = std::make_shared<Primitive>("make_ref");
diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h
index d84b2e4738..e938e5c64e 100755
--- a/mindspore/ccsrc/operator/ops.h
+++ b/mindspore/ccsrc/operator/ops.h
@@ -220,6 +220,7 @@ extern const PrimitivePtr kPrimInsertGradientOf;
 extern const PrimitivePtr kPrimPrintShapeType;
 extern const PrimitivePtr kPrimPrint;
 extern const PrimitivePtr kPrimSameTypeShape;
+extern const PrimitivePtr kPrimCheckBprop;
 extern const PrimitivePtr kPrimDepend;
 extern const PrimitivePtr kPrimStateSetItem;
 extern const PrimitivePtr kPrimScalarSummary;
diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/optimizer/ad/dfunctor.cc
index 33f919e2ac..de368dbdd2 100644
--- a/mindspore/ccsrc/optimizer/ad/dfunctor.cc
+++ b/mindspore/ccsrc/optimizer/ad/dfunctor.cc
@@ -309,14 +309,6 @@ FuncGraphPtr DFunctor::KUserDefined(const FuncGraphPtr &primal) {
   auto bprop = primal->transforms().find("bprop");
   if (bprop != primal->transforms().end()) {
     FuncGraphPtr bprop_graph = bprop->second.func_graph();
-    const size_t param_diff = 1;
-    if (bprop_graph->output()->isa<CNode>() &&
-        bprop_graph->output()->cast<CNodePtr>()->size() + param_diff != bprop_graph->parameters().size()) {
-      // It does not matter with the final tangents, just a tip for debugging
-      MS_LOG(DEBUG) << "User defined Cell bprop " << primal->ToString() << " in scope "
-                    << primal->output()->scope()->name()
-                    << " output must be a tuple and output number should be the same with inputs.";
-    }
     resources_->manager()->AddFuncGraph(bprop_graph);
 
     if (bprop_graph->free_variables_nodes().size() != 0 || primal->free_variables_nodes().size() != 0) {
diff --git a/mindspore/ccsrc/optimizer/ad/dfunctor.h b/mindspore/ccsrc/optimizer/ad/dfunctor.h
index 3059736171..1358cc8f28 100644
--- a/mindspore/ccsrc/optimizer/ad/dfunctor.h
+++ b/mindspore/ccsrc/optimizer/ad/dfunctor.h
@@ -127,7 +127,7 @@ class KPrim {
   AnfNodePtr BuildOutput(const FuncGraphPtr &bprop_fg);
   void TransformArgs(const FuncGraphManagerPtr &mng, const FuncGraphPtr &bprop_fg, const FuncGraphPtr &outer,
                      std::vector<AnfNodePtr> *const transf_args);
-  void AddCheckTypeShapeOp(const FuncGraphPtr &bprop_fg);
+  void CheckBprop(const FuncGraphPtr &bprop_fg, const string &prim_to_check);
 
   Registry bprop_registry_;
   std::unordered_map<PrimitivePtr, MetaFuncGraphPtr> bprop_registry_meta_;
@@ -137,10 +137,7 @@ template <typename T>
 FuncGraphPtr KPrim::BpropToK(const T &primal, const FuncGraphPtr &bprop_fg) {
   MS_EXCEPTION_IF_NULL(primal);
   MS_EXCEPTION_IF_NULL(bprop_fg);
-
-  if (IsPrimitiveCNode(bprop_fg->output(), prim::kPrimMakeTuple)) {
-    AddCheckTypeShapeOp(bprop_fg);
-  }
+  CheckBprop(bprop_fg, primal->ToString());
 
   auto debug_info = std::make_shared<GraphDebugInfo>();
   debug_info->set_name(primal->ToString());
diff --git a/mindspore/ccsrc/optimizer/ad/kprim.cc b/mindspore/ccsrc/optimizer/ad/kprim.cc
index 2c8ddbfa82..c74670e55d 100644
--- a/mindspore/ccsrc/optimizer/ad/kprim.cc
+++ b/mindspore/ccsrc/optimizer/ad/kprim.cc
@@ -50,9 +50,13 @@ FuncGraphPtr KPrim::GetBprop(const PrimitivePtr &prim) {
                                        grad_op_child_scope_prefix + prim->name());
   ScopeGuard scope_guard(scope);
   py::function fn = prim->GetBpropFunction();
+  if (fn == nullptr || py::isinstance<py::none>(fn)) {
+    MS_LOG(DEBUG) << "Fail to find bprop function for " << prim->name() << ".";
+    return nullptr;
+  }
   FuncGraphPtr func_graph = parse::ParsePythonCode(fn);
   if (func_graph == nullptr) {
-    MS_LOG(WARNING) << "Fail to find bprop function for " << prim->name() << ".";
+    MS_LOG(ERROR) << "Fail to parse bprop function for " << prim->name() << ".";
     return nullptr;
   }
   return func_graph;
@@ -153,31 +157,23 @@ void KPrim::TransformArgs(const FuncGraphManagerPtr &mng, const FuncGraphPtr &bp
   }
 }
 
-void KPrim::AddCheckTypeShapeOp(const FuncGraphPtr &bprop_fg) {
+void KPrim::CheckBprop(const FuncGraphPtr &bprop_fg, const string &prim_to_check) {
   // bprop_fg has been checked in caller
-  auto same_type_shape = prim::GetPythonOps("same_type_shape", "mindspore.ops.functional")->cast<PrimitivePtr>();
-  MS_EXCEPTION_IF_NULL(same_type_shape);
-
-  std::vector<AnfNodePtr> bout_input;
-  bout_input.push_back(NewValueNode(prim::kPrimMakeTuple));
-
-  auto fg_out = bprop_fg->output();
-  MS_EXCEPTION_IF_NULL(fg_out);
-  auto cnode = fg_out->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(cnode);
-
-  auto &inputs = cnode->inputs();
-  auto params = bprop_fg->parameters();
-  std::vector<AnfNodePtr> sub_input;
-  for (size_t i = 1; i < inputs.size(); ++i) {
-    sub_input.clear();
-    sub_input.push_back(NewValueNode(same_type_shape));
-    sub_input.push_back(inputs[i]);
-    sub_input.push_back(params[i - 1]);
-    bout_input.push_back(bprop_fg->NewCNode(sub_input));
-  }
-  AnfNodePtr cbout = bprop_fg->NewCNode(bout_input);
-  bprop_fg->set_output(cbout);
+  auto check_bprop = prim::GetPythonOps("check_bprop", "mindspore.ops.functional")->cast<PrimitivePtr>();
+  MS_EXCEPTION_IF_NULL(check_bprop);
+  check_bprop->set_attr("prim_to_check", std::make_shared<StringImm>(prim_to_check));
+
+  std::vector<AnfNodePtr> inputs;
+  inputs.emplace_back(NewValueNode(prim::kPrimMakeTuple));
+  inputs.insert(inputs.begin() + 1, bprop_fg->parameters().begin(), bprop_fg->parameters().end() - 2);
+  AnfNodePtr params = bprop_fg->NewCNode(inputs);
+
+  inputs.clear();
+  inputs.push_back(NewValueNode(check_bprop));
+  inputs.push_back(bprop_fg->output());
+  inputs.push_back(params);
+  AnfNodePtr bprop_out = bprop_fg->NewCNode(inputs);
+  bprop_fg->set_output(bprop_out);
 }
 
 FuncGraphPtr KPrim::KUserDefinedCellBprop(const FuncGraphPtr bprop_fg) {
diff --git a/mindspore/ccsrc/optimizer/irpass.cc b/mindspore/ccsrc/optimizer/irpass.cc
index 3b44700e1c..2bd013cb08 100644
--- a/mindspore/ccsrc/optimizer/irpass.cc
+++ b/mindspore/ccsrc/optimizer/irpass.cc
@@ -67,6 +67,7 @@ OptimizeIRPassLib::OptimizeIRPassLib() {
     {prim::kPrimReduceMean, prim::kPrimReduceAll, prim::kPrimReduceSum, prim::kPrimReduceMax, prim::kPrimReduceMin});
   partial_eliminate_ = MakeSubstitution(PartialEliminater(), "partial_eliminate", IsCNodeDup);
   same_eliminate_ = MakeSubstitution(SameEliminater(), "same_eliminate", prim::kPrimSameTypeShape);
+  check_bprop_eliminate_ = MakeSubstitution(CheckBpropEliminater(), "check_bprop_eliminate", prim::kPrimCheckBprop);
   reset_defer_inline_ = MakeSubstitution(ResetDeferInline(), "reset_defer_inline", IsValueNode<FuncGraph>);
 
   // Env Item Eliminate
diff --git a/mindspore/ccsrc/optimizer/irpass.h b/mindspore/ccsrc/optimizer/irpass.h
index 0af22c5cd0..02bfee65d6 100644
--- a/mindspore/ccsrc/optimizer/irpass.h
+++ b/mindspore/ccsrc/optimizer/irpass.h
@@ -45,6 +45,7 @@ class OptimizeIRPassLib {
   SubstitutionPtr reduce_eliminate_;
   SubstitutionPtr partial_eliminate_;
   SubstitutionPtr same_eliminate_;
+  SubstitutionPtr check_bprop_eliminate_;
   SubstitutionPtr reset_defer_inline_;
 
   // Env Item Eliminate
diff --git a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h b/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h
index 2dd27a89c3..e06ccd862b 100644
--- a/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h
+++ b/mindspore/ccsrc/optimizer/irpass/special_op_eliminate.h
@@ -109,6 +109,25 @@ class SameEliminater : public AnfVisitor {
   AnfNodePtr x_{nullptr};
 };
 
+// {prim::kPrimCheckBprop, X, Y} -> X
+class CheckBpropEliminater : public AnfVisitor {
+ public:
+  AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override {
+    x_ = nullptr;
+    AnfVisitor::Match(prim::kPrimCheckBprop, {IsNode, IsNode})(node);
+    return x_;
+  }
+
+  void Visit(const AnfNodePtr &node) override {
+    if (x_ == nullptr) {
+      x_ = node;
+    }
+  }
+
+ private:
+  AnfNodePtr x_{nullptr};
+};
+
 // Reset defer_inline flag
 class ResetDeferInline : public AnfVisitor {
  public:
diff --git a/mindspore/ccsrc/pipeline/pass.cc b/mindspore/ccsrc/pipeline/pass.cc
index 6ce6c4603d..d9f805fdc9 100644
--- a/mindspore/ccsrc/pipeline/pass.cc
+++ b/mindspore/ccsrc/pipeline/pass.cc
@@ -108,6 +108,7 @@ OptPassGroupMap GetOptPassesA(const opt::irpass::OptimizeIRPassLib &irpass) {
   });
   opt::OptPassConfig a_3 = opt::OptPassConfig({
     irpass.same_eliminate_,
+    irpass.check_bprop_eliminate_,
     irpass.replace_applicator_,
   });
   opt::OptPassConfig virtual_dataset = opt::OptPassConfig({irpass.virtual_dataset_eliminate_});
diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
index d71ad8f710..293f31707e 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
@@ -295,6 +295,9 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) {
     dic["shape"] = shape;
     dic["dtype"] = arg_slice->BuildType();
     dic["value"] = BuildValue(arg_slice->BuildValue());
+  } else if (abs_base->isa<AbstractRef>()) {
+    auto value = abs_base->cast<AbstractRefPtr>()->ref();
+    dic = ConvertAbstractToPython(value);
   } else if (abs_base->isa<AbstractTuple>()) {
     auto arg_tuple = dyn_cast<AbstractTuple>(abs_base);
     size_t len = arg_tuple->size();
@@ -327,6 +330,10 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) {
     dic["shape"] = py::none();
     dic["dtype"] = py::none();
     dic["value"] = py::none();
+  } else if (abs_base->isa<AbstractFunction>()) {
+    dic["shape"] = py::none();
+    dic["dtype"] = abs_base->BuildType();
+    dic["value"] = py::none();
   } else {
     auto value = abs_base->BuildValue();
     if ((*value == *kAnyValue)) {
diff --git a/mindspore/common/dtype.py b/mindspore/common/dtype.py
index 702e01effb..e6b9779f39 100644
--- a/mindspore/common/dtype.py
+++ b/mindspore/common/dtype.py
@@ -85,13 +85,16 @@ list_ = typing.List()
 tuple_ = typing.Tuple()
 tensor = typing.TensorType()
 function = typing.Function()
+function_type = typing.Function
 symbolic_key = typing.SymbolicKeyType()
 env_type = typing.EnvType()
+env_type_type = typing.EnvType
 type_type = typing.TypeType()
 type_none = typing.TypeNone()
 string = typing.String()
 type_refkey = typing.RefKeyType()
 tensor_type = typing.TensorType
+anything_type = typing.TypeAnything
 
 number_type = (int8,
                int16,
diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py
index 35d37b3ada..b9281a7456 100644
--- a/mindspore/ops/_grad/grad_array_ops.py
+++ b/mindspore/ops/_grad/grad_array_ops.py
@@ -211,11 +211,11 @@ def get_bprop_slice(self):
 
     def bprop(x, begin, size, out, dout):
         dx = P.Pad(_slice_grad_pad(begin, size, shape_op(x)))(dout)
-        return (dx,)
+        return (dx, zeros_like(begin), zeros_like(size))
 
     def bprop_gpu(x, begin, size, out, dout):
         dx = dx = G.SliceGrad()(dout, x, begin, size)
-        return (dx,)
+        return (dx, zeros_like(begin), zeros_like(size))
 
     if context.get_context('device_target') == "GPU":
         return bprop_gpu
@@ -262,7 +262,7 @@ def get_bprop_gather_v2(self):
         # Example: out_shape:(3,2,3) axis 2 -> (1,2,0)
         perm_2 = _generate_inverse_index(x_shp, axis)
         params_grad = transpose(params_grad, perm_2)
-        return params_grad, zeros_like(indices)
+        return params_grad, zeros_like(indices), zeros_like(axis)
     return bprop
 
 
diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py
index c334050218..2f39fe8745 100755
--- a/mindspore/ops/_grad/grad_math_ops.py
+++ b/mindspore/ops/_grad/grad_math_ops.py
@@ -505,7 +505,7 @@ def get_bprop_reducemax(self):
 
     def bprop(x, axis, out, dout):
         dx = _min_or_max_grad(x, axis, out, dout)
-        return (dx,)
+        return (dx, zeros_like(axis))
     return bprop
 
 
@@ -528,7 +528,7 @@ def get_bprop_reducemin(self):
 
     def bprop(x, axis, out, dout):
         dx = _min_or_max_grad(x, axis, out, dout)
-        return (dx,)
+        return (dx, zeros_like(axis))
     return bprop
 
 
diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py
index e43d3d5d3a..baccdbbbb2 100755
--- a/mindspore/ops/_grad/grad_nn_ops.py
+++ b/mindspore/ops/_grad/grad_nn_ops.py
@@ -436,7 +436,7 @@ def get_bprop_onehot(self):
     """Grad definition for `OneHot` operation."""
 
     def bprop(indices, depth, on_value, off_value, out, dout):
-        return zeros_like(indices), zeros_like(depth)
+        return zeros_like(indices), zeros_like(depth), zeros_like(on_value), zeros_like(off_value)
     return bprop
 
 
diff --git a/mindspore/ops/composite/multitype_ops/zeros_like_impl.py b/mindspore/ops/composite/multitype_ops/zeros_like_impl.py
index 1c1a4f1d12..1308bfd62a 100644
--- a/mindspore/ops/composite/multitype_ops/zeros_like_impl.py
+++ b/mindspore/ops/composite/multitype_ops/zeros_like_impl.py
@@ -31,6 +31,10 @@ def _zeros_like_scala(x):
     """Returns 0 which has the same dtype as x where x is a scalar."""
     return 0
 
+@zeros_like_leaf.register("Bool")
+def _zeros_like_bool(x):
+    """Returns False if x is a bool."""
+    return False
 
 newenv = base.EnvInstance_()
 
diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py
index 4135133e85..4cae11aed1 100644
--- a/mindspore/ops/functional.py
+++ b/mindspore/ops/functional.py
@@ -56,6 +56,7 @@ tensor_pow = P.Pow()
 tensor_mod = P.FloorMod()
 strided_slice = P.StridedSlice()
 same_type_shape = P.SameTypeShape()
+check_bprop = P.CheckBprop()
 equal = P.Equal()
 not_equal = P.NotEqual()
 assign_sub = P.AssignSub()
diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py
index c75c2031d7..868d3b359e 100644
--- a/mindspore/ops/operations/__init__.py
+++ b/mindspore/ops/operations/__init__.py
@@ -67,7 +67,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
                      SparseSoftmaxCrossEntropyWithLogits, Tanh,
                      TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl,
                      ApplyRMSProp, ApplyCenteredRMSProp)
-from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey
+from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey, CheckBprop
 from . import _quant_ops
 from ._quant_ops import *
 
@@ -179,6 +179,7 @@ __all__ = [
     'GeSwitch',
     'Merge',
     'SameTypeShape',
+    'CheckBprop',
     'CheckValid',
     'BoundingBoxEncode',
     'BoundingBoxDecode',
diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py
index 12a8a2cfde..5e66050d9a 100644
--- a/mindspore/ops/operations/other_ops.py
+++ b/mindspore/ops/operations/other_ops.py
@@ -269,3 +269,82 @@ class MakeRefKey(Primitive):
 
     def __call__(self):
         pass
+
+
+class CheckBprop(PrimitiveWithInfer):
+    """
+    Checks whether data type and shape of corresponding element from tuple x and y are the same.
+
+    Only the first `len(input_y)` elements of `input_x` are compared; any extra elements are not checked.
+
+    Raises:
+        TypeError: If `input_x` has fewer elements than `input_y`, or if the shape or data type of a
+            compared pair of elements is not the same.
+
+    Inputs:
+        - **input_x** (tuple[Tensor]) - The input_x contains the outputs of bprop to be checked.
+        - **input_y** (tuple[Tensor]) - The input_y contains the inputs of bprop to check against.
+
+    Outputs:
+        (tuple[Tensor]), the input_x,
+        if data type and shape of corresponding elements from `input_x` and `input_y` are the same.
+
+    Examples:
+        >>> input_x = (Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32),)
+        >>> input_y = (Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32),)
+        >>> out = P.CheckBprop()(input_x, input_y)
+    """
+
+    @prim_attr_register
+    def __init__(self):
+        """init CheckBprop"""
+
+    def infer_shape(self, xshapes, yshapes):
+        """
+        Compare the shapes in `xshapes` against `yshapes` pairwise and return `xshapes` unchanged.
+
+        A pair is skipped when either of its two shapes is empty (falsy).
+        """
+        # NOTE(review): prim_to_check is not assigned in this class; presumably the caller sets it - confirm.
+        tips = f'Bprop of {self.prim_to_check}'
+        if len(xshapes) < len(yshapes):
+            raise TypeError(f"{tips}, the size of output should be {len(yshapes)},"
+                            f" but got {len(xshapes)}.")
+        checking_range = len(yshapes)
+        for i in range(checking_range):
+            xshape = xshapes[i]
+            yshape = yshapes[i]
+            if not xshape or not yshape:
+                continue
+            if xshape != yshape:
+                raise TypeError(f"{tips}, the shape of {i}th output should be {yshape},"
+                                f" but got {xshape}.")
+        return xshapes
+
+    def infer_dtype(self, xdtypes, ydtypes):
+        """
+        Compare the dtypes in `xdtypes` against `ydtypes` pairwise and return `xdtypes` unchanged.
+
+        A pair is skipped when either dtype is an instance of `mstype.anything_type`. When the expected
+        dtype is an instance of `mstype.function_type`, the output dtype must be an instance of
+        `mstype.env_type_type`; otherwise the two dtypes must compare equal.
+        """
+        tips = f'Bprop of {self.prim_to_check}'
+        if len(xdtypes) < len(ydtypes):
+            raise TypeError(f"{tips}, the size of output should be {len(ydtypes)},"
+                            f" but got {len(xdtypes)}.")
+        checking_range = len(ydtypes)
+        for i in range(checking_range):
+            xdtype = xdtypes[i]
+            ydtype = ydtypes[i]
+            if isinstance(xdtype, mstype.anything_type) or isinstance(ydtype, mstype.anything_type):
+                continue
+            if isinstance(ydtype, mstype.function_type):
+                if not isinstance(xdtype, mstype.env_type_type):
+                    raise TypeError(f"{tips}, the dtype of {i}th output should be {mstype.env_type_type},"
+                                    f" but got {xdtype}.")
+                continue
+            if xdtype != ydtype:
+                raise TypeError(f"{tips}, the dtype of {i}th output should be {ydtype},"
+                                f" but got {xdtype}.")
+        return xdtypes
diff --git a/tests/ut/python/model/test_bert_cell.py b/tests/ut/python/model/test_bert_cell.py
index fdaaac397b..2cb642c75f 100644
--- a/tests/ut/python/model/test_bert_cell.py
+++ b/tests/ut/python/model/test_bert_cell.py
@@ -317,7 +317,7 @@ test_case_cell_ops = [
                             initializer_range=0.02,
                             dropout_prob=0.1),
         'desc_inputs': [[1, 768], [1, 768]],
-        'desc_bprop': [[1, 128, 768]]}),  # maybe not right
+        'desc_bprop': [[1, 768]]}),
     ('BertTransformer_2', {
         'block': bert_trans(),
         'desc_inputs': [[1, 128, 768], [1, 128, 128]]}),
@@ -331,7 +331,7 @@ test_case_cell_ops = [
         'desc_inputs': [Tensor(np.random.rand(128).astype(np.int32)),
                         Tensor(np.random.rand(128).astype(np.int32)), [128]],
         'desc_bprop': [[1, 128, 768], [1, 128, 768], [1, 128, 768]],
-        'num_output': 3}),  # maybe not right
+        'num_output': 3}),
 
     ('BertModel_1', {
         'block': BertModel(config=BertConfig(batch_size=1,
@@ -342,7 +342,7 @@ test_case_cell_ops = [
         'desc_inputs': [Tensor(np.random.rand(128).astype(np.int32)),
                         Tensor(np.random.rand(128).astype(np.int32)), [128]],
         'desc_bprop': [[1, 128, 768], [1, 128, 768], [1, 128, 768]],
-        'num_output': 3}),  # maybe not right
+        'num_output': 3}),
 
     ('BertModel_2', {
         'block': BertModel(config=BertConfig(batch_size=1,
@@ -354,7 +354,7 @@ test_case_cell_ops = [
         'desc_inputs': [Tensor(np.random.rand(128).astype(np.int32)),
                         Tensor(np.random.rand(128).astype(np.int32)), [128]],
         'desc_bprop': [[1, 128, 768], [1, 128, 768], [1, 128, 768]],
-        'num_output': 3}),  # maybe not right
+        'num_output': 3}),
 
     ('BertPretrainingLoss', {
         'block': BertPretrainingLoss(config=BertConfig(batch_size=1)),
diff --git a/tests/ut/python/model/test_mix_precision.py b/tests/ut/python/model/test_mix_precision.py
index 0a8b185e8c..0c762f42b9 100644
--- a/tests/ut/python/model/test_mix_precision.py
+++ b/tests/ut/python/model/test_mix_precision.py
@@ -175,7 +175,7 @@ class GetParamGrad(nn.Cell):
 
 def test_grad_conv_prelu():
     shapes = [[64, 64, 112, 112]]
-    outshape = [[64, 64, 56, 56]]
+    outshape = [[64, 64, 112, 112]]
     net = IRBlockZ(inplanes=64, planes=64).add_flags_recursive(fp16=True)
     inputs = [convert(shp, dtype=np.float16) for shp in shapes]
     sens_shape = outshape[0]
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index d6622e76f4..6121933d70 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -585,7 +585,7 @@ test_case_nn_ops = [
     ('ReLUV2', {
         'block': P.ReLUV2(),
         'desc_inputs': [[1, 3, 4, 4]],
-        'desc_bprop': [[1, 3, 4, 4], [1, 3, 4, 4]]}),
+        'desc_bprop': [[1, 3, 4, 4], ([1, 1, 4, 4, 2], {'dtype': np.uint8})]}),
     ('ReLUGrad', {
         'block': G.ReluGrad(),
         'desc_inputs': [[1, 3, 4, 4], [1, 3, 4, 4]],
@@ -626,7 +626,7 @@ test_case_nn_ops = [
     ('MaxPoolWithArgmax', {
         'block': P.MaxPoolWithArgmax(ksize=2, strides=2),
         'desc_inputs': [[128, 32, 32, 64]],
-        'desc_bprop': [[128, 32, 8, 16], [128, 32, 8, 16]]}),
+        'desc_bprop': [[128, 32, 16, 32], ([128, 32, 4, 33], {'dtype': np.uint16})]}),
     ('SoftmaxCrossEntropyWithLogits', {
         'block': P.SoftmaxCrossEntropyWithLogits(),
         'desc_inputs': [[1, 10], [1, 10]],
@@ -639,7 +639,7 @@ test_case_nn_ops = [
     ('LogSoftmax', {
         'block': P.LogSoftmax(),
         'desc_inputs': [[64, 2]],
-        'desc_bprop': [[160, 30522]]}),
+        'desc_bprop': [[64, 2]]}),
     ('LogSoftmaxGrad', {
         'block': G.LogSoftmaxGrad(),
         'desc_inputs': [[16, 1234], [16, 1234]],
@@ -648,7 +648,7 @@ test_case_nn_ops = [
     ('LayerNorm', {
         'block': P.LayerNorm(),
         'desc_inputs': [[2, 16], [16], [16]],
-        'desc_bprop': [[2, 16], [2, 16], [2, 16]]}),
+        'desc_bprop': [[2, 16], [2, 1], [2, 1]]}),
     ('LayerNormGrad', {
         'block': G.LayerNormGrad(),
         'desc_inputs': [[2, 16], [2, 16], [2, 16], [2, 16], [16]],
@@ -845,7 +845,7 @@ test_case_nn_ops = [
         'block': P.OneHot(),
         'desc_const': [3, Tensor(1.0, mstype.float32), Tensor(0.0, mstype.float32)],
         'desc_inputs': [Tensor(np.array([64]).astype(np.int32))],
-        'desc_bprop': [[64, 2]]}),
+        'desc_bprop': [[1, 3]]}),
     ('ReduceProd_0', {
         'block': P.ReduceProd(),
         'desc_const': [0],
@@ -950,7 +950,7 @@ test_case_array_ops = [
         'block': P.Cast(),
         'desc_const': [mstype.int32],
         'desc_inputs': [[2, 3, 4, 5]],
-        'desc_bprop': [Tensor(np.ones((2, 3, 3, 5)).astype(np.int32))]}),
+        'desc_bprop': [Tensor(np.ones((2, 3, 4, 5)).astype(np.int32))]}),
     ('ExpandDims', {
         'block': P.ExpandDims(),
         'desc_const': [0],
@@ -1002,12 +1002,12 @@ test_case_array_ops = [
         'desc_inputs': [
             (Tensor(np.array([[0, 1], [2, 1]]).astype(np.int32)),
              Tensor(np.array([[0, 1], [2, 1]]).astype(np.int32)))],
-        'desc_bprop': [[4, 2]]}),
+        'desc_bprop': [([4, 2], {'dtype': np.int32})]}),
     ('ConcatV2_1', {
         'block': P.Concat(axis=2),
         'desc_inputs': [(Tensor(np.array([[[0, 1, 2]], [[2, 1, 2]]]).astype(np.int32)),
                          Tensor(np.array([[[0, 1]], [[2, 1]]]).astype(np.int32)))],
-        'desc_bprop': [[2, 1, 5]]}),
+        'desc_bprop': [([2, 1, 5], {'dtype': np.int32})]}),
     ('ConcatV2_2', {
         'block': NetForConcat(),
         'desc_inputs': [[2, 2]],
@@ -1042,7 +1042,7 @@ test_case_array_ops = [
     ('Pack_2', {
         'block': NetForPackInput(P.Pack()),
         'desc_inputs':[[2, 2]],
-        'desc_bprop':[[2, 2, 2]],
+        'desc_bprop':[[1, 2, 2]],
     }),
     ('Pack_3', {
         'block': NetForPackInput(P.Pack()),
@@ -1077,7 +1077,7 @@ test_case_array_ops = [
     ('SpaceToBatch_2', {
         'block': P.SpaceToBatch(2, [[1, 1], [0, 4]]),
         'desc_inputs': [[1, 3, 2, 2]],
-        'desc_bprop': [[4, 3, 2, 4]],
+        'desc_bprop': [[4, 3, 2, 3]],
     }),
     ('BatchToSpace_1', {
         'block': P.BatchToSpace(2, [[0, 0], [0, 0]]),
@@ -1124,7 +1124,7 @@ test_case_other_ops = [
         'desc_const': [(3, 3)],
         'desc_inputs': (Tensor(np.ones((2, 2), np.int32)),
                         Tensor(np.ones((2,), np.int32))),
-        'desc_bprop': [[3, 3]]}),
+        'desc_bprop': [([3, 3], {'dtype': np.int32})]}),
     ('SmoothL1Loss', {
         'block': P.SmoothL1Loss(),
         'desc_inputs': [[256, 4], [256, 4]],
diff --git a/tests/ut/python/pynative_mode/test_cell_bprop.py b/tests/ut/python/pynative_mode/test_cell_bprop.py
index c69b80412e..bd9f46d21d 100644
--- a/tests/ut/python/pynative_mode/test_cell_bprop.py
+++ b/tests/ut/python/pynative_mode/test_cell_bprop.py
@@ -229,12 +229,6 @@ class TwoInputBprop(nn.Cell):
     def bprop(self, x, y, out, dout):
         return 5 * x, 8 * y
 
-class TwoInput(nn.Cell):
-    def __init__(self):
-        super().__init__()
-        self.op = P.Mul()
-    def construct(self, x, y):
-        return  self.op(x, y)
 
 class TwoInputWithParameter(nn.Cell):
     def __init__(self):
@@ -301,8 +295,40 @@ class MulAddWithWrongOutputNum(nn.Cell):
     def construct(self, x, y):
         return 2 * x + y
     def bprop(self, x, y, out, dout):
-        return 2 * dout, 2 * y, out
+        return 2 * dout,
 
 def test_grad_mul_add_with_wrong_output_num():
+    """The cell's bprop returns one gradient for two inputs; the test expects TypeError."""
     mul_add = MulAddWithWrongOutputNum()
-    C.grad_all(mul_add)(1, 2)
+    with pytest.raises(TypeError):
+        C.grad_all(mul_add)(1, 2)
+
+class MulAddWithWrongOutputType(nn.Cell):
+    def __init__(self):
+        super(MulAddWithWrongOutputType, self).__init__()
+    def construct(self, x, y):
+        return 2 * x + y
+    def bprop(self, x, y, out, dout):
+        return 2 * dout, 2
+
+def test_grad_mul_add_with_wrong_output_type():
+    """The cell's bprop returns the scalar 2 as the gradient of a Tensor input; the test expects TypeError."""
+    mul_add = MulAddWithWrongOutputType()
+    with pytest.raises(TypeError):
+        C.grad_all(mul_add)(1, Tensor(np.ones([2, 2])))
+
+
+class MulAddWithWrongOutputShape(nn.Cell):
+    def __init__(self):
+        super(MulAddWithWrongOutputShape, self).__init__()
+        self.ones = Tensor(np.ones([2,]))
+    def construct(self, x, y):
+        return 2 * x + y
+    def bprop(self, x, y, out, dout):
+        return 2, self.ones
+
+def test_grad_mul_add_with_wrong_output_shape():
+    """The cell's bprop returns a Tensor of np.ones([2,]) for an input of np.ones([2, 2]); expects TypeError."""
+    mul_add = MulAddWithWrongOutputShape()
+    with pytest.raises(TypeError):
+        C.grad_all(mul_add)(1, Tensor(np.ones([2, 2])))
diff --git a/tests/ut/python/pynative_mode/test_framstruct.py b/tests/ut/python/pynative_mode/test_framstruct.py
index eb3b76765a..7e504c405f 100644
--- a/tests/ut/python/pynative_mode/test_framstruct.py
+++ b/tests/ut/python/pynative_mode/test_framstruct.py
@@ -32,6 +32,8 @@ from ....mindspore_test_framework.utils.check_gradient import (
     OperationGradChecker, check_gradient, ScalarGradChecker)
 from ....mindspore_test_framework.utils.bprop_util import bprop
 import mindspore.context as context
+from mindspore.ops._grad.grad_base import bprop_getters
+from mindspore.ops.primitive import prim_attr_register, PrimitiveWithInfer
 
 
 def setup_module(module):
@@ -721,3 +723,97 @@ def test_grad_if_defer_inline():
     inp = Tensor(np.ones([128, 96]).astype(np.float32))
     grads = C.grad_all(network)(inp)
     assert grads == (Tensor(np.full([128, 96], 0.6, dtype=np.float32)),)
+
+def test_bprop_with_wrong_output_num():
+    """Registers a bprop returning one gradient for a two-input primitive; the test expects TypeError."""
+    class BpropWithWrongOutputNum(PrimitiveWithInfer):
+        @prim_attr_register
+        def __init__(self):
+            super(BpropWithWrongOutputNum, self).__init__('BpropWithWrongOutputNum')
+
+        def __call__(self, x, y):
+            return x
+
+        def infer_shape(self, x_shape, yshape):
+            return x_shape
+
+        def infer_dtype(self, x_type, y_type):
+            return x_type
+
+    @bprop_getters.register(BpropWithWrongOutputNum)
+    def get_bprop_with_wrong_output_num(self):
+        """Generate bprop for BpropWithWrongOutputNum"""
+        def bprop(x, y, out, dout):
+            return (dout,)
+        return bprop
+
+    class BpropWithWrongOutputNumCell(nn.Cell):
+        def __init__(self):
+            super(BpropWithWrongOutputNumCell, self).__init__()
+        def construct(self, x, y):
+            return BpropWithWrongOutputNum()(x, y)
+    with pytest.raises(TypeError):
+        C.grad_all(BpropWithWrongOutputNumCell())(1, 2)
+
+def test_bprop_with_wrong_output_type():
+    """Registers a bprop returning the scalar 1 for an int32 Tensor input; the test expects TypeError."""
+    class BpropWithWrongOutputType(PrimitiveWithInfer):
+        @prim_attr_register
+        def __init__(self):
+            super(BpropWithWrongOutputType, self).__init__('BpropWithWrongOutputType')
+
+        def __call__(self, x):
+            return x
+
+        def infer_shape(self, x_shape):
+            return x_shape
+
+        def infer_dtype(self, x_type):
+            return x_type
+
+    @bprop_getters.register(BpropWithWrongOutputType)
+    def get_bprop_with_wrong_output_type(self):
+        """Generate bprop for BpropWithWrongOutputType"""
+        def bprop(x, out, dout):
+            return (1,)
+        return bprop
+
+    class BpropWithWrongOutputTypeCell(nn.Cell):
+        def __init__(self):
+            super(BpropWithWrongOutputTypeCell, self).__init__()
+        def construct(self, x):
+            return BpropWithWrongOutputType()(x)
+    with pytest.raises(TypeError):
+        C.grad_all(BpropWithWrongOutputTypeCell())(Tensor(np.ones([64, 10]).astype(np.int32)))
+
+def test_bprop_with_wrong_output_shape():
+    """Registers a bprop returning an np.ones([2,]) Tensor for an np.ones([64, 10]) input; expects TypeError."""
+    class BpropWithWrongOutputShape(PrimitiveWithInfer):
+        @prim_attr_register
+        def __init__(self):
+            super(BpropWithWrongOutputShape, self).__init__('BpropWithWrongOutputShape')
+
+        def __call__(self, x):
+            return x
+
+        def infer_shape(self, x_shape):
+            return x_shape
+
+        def infer_dtype(self, x_type):
+            return x_type
+
+    @bprop_getters.register(BpropWithWrongOutputShape)
+    def get_bprop_with_wrong_output_shape(self):
+        """Generate bprop for BpropWithWrongOutputShape"""
+        ones = Tensor(np.ones([2,]).astype(np.int32))
+        def bprop(x, out, dout):
+            return (ones,)
+        return bprop
+
+    class BpropWithWrongOutputShapeCell(nn.Cell):
+        def __init__(self):
+            super(BpropWithWrongOutputShapeCell, self).__init__()
+        def construct(self, x):
+            return BpropWithWrongOutputShape()(x)
+    with pytest.raises(TypeError):
+        C.grad_all(BpropWithWrongOutputShapeCell())(Tensor(np.ones([64, 10]).astype(np.int32)))
diff --git a/tests/ut/python/pynative_mode/test_insert_grad_of.py b/tests/ut/python/pynative_mode/test_insert_grad_of.py
index a11c5fa2b1..0527365a98 100644
--- a/tests/ut/python/pynative_mode/test_insert_grad_of.py
+++ b/tests/ut/python/pynative_mode/test_insert_grad_of.py
@@ -79,7 +79,7 @@ def test_InsertGradientOf_2():
 summary = P.ScalarSummary()
 def debug_gradient(dx):
     """ debug_gradient """
-    dx = summary("dx: ", dx)
+    summary("dx: ", dx)
     return dx
 
 debug = P.InsertGradientOf(debug_gradient)

From b314c1d62181b3509e4d7d12fffee0aec38be31e Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Mon, 27 Apr 2020 14:34:30 +0800
Subject: [PATCH 112/242] Remove extra empty string from log text

---
 mindspore/ccsrc/ir/dtype.cc                   | 10 ++---
 mindspore/ccsrc/ir/manager.cc                 | 15 ++++---
 mindspore/ccsrc/ir/primitive.cc               |  4 +-
 .../ccsrc/operator/composite/composite.cc     |  5 +--
 mindspore/ccsrc/operator/prim_nn.cc           |  5 +--
 mindspore/ccsrc/operator/prim_statement.cc    |  3 +-
 mindspore/ccsrc/optimizer/optimizer.h         |  2 +-
 .../ccsrc/pipeline/parse/function_block.cc    |  4 +-
 mindspore/ccsrc/pipeline/pipeline.cc          |  6 +--
 .../static_analysis/analysis_context.cc       |  2 +-
 .../pipeline/static_analysis/evaluator.cc     | 11 +++--
 .../static_analysis/param_validator.cc        | 13 +++---
 .../static_analysis/param_validator.h         |  2 +-
 .../static_analysis/static_analysis.cc        |  4 +-
 .../ccsrc/pipeline/static_analysis/utils.cc   |  2 +-
 mindspore/ccsrc/transform/convert.cc          |  2 +-
 mindspore/ccsrc/vm/vm.cc                      | 42 +++++++++----------
 mindspore/ccsrc/vm/vmimpl.cc                  |  2 +-
 18 files changed, 64 insertions(+), 70 deletions(-)

diff --git a/mindspore/ccsrc/ir/dtype.cc b/mindspore/ccsrc/ir/dtype.cc
index 97291a3dc0..ac4bace5b1 100644
--- a/mindspore/ccsrc/ir/dtype.cc
+++ b/mindspore/ccsrc/ir/dtype.cc
@@ -345,7 +345,7 @@ TypePtr StringToNumberType(const std::string &type_name, const std::string &num_
       auto bits = std::stoi(type_name.substr(num_type_name.size()));
       type = std::make_shared<T>(bits);
     } catch (const std::exception &e) {
-      MS_LOG(EXCEPTION) << "" << num_type_name << " convert from string error " << e.what();
+      MS_LOG(EXCEPTION) << num_type_name << " convert from string error " << e.what();
     }
   }
   return type;
@@ -389,7 +389,7 @@ TypePtr TensorStrToType(const std::string &type_name) {
       }
       type = std::make_shared<TensorType>(element_type);
     } catch (const std::exception &e) {
-      MS_LOG(EXCEPTION) << "" << type_name << " convert from string error " << e.what();
+      MS_LOG(EXCEPTION) << type_name << " convert from string error " << e.what();
     }
   }
 
@@ -416,7 +416,7 @@ TypePtr ListStrToType(const std::string &type_name) {
       }
       type = std::make_shared<List>(element_types);
     } catch (const std::exception &e) {
-      MS_LOG(EXCEPTION) << "" << type_name << " convert from string error " << e.what();
+      MS_LOG(EXCEPTION) << type_name << " convert from string error " << e.what();
     }
   }
 
@@ -443,7 +443,7 @@ TypePtr TupleStrToType(const std::string &type_name) {
       }
       type = std::make_shared<Tuple>(element_types);
     } catch (const std::exception &e) {
-      MS_LOG(EXCEPTION) << "" << type_name << " convert from string error " << e.what();
+      MS_LOG(EXCEPTION) << type_name << " convert from string error " << e.what();
     }
   }
   return type;
@@ -484,7 +484,7 @@ TypePtr FunctionStrToType(const std::string &type_name) {
       }
       type = std::make_shared<Function>(args_type, retval);
     } catch (const std::exception &e) {
-      MS_LOG(EXCEPTION) << "" << type_name << " convert from string error " << e.what();
+      MS_LOG(EXCEPTION) << type_name << " convert from string error " << e.what();
     }
   }
   return type;
diff --git a/mindspore/ccsrc/ir/manager.cc b/mindspore/ccsrc/ir/manager.cc
index a53c9e95ae..150e68ef4d 100644
--- a/mindspore/ccsrc/ir/manager.cc
+++ b/mindspore/ccsrc/ir/manager.cc
@@ -888,7 +888,7 @@ void FuncGraphUserNodesCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr
 void FuncGraphJDirectCollector::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) {
   if (IsValueNode<FuncGraph>(inp) && IsPrimitiveCNode(node, prim::kPrimJ)) {
     (void)Mod(node->func_graph(), GetValueNode<FuncGraphPtr>(inp), direction);
-    MS_LOG(DEBUG) << "" << node->func_graph()->ToString() << " users func graph "
+    MS_LOG(DEBUG) << node->func_graph()->ToString() << " users func graph "
                   << GetValueNode<FuncGraphPtr>(inp)->ToString() << " which contains J(func_graph), dir: " << direction;
   }
 }
@@ -945,7 +945,7 @@ FuncGraphSetPtr FuncGraphParentsTotalComputer::SeekParents(const FuncGraphPtr &f
 void FuncGraphParentsTotalComputer::RealRecompute(FuncGraphPtr fg) {
   MS_EXCEPTION_IF_NULL(fg);
   all_parents_direct_ = &(manager_->func_graph_parents_direct());
-  MS_LOG(DEBUG) << "" << fg->ToString() << " total func graph dep size:" << (*all_parents_direct_)[fg].size();
+  MS_LOG(DEBUG) << fg->ToString() << " total func graph dep size:" << (*all_parents_direct_)[fg].size();
   func_graph_parents_total_analysis_[fg].update(SeekParents(fg));
   MS_LOG(DEBUG) << "FuncGraphParentsTotalComputer end: " << func_graph_parents_total_analysis_[fg].size();
 }
@@ -1074,7 +1074,7 @@ void FuncGraphsUsedTotalComputer::RealRecompute(FuncGraphPtr fg) {
         if (func_graph_used_total_analysis_[fg].count(used_fg) == 0) {
           todo_new.push_back(used_fg);
         }
-        MS_LOG(DEBUG) << "" << fg->ToString() << " add func graph " << used_fg->ToString();
+        MS_LOG(DEBUG) << fg->ToString() << " add func graph " << used_fg->ToString();
         func_graph_used_total_analysis_[fg].add(used_fg);
       }
     }
@@ -1138,7 +1138,7 @@ void RecursiveComputer::CheckRecursiveGraphs(const FuncGraphPtr &fg, std::list<F
 bool FuncGraphJTotalComputer::SeekJ(const FuncGraphPtr &fg, const FuncGraphSetPtr &path) {
   MS_EXCEPTION_IF_NULL(path);
   if (path->contains(fg)) {
-    MS_LOG(DEBUG) << "" << fg->ToString() << " had been checked";
+    MS_LOG(DEBUG) << fg->ToString() << " had been checked";
     return false;
   }
   MS_EXCEPTION_IF_NULL(manager_);
@@ -1149,7 +1149,7 @@ bool FuncGraphJTotalComputer::SeekJ(const FuncGraphPtr &fg, const FuncGraphSetPt
       std::find_if(func_graph_counter_map[fg].begin(), func_graph_counter_map[fg].end(),
                    [path](const std::pair<FuncGraphPtr, int> iter) { return !path->contains(iter.first); });
     if (contains_j != func_graph_counter_map[fg].end()) {
-      MS_LOG(DEBUG) << "" << fg->ToString() << " contains J(" << contains_j->first->ToString() << ")";
+      MS_LOG(DEBUG) << fg->ToString() << " contains J(" << contains_j->first->ToString() << ")";
       return true;
     }
   }
@@ -1160,12 +1160,11 @@ bool FuncGraphJTotalComputer::SeekJ(const FuncGraphPtr &fg, const FuncGraphSetPt
   for (auto &item : used[fg]) {
     auto used_g = item.first;
     if (SeekJ(used_g, path)) {
-      MS_LOG(DEBUG) << "" << fg->ToString() << " users func graph " << used_g->ToString()
-                    << " which contains J(func_graph)";
+      MS_LOG(DEBUG) << fg->ToString() << " users func graph " << used_g->ToString() << " which contains J(func_graph)";
       return true;
     }
   }
-  MS_LOG(DEBUG) << "" << fg->ToString() << " doesn't contain J(func_graph)";
+  MS_LOG(DEBUG) << fg->ToString() << " doesn't contain J(func_graph)";
   return false;
 }
 
diff --git a/mindspore/ccsrc/ir/primitive.cc b/mindspore/ccsrc/ir/primitive.cc
index d40f8a265d..d848f9c0d8 100644
--- a/mindspore/ccsrc/ir/primitive.cc
+++ b/mindspore/ccsrc/ir/primitive.cc
@@ -145,14 +145,14 @@ py::function PrimitivePy::GetComputeFunction() {
   static const char *const compute_func_name = "vm_impl";
 
   if (py::hasattr(python_obj_, compute_func_name)) {
-    MS_LOG(INFO) << "" << name() << " compute_func_name";
+    MS_LOG(INFO) << name() << " compute_func_name";
     py::function fn = python_obj_.attr(compute_func_name).cast<py::function>();
     return fn;
   }
 
   static const std::string vm_module = "mindspore.ops.vm_impl_registry";
   static const std::string get_vm_impl_fn = "get_vm_impl_fn";
-  MS_LOG(INFO) << "" << name() << ": get_vm_impl_fn";
+  MS_LOG(INFO) << name() << ": get_vm_impl_fn";
   py::function get_fn = parse::python_adapter::GetPyFn(vm_module, get_vm_impl_fn);
   py::function vm_fn = get_fn(python_obj_);
 
diff --git a/mindspore/ccsrc/operator/composite/composite.cc b/mindspore/ccsrc/operator/composite/composite.cc
index 88db8b8ff8..da4700b053 100644
--- a/mindspore/ccsrc/operator/composite/composite.cc
+++ b/mindspore/ccsrc/operator/composite/composite.cc
@@ -676,7 +676,7 @@ void MultitypeFuncGraph::Register(const std::vector<std::string> &types_name, co
   for (auto &type_name : types_name) {
     auto type_ptr = StringToType(type_name);
     if (type_ptr == nullptr) {
-      MS_LOG(EXCEPTION) << "" << type_name << " convert from string error ";
+      MS_LOG(EXCEPTION) << type_name << " convert from string error ";
     }
     types.push_back(type_ptr);
   }
@@ -955,8 +955,7 @@ int CheckSliceMember(const AbstractBasePtr &member, int default_value, const std
     return default_value;
   }
 
-  MS_LOG(EXCEPTION) << "" << member_name << " should be a AbstractScalar or AbstractNone, but got "
-                    << member->ToString();
+  MS_LOG(EXCEPTION) << member_name << " should be a AbstractScalar or AbstractNone, but got " << member->ToString();
 }
 
 void GenerateTupleSliceParameter(const AbstractTuplePtr &tuple, const AbstractSlicePtr &slice, int *start_index,
diff --git a/mindspore/ccsrc/operator/prim_nn.cc b/mindspore/ccsrc/operator/prim_nn.cc
index 3591168187..1f9f650ac9 100644
--- a/mindspore/ccsrc/operator/prim_nn.cc
+++ b/mindspore/ccsrc/operator/prim_nn.cc
@@ -246,7 +246,7 @@ AbstractBasePtr InferImplBiasAddGrad(const AnalysisEnginePtr &, const PrimitiveP
   // Inputs: at least one tensor(y_backprop)
   // Outputs: dbias
   if (args_spec_list.empty()) {
-    MS_LOG(EXCEPTION) << "" << primitive->name() << " evaluator at least has 1 parameters, while the input size is "
+    MS_LOG(EXCEPTION) << primitive->name() << " evaluator at least has 1 parameters, while the input size is "
                       << args_spec_list.size() << ".";
   }
 
@@ -255,8 +255,7 @@ AbstractBasePtr InferImplBiasAddGrad(const AnalysisEnginePtr &, const PrimitiveP
   MS_EXCEPTION_IF_NULL(shape_y);
   std::vector<int> y_dims = shape_y->shape();
   if (y_dims.size() < 2) {
-    MS_LOG(EXCEPTION) << "" << primitive->name() << " input y backprop, dim should >= 2, while " << y_dims.size()
-                      << ".";
+    MS_LOG(EXCEPTION) << primitive->name() << " input y backprop, dim should >= 2, while " << y_dims.size() << ".";
   }
   std::vector<int> bias_dims = {y_dims[1]};
   ShapePtr ret_shape = std::make_shared<Shape>(bias_dims);
diff --git a/mindspore/ccsrc/operator/prim_statement.cc b/mindspore/ccsrc/operator/prim_statement.cc
index 239aed5bde..0b9d491ce6 100644
--- a/mindspore/ccsrc/operator/prim_statement.cc
+++ b/mindspore/ccsrc/operator/prim_statement.cc
@@ -80,8 +80,7 @@ AbstractBasePtr InferImplDot(const AnalysisEnginePtr &, const PrimitivePtr &prim
   auto y_shp_value = y_shp->shape();
   // Should be matrix which shape size is 2.
   if (x_shp_value.size() != 2 || y_shp_value.size() != 2) {
-    MS_LOG(EXCEPTION) << "" << op_name
-                      << " evaluator requires input two 2D tensors, while the dimensions of two tensors are "
+    MS_LOG(EXCEPTION) << op_name << " evaluator requires input two 2D tensors, while the dimensions of two tensors are "
                       << x_shp_value.size() << ", " << y_shp_value.size() << " ";
   }
   if (x_shp_value[1] != y_shp_value[0] && x_shp_value[1] != Shape::SHP_ANY && y_shp_value[0] != Shape::SHP_ANY) {
diff --git a/mindspore/ccsrc/optimizer/optimizer.h b/mindspore/ccsrc/optimizer/optimizer.h
index c4455484c4..cadbde0842 100644
--- a/mindspore/ccsrc/optimizer/optimizer.h
+++ b/mindspore/ccsrc/optimizer/optimizer.h
@@ -171,7 +171,7 @@ class Optimizer : public std::enable_shared_from_this<Optimizer> {
           };
           use_profile ? (WITH(MsProfile::GetProfile()->Step(pass_names_[i])) opt_func) : opt_func();
 #ifdef DEBUG
-          MS_LOG(DEBUG) << "" << name_ << " round " << counter << " OptPass " << pass_names_[i] << " end.";
+          MS_LOG(DEBUG) << name_ << " round " << counter << " OptPass " << pass_names_[i] << " end.";
           auto fg_name = name_ + "_r" + std::to_string(counter) + "_" + std::to_string(i) + "_" + pass_names_[i];
           func_graph->DumpFuncGraph(fg_name);
           DumpIR(fg_name + ".ir", func_graph);
diff --git a/mindspore/ccsrc/pipeline/parse/function_block.cc b/mindspore/ccsrc/pipeline/parse/function_block.cc
index 156f727b9e..16b0dfe30e 100644
--- a/mindspore/ccsrc/pipeline/parse/function_block.cc
+++ b/mindspore/ccsrc/pipeline/parse/function_block.cc
@@ -37,7 +37,7 @@ void FunctionBlock::AddPrevBlock(const FunctionBlockPtr &block) { prev_blocks_.p
 
 // write variable records the variable name to corresponding node
 void FunctionBlock::WriteVariable(const std::string &var_name, const AnfNodePtr &node) {
-  MS_LOG(DEBUG) << "" << func_graph_->ToString() << " write var " << var_name << " with node " << node->DebugString();
+  MS_LOG(DEBUG) << func_graph_->ToString() << " write var " << var_name << " with node " << node->DebugString();
   vars_[var_name] = node;
 }
 
@@ -71,7 +71,7 @@ AnfNodePtr FunctionBlock::ReadVariable(const std::string &var) {
   TraceManager::DebugTrace(std::make_shared<TracePhi>(debug_info));
   ParameterPtr phi_param = std::make_shared<Parameter>(func_graph());
   TraceManager::EndTrace();
-  MS_LOG(DEBUG) << "" << func_graph_->ToString() << " generate phi node " << phi_param->ToString() << " for " << var;
+  MS_LOG(DEBUG) << func_graph_->ToString() << " generate phi node " << phi_param->ToString() << " for " << var;
   func_graph()->add_parameter(phi_param);
   phi_nodes_[phi_param] = var;
   WriteVariable(var, phi_param);
diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index 251a0c2d84..930f3f0e4b 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -333,7 +333,7 @@ void ExecutorPy::GetGeBackendPolicy() const {
   MS_EXCEPTION_IF_NULL(ms_context);
   std::string backend = ms_context->backend_policy();
   if (backend != "ge") {
-    MS_LOG(EXCEPTION) << "" << backend << " backend policy is not supported under ge backend!";
+    MS_LOG(EXCEPTION) << backend << " backend policy is not supported under ge backend!";
   }
 }
 
@@ -491,10 +491,10 @@ void RunPipelineAction(const ActionItem &action, pipeline::ResourcePtr resource,
 
   // load MindSpore IR from file
   if (action.first == "symbol_resolve") {
-    MS_LOG(DEBUG) << "" << action.first << " read ir file: " << ir_file;
+    MS_LOG(DEBUG) << action.first << " read ir file: " << ir_file;
     std::vector<FuncGraphPtr> graphs = ImportIR(ir_file);
     if (graphs.size() == 0) {
-      MS_LOG(EXCEPTION) << "" << action.first << " read ir file " << ir_file << " failed as no graph found";
+      MS_LOG(EXCEPTION) << action.first << " read ir file " << ir_file << " failed as no graph found";
     }
     auto manager = resource->manager();
     MS_EXCEPTION_IF_NULL(manager);
diff --git a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc
index aeaa6b17f8..cd68268118 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc
@@ -78,7 +78,7 @@ AnalysisContextPtr AnalysisContext::Filter(const FuncGraphPtr &func_graph) {
       oss << ", context: " << iter.second.lock()->ToString() << "]";
     }
     oss << "}";
-    MS_LOG(EXCEPTION) << "" << oss.str() << " NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info());
+    MS_LOG(EXCEPTION) << oss.str() << " NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info());
   }
   return parent_context;
 }
diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
index 402ef98001..66837ddcd1 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
@@ -33,8 +33,7 @@ void InferEntryLogging(const EvaluatorPtr &evaluator, const AbstractBasePtrList
     MS_LOG(DEBUG) << "Evaluator " << evaluator->ToString() << " run for " << out_conf->node()->scope()->name();
   }
   for (size_t i = 0; i < arg_spec_list.size(); i++) {
-    MS_LOG(DEBUG) << "" << evaluator->ToString() << " input[" << i
-                  << "] abstract value: " << arg_spec_list[i]->ToString();
+    MS_LOG(DEBUG) << evaluator->ToString() << " input[" << i << "] abstract value: " << arg_spec_list[i]->ToString();
   }
 }
 
@@ -139,7 +138,7 @@ AbstractBasePtrList FuncGraphEvaluator::NormalizeArgs(const AbstractBasePtrList
                            MS_EXCEPTION_IF_NULL(arg);
                            return arg->Broaden();
                          });
-    MS_LOG(DEBUG) << "" << func_graph_->ToString() << " original: " << mindspore::ToString(args_spec_list)
+    MS_LOG(DEBUG) << func_graph_->ToString() << " original: " << mindspore::ToString(args_spec_list)
                   << ", broaded: " << mindspore::ToString(broaded_list);
     return broaded_list;
   }
@@ -232,20 +231,20 @@ AbstractBasePtr Evaluator::Run(AnalysisEnginePtr engine, const ConfigPtrList &ar
   MS_EXCEPTION_IF_NULL(cache_);
   auto iter = cache_->find(args_spec_list);
   if (iter == cache_->end()) {
-    MS_LOG(DEBUG) << "" << evaluator_name << " cache miss, call Infer().";
+    MS_LOG(DEBUG) << evaluator_name << " cache miss, call Infer().";
     AbstractBasePtr ret = Infer(engine, args_spec_list);
     if (ret == nullptr) {
       InferFailLogging(shared_from_base<Evaluator>(), args_spec_list, out_conf);
       MS_LOG(EXCEPTION) << "Evaluator " << evaluator_name << " result is nullptr.";
     }
     MS_EXCEPTION_IF_NULL(ret);
-    MS_LOG(DEBUG) << "" << evaluator_name << " set cache. return: " << ret->ToString() << ".";
+    MS_LOG(DEBUG) << evaluator_name << " set cache. return: " << ret->ToString() << ".";
     (*cache_)[args_spec_list] = ret;
     trace::TraceGraphInferLeave(shared_from_base<Evaluator>());
     return ret;
   } else {
     MS_EXCEPTION_IF_NULL(iter->second);
-    MS_LOG(DEBUG) << "" << evaluator_name << " cache hit. return: " << iter->second->ToString() << ".";
+    MS_LOG(DEBUG) << evaluator_name << " cache hit. return: " << iter->second->ToString() << ".";
     trace::TraceGraphInferLeave(shared_from_base<Evaluator>());
     return iter->second;
   }
diff --git a/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc b/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc
index 69f6af0dc0..2cbd33c162 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/param_validator.cc
@@ -103,7 +103,7 @@ ShapePtr CheckShapeSame(const std::string &op, const AbstractTensorPtr &tensor_b
   ShapePtr shape_base = tensor_base->shape();
   ShapePtr shape = tensor->shape();
   if (*shape != *shape_base) {
-    MS_LOG(EXCEPTION) << "" << op << " evaluator first arg shape " << tensor->shape()->ToString()
+    MS_LOG(EXCEPTION) << op << " evaluator first arg shape " << tensor->shape()->ToString()
                       << " are not consistent with second arg shape " << tensor_base->shape()->ToString();
   }
   return shape_base;
@@ -113,7 +113,7 @@ TypePtr CheckDtypeSame(const std::string &op, const AbstractTensorPtr &tensor_ba
   TypePtr type_base = tensor_base->element()->BuildType();
   TypePtr type = tensor->element()->BuildType();
   if (*type != *type_base) {
-    MS_LOG(EXCEPTION) << "" << op << " evaluator first arg dtype " << type_base->ToString()
+    MS_LOG(EXCEPTION) << op << " evaluator first arg dtype " << type_base->ToString()
                       << " are not consistent with second arg dtype " << type->ToString();
   }
   return type_base;
@@ -121,14 +121,14 @@ TypePtr CheckDtypeSame(const std::string &op, const AbstractTensorPtr &tensor_ba
 
 int CheckAxis(const std::string &op, const ValuePtr &axis, int minimum, int max) {
   if (axis == nullptr) {
-    MS_LOG(EXCEPTION) << "" << op << " evaluator axis is null";
+    MS_LOG(EXCEPTION) << op << " evaluator axis is null";
   }
   if (!axis->isa<Int32Imm>()) {
-    MS_LOG(EXCEPTION) << "" << op << " evaluator axis should be int, but got " << axis->type_name();
+    MS_LOG(EXCEPTION) << op << " evaluator axis should be int, but got " << axis->type_name();
   }
   int axis_value = GetValue<int>(axis);
   if (axis_value > max || axis_value < minimum) {
-    MS_LOG(EXCEPTION) << "" << op << " evaluator axis value should be in the range [" << minimum << ", " << max
+    MS_LOG(EXCEPTION) << op << " evaluator axis value should be in the range [" << minimum << ", " << max
                       << "], but get " << axis_value;
   }
   return axis_value;
@@ -136,8 +136,7 @@ int CheckAxis(const std::string &op, const ValuePtr &axis, int minimum, int max)
 void CheckArgsSize(const std::string &op, const mindspore::abstract::AbstractBasePtrList &args_spec_list,
                    size_t size_expect) {
   if (args_spec_list.size() != size_expect) {
-    MS_LOG(EXCEPTION) << "" << op << " input args size should be " << size_expect << ", but got "
-                      << args_spec_list.size();
+    MS_LOG(EXCEPTION) << op << " input args size should be " << size_expect << ", but got " << args_spec_list.size();
   }
 
   for (size_t i = 0; i < size_expect; i++) {
diff --git a/mindspore/ccsrc/pipeline/static_analysis/param_validator.h b/mindspore/ccsrc/pipeline/static_analysis/param_validator.h
index 5904c7e67a..ecb9529a58 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/param_validator.h
+++ b/mindspore/ccsrc/pipeline/static_analysis/param_validator.h
@@ -70,7 +70,7 @@ ABSTRACT_REPORT_NAME_TRAITS(Class)
 template <typename T>
 std::shared_ptr<T> CheckArg(const std::string &op, const AbstractBasePtrList &args_spec_list, size_t index) {
   if (index >= args_spec_list.size()) {
-    MS_EXCEPTION(ValueError) << "" << op << " evaluator args list index out of bound, size " << args_spec_list.size()
+    MS_EXCEPTION(ValueError) << op << " evaluator args list index out of bound, size " << args_spec_list.size()
                              << ", index " << index;
   }
   auto arg = dyn_cast<T>(args_spec_list[index]);
diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
index 4afc3509ba..69deaa1ec1 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
@@ -122,7 +122,7 @@ AnalysisResult AnalysisEngine::Run(const FuncGraphPtr &func_graph, const Abstrac
   MS_EXCEPTION_IF_NULL(root_context->func_graph());
   AnfNodeConfigPtr output_conf = MakeConfig(root_context->func_graph()->get_return(), root_context);
   MS_EXCEPTION_IF_NULL(func_graph);
-  MS_LOG(INFO) << "" << func_graph->ToString() << ": Run finished.";
+  MS_LOG(INFO) << func_graph->ToString() << ": Run finished.";
 
   AnalysisResult result;
   MS_EXCEPTION_IF_NULL(output_conf);
@@ -167,7 +167,7 @@ AbstractBasePtr AnalysisEngine::Eval(const AnfNodeConfigPtr &conf) {
   for (auto iter : compute_conf_stack_) {
     buffer << " -> " << iter->DebugString();
   }
-  MS_LOG(DEBUG) << "" << buffer.str();
+  MS_LOG(DEBUG) << buffer.str();
 #endif
   MS_LOG(DEBUG) << "Begin Eval NodeConfig " << conf->ToString();
   MS_EXCEPTION_IF_NULL(node);
diff --git a/mindspore/ccsrc/pipeline/static_analysis/utils.cc b/mindspore/ccsrc/pipeline/static_analysis/utils.cc
index 997a089301..4c399f6ffc 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/utils.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/utils.cc
@@ -175,7 +175,7 @@ std::vector<int> RealBroadcast(const std::string &op, std::vector<int> x_shape,
       output_i = x_i;
     } else {
       MS_LOG(EXCEPTION)
-        << "" << op
+        << op
         << " evaluator the shape of first tensor and the shape of second tensor do not meet the broadcasting "
            "requirements";
     }
diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc
index e057b26f02..a265438d6e 100644
--- a/mindspore/ccsrc/transform/convert.cc
+++ b/mindspore/ccsrc/transform/convert.cc
@@ -622,7 +622,7 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) {
     auto node_itor = params_.find(name);
     // if name not in params_, create a node in graph
     if (node_itor == params_.end()) {
-      MS_LOG(WARNING) << "" << name << " is not in params, and create a new node.";
+      MS_LOG(WARNING) << name << " is not in params, and create a new node.";
       ParameterPtr param = anf_graph_->add_parameter();
       name = name + "_temp";
       param->set_name(name);
diff --git a/mindspore/ccsrc/vm/vm.cc b/mindspore/ccsrc/vm/vm.cc
index 95ceceb67f..a897c72f8f 100644
--- a/mindspore/ccsrc/vm/vm.cc
+++ b/mindspore/ccsrc/vm/vm.cc
@@ -216,8 +216,8 @@ void FinalVM::InstCall(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start";
   const size_t args_size = 1;
   if (args.size() != args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is "
-                  << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " << args.size()
+                  << ".";
     return;
   }
 
@@ -232,8 +232,8 @@ void FinalVM::InstTailCall(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start";
   const size_t args_size = 3;
   if (args.size() != args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is "
-                  << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size()
+                  << ".";
     return;
   }
 
@@ -261,7 +261,7 @@ void FinalVM::InstTailCall(const VectorRef &args) {
 void FinalVM::InstSwitchReturn(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start";
   if (args.size() != 1) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires one parameter, while the input size is " << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires one parameter, while the input size is " << args.size() << ".";
     return;
   }
   Pop(1);
@@ -272,8 +272,8 @@ void FinalVM::InstReturn(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start";
   const size_t args_size = 2;
   if (args.size() != args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is "
-                  << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size()
+                  << ".";
     return;
   }
 
@@ -295,7 +295,7 @@ void FinalVM::InstPartial(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start";
   const size_t args_size = 1;
   if (args.size() < args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is "
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is "
                   << args.size() << ".";
     return;
   }
@@ -314,8 +314,8 @@ void FinalVM::InstPartial(const VectorRef &args) {
 void FinalVM::InstSimuSwitch(const VectorRef &args) {
   const size_t args_size = 4;
   if (args.size() != args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is "
-                  << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size()
+                  << ".";
     return;
   }
   bool cond = utils::cast<bool>(args[0]);
@@ -368,8 +368,8 @@ void FinalVM::InstSimuSwitch(const VectorRef &args) {
 void FinalVM::InstRealSwitch(const VectorRef &args) {
   const size_t args_size = 3;
   if (args.size() != args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is "
-                  << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameters, while the input size is " << args.size()
+                  << ".";
     return;
   }
 
@@ -378,7 +378,7 @@ void FinalVM::InstRealSwitch(const VectorRef &args) {
   int vfalse = utils::cast<int>(args[2]);
 
   BaseRef c = Ref(cond);
-  MS_LOG(DEBUG) << "" << vtrue << " false:" << vfalse << " InstSwitch: " << c.ToString();
+  MS_LOG(DEBUG) << vtrue << " false:" << vfalse << " InstSwitch: " << c.ToString();
   bool bool_value = false;
   if (backend_->GetCond(c, &bool_value)) {
     MS_LOG(DEBUG) << "Cond:" << bool_value;
@@ -417,8 +417,8 @@ void FinalVM::InstPush(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start";
   const size_t args_size = 1;
   if (args.size() != args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is "
-                  << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " << args.size()
+                  << ".";
     return;
   }
 
@@ -431,8 +431,8 @@ void FinalVM::InstInput(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start";
   const size_t args_size = 1;
   if (args.size() != args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is "
-                  << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " << args.size()
+                  << ".";
     return;
   }
 
@@ -445,13 +445,13 @@ void FinalVM::InstPadStack(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start";
   const size_t args_size = 1;
   if (args.size() != args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is "
-                  << args.size() << ".";
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " parameter, while the input size is " << args.size()
+                  << ".";
     return;
   }
 
   int sz = utils::cast<int>(args[0]);
-  MS_LOG(DEBUG) << "" << insts_stack_.size() << " need padstack " << sz << " sp_ " << sp_;
+  MS_LOG(DEBUG) << insts_stack_.size() << " need padstack " << sz << " sp_ " << sp_;
   size_t stack_size = insts_stack_.size();
   int need = sz - (static_cast<int>(stack_size) - sp_);
   if (need > 0) {
@@ -501,7 +501,7 @@ void FinalVM::InstPushPrim(const VectorRef &args) {
   MS_LOG(DEBUG) << "Start: " << args.size();
   const size_t args_size = 2;
   if (args.size() < args_size) {
-    MS_LOG(ERROR) << "" << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is "
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is "
                   << args.size() << ".";
     return;
   }
diff --git a/mindspore/ccsrc/vm/vmimpl.cc b/mindspore/ccsrc/vm/vmimpl.cc
index 017121f334..d83bb8f190 100644
--- a/mindspore/ccsrc/vm/vmimpl.cc
+++ b/mindspore/ccsrc/vm/vmimpl.cc
@@ -445,7 +445,7 @@ BaseRef RunOperation(const PrimitivePtr &prim, const VectorRef &args) {
   MS_LOG(DEBUG) << "operation start " << prim->name();
   auto func = operation != nullptr ? operation->GetComputeFunction() : prim->GetComputeFunction();
   if (py::isinstance<py::none>(func)) {
-    MS_LOG(EXCEPTION) << "" << prim->name() << " 's compute function is not implemented";
+    MS_LOG(EXCEPTION) << prim->name() << " 's compute function is not implemented";
   }
 
   py::tuple py_args = py::tuple(args.size());

From 1cb3bead328e530d8307e77dcb2126f049f2f574 Mon Sep 17 00:00:00 2001
From: yanghaitao <yanghaitao1@huawei.com>
Date: Mon, 27 Apr 2020 14:54:26 +0800
Subject: [PATCH 113/242] skip blank line for TextFileDataset

---
 .../ccsrc/dataset/engine/datasetops/source/text_file_op.cc | 7 ++++++-
 tests/ut/data/dataset/testTextFileDataset/1.txt            | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
index 2b62616366..17bb65233d 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
@@ -143,6 +143,9 @@ Status TextFileOp::LoadFile(const std::string &file, const int64_t start_offset,
   std::unique_ptr<TensorQTable> tensor_table = std::make_unique<TensorQTable>();
 
   while (getline(handle, line)) {
+    if (line.empty()) {
+      continue;
+    }
     // If read to the end offset of this file, break.
     if (rows_total >= end_offset) {
       break;
@@ -425,7 +428,9 @@ int64_t TextFileOp::CountTotalRows(const std::string &file) {
   std::string line;
   int64_t count = 0;
   while (getline(handle, line)) {
-    count++;
+    if (!line.empty()) {
+      count++;
+    }
   }
 
   return count;
diff --git a/tests/ut/data/dataset/testTextFileDataset/1.txt b/tests/ut/data/dataset/testTextFileDataset/1.txt
index 9d911eacc0..a5ffab4fdc 100644
--- a/tests/ut/data/dataset/testTextFileDataset/1.txt
+++ b/tests/ut/data/dataset/testTextFileDataset/1.txt
@@ -1,3 +1,4 @@
 This is a text file.
+
 Be happy every day.
 Good luck to everyone.

From 89b797ae864b6dd98ff2f9d79ccb3707a334ad10 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Mon, 27 Apr 2020 02:55:56 -0400
Subject: [PATCH 114/242] fix doc problems

---
 mindspore/nn/layer/normalization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index ffde5cecec..d0bafec55c 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -170,7 +170,7 @@ class _BatchNorm(Cell):
 @constexpr
 def _channel_check(channel, num_channel):
     if channel != num_channel:
-        raise ValueError("the input channel is not equal with num_channels")
+        raise ValueError("the input channel is not equal with num_channel")
 
 class BatchNorm1d(_BatchNorm):
     r"""

From f720200a8435a0047e728d334bb09b65f05e7b57 Mon Sep 17 00:00:00 2001
From: huanghui <huanghui44@huawei.com>
Date: Mon, 27 Apr 2020 15:07:25 +0800
Subject: [PATCH 115/242] Disable ConfusionMulGrad fusion pass

---
 .../ccsrc/pre_activate/ascend/ascend_backend_optimization.cc    | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index f213611a4d..947e6d3804 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -46,7 +46,6 @@
 #include "pre_activate/ascend/ir_fusion/mul_addn_fusion.h"
 #include "pre_activate/ascend/ir_fusion/matmul_biasadd_fusion.h"
 #include "pre_activate/ascend/ir_fusion/remove_reshape_pair.h"
-#include "pre_activate/ascend/ir_fusion/confusion_mul_grad_fusion.h"
 #include "pre_activate/ascend/ir_fusion/derelu_fusion.h"
 #include "pre_activate/ascend/format_type/insert_trans_op.h"
 #include "pre_activate/pass/getitem_tuple.h"
@@ -97,7 +96,6 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) {
   ir_fusion_pm->AddPass(std::make_shared<MatmulBiasaddFusion>());
   ir_fusion_pm->AddPass(std::make_shared<AddnFission>());
   ir_fusion_pm->AddPass(std::make_shared<DereluFusion>());
-  ir_fusion_pm->AddPass(std::make_shared<ConfusionMulGradFusion>());
   ir_fusion_pm->AddPass(std::make_shared<TransposeTransDataFusion>());
   ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>());
 }

From 8cef3aff7c7c7cee25952465b4c3b0d669040404 Mon Sep 17 00:00:00 2001
From: chang zherui <760161589@qq.com>
Date: Mon, 27 Apr 2020 15:10:03 +0800
Subject: [PATCH 116/242] fix load checkpoint bug

---
 mindspore/train/serialization.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py
index df1a79fb13..6ab45358eb 100644
--- a/mindspore/train/serialization.py
+++ b/mindspore/train/serialization.py
@@ -258,16 +258,17 @@ def _load_dismatch_prefix_params(net, parameter_dict, param_not_load):
     longest_name = param_not_load[0]
     while prefix_name != longest_name and param_not_load:
         logger.debug("Count: {} parameters has not been loaded, try to load continue.".format(len(param_not_load)))
-        longest_name = sorted(param_not_load, key=len, reverse=True)[0]
         prefix_name = longest_name
         for net_param_name in param_not_load:
             for dict_name in parameter_dict:
                 if dict_name.endswith(net_param_name):
-                    tmp_name = dict_name[:-len(net_param_name)]
-                    prefix_name = prefix_name if len(prefix_name) < len(tmp_name) else tmp_name
+                    prefix_name = dict_name[:-len(net_param_name)]
+                    break
+            if prefix_name != longest_name:
+                break
 
         if prefix_name != longest_name:
-            logger.info("Remove parameter prefix name: {}, continue to load.".format(prefix_name))
+            logger.warning("Remove parameter prefix name: {}, continue to load.".format(prefix_name))
             for _, param in net.parameters_and_names():
                 new_param_name = prefix_name + param.name
                 if param.name in param_not_load and new_param_name in parameter_dict:

From c87e174661832dc4f0563cc3656e46e369158e18 Mon Sep 17 00:00:00 2001
From: yanghaitao <yanghaitao1@huawei.com>
Date: Mon, 27 Apr 2020 15:35:42 +0800
Subject: [PATCH 117/242] check python_multiprocessing type for map

---
 mindspore/dataset/engine/validators.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py
index c9f7d54f13..bdeb97c812 100644
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -700,9 +700,11 @@ def check_map(method):
         nreq_param_list = ['columns_order']
         nreq_param_int = ['num_parallel_workers']
         nreq_param_columns = ['input_columns', 'output_columns']
+        nreq_param_bool = ['python_multiprocessing']
 
         check_param_type(nreq_param_list, param_dict, list)
         check_param_type(nreq_param_int, param_dict, int)
+        check_param_type(nreq_param_bool, param_dict, bool)
         for param_name in nreq_param_columns:
             param = param_dict.get(param_name)
             if param is not None:

From 3bacea7bbacd14295d907dfa9c7e3c999a453e6d Mon Sep 17 00:00:00 2001
From: xiefangqi <xiefangqi2@huawei.com>
Date: Mon, 27 Apr 2020 14:24:23 +0800
Subject: [PATCH 118/242] fix gpu hung problem

---
 mindspore/dataset/engine/iterators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py
index 81bad14810..ebee204b37 100644
--- a/mindspore/dataset/engine/iterators.py
+++ b/mindspore/dataset/engine/iterators.py
@@ -248,7 +248,7 @@ class Iterator:
         return self.depipeline.GetNumClasses()
 
     def __deepcopy__(self, memo):
-        return Iterator(copy.deepcopy(self.dataset, memo))
+        return self
 
 
 class DictIterator(Iterator):

From 1a6f62bd252e094cb8f0de972114c06d3720524a Mon Sep 17 00:00:00 2001
From: VectorSL <shiliang10@huawei.com>
Date: Mon, 27 Apr 2020 16:09:00 +0800
Subject: [PATCH 119/242] gpu update type check

---
 mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc b/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc
index e38cc02e23..b00b5c263d 100644
--- a/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc
+++ b/mindspore/ccsrc/kernel/gpu/gpu_kernel_factory.cc
@@ -68,13 +68,18 @@ std::string GpuKernelFactory::SupportedTypeList(const std::string &kernel_name)
     return type_lists;
   }
   for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
-    std::string type_list = "[";
+    std::string type_list = "in[";
     auto attr = (iter->second)[attr_index].first;
     for (size_t input_index = 0; input_index < attr.GetInputSize(); ++input_index) {
       type_list = type_list + TypeId2String(attr.GetInputAttr(input_index).first) +
                   ((input_index == (attr.GetInputSize() - 1)) ? "" : " ");
     }
-    type_lists = type_lists + type_list + "] ";
+    type_list = type_list + "], out[";
+    for (size_t input_index = 0; input_index < attr.GetOutputSize(); ++input_index) {
+      type_list = type_list + TypeId2String(attr.GetOutputAttr(input_index).first) +
+                  ((input_index == (attr.GetOutputSize() - 1)) ? "" : " ");
+    }
+    type_lists = type_lists + type_list + "]; ";
   }
   return type_lists;
 }

From 46d291c3c026adcf08d3ab34a09a6ec68b1e8f5d Mon Sep 17 00:00:00 2001
From: simson <526422051@qq.com>
Date: Mon, 27 Apr 2020 11:53:52 +0800
Subject: [PATCH 120/242] modify error type of abnormal inputs

---
 mindspore/ccsrc/pipeline/static_analysis/evaluator.cc | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
index 99cb893104..fa3d1386fa 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
@@ -90,11 +90,9 @@ AbstractBasePtr BaseFuncGraphEvaluator::Infer(AnalysisEnginePtr engine, const Ab
   MS_EXCEPTION_IF_NULL(fg);
   std::size_t nargs = fg->parameters().size();
   if (args_spec_list.size() != nargs) {
-    MS_LOG(EXCEPTION) << "Function " << fg->ToString() << ", The number of parameters of this function is "
-                      << fg->parameters().size()
-                      << ","
-                         " but the number of provided arguments is "
-                      << args_spec_list.size() << ". NodeInfo: " << trace::GetDebugInfo(fg->debug_info());
+    MS_EXCEPTION(ValueError) << "Function " << fg->ToString() << ", The number of parameters of this function is "
+                             << fg->parameters().size() << ", but the number of provided arguments is "
+                             << args_spec_list.size() << ". NodeInfo: " << trace::GetDebugInfo(fg->debug_info());
   }
   MS_EXCEPTION_IF_NULL(parent_context_);
   MS_EXCEPTION_IF_NULL(engine);

From 1c5e54812de4ff7d5875d79a590a0496d250c77a Mon Sep 17 00:00:00 2001
From: lizhenyu <lizhenyu13@huawei.com>
Date: Mon, 27 Apr 2020 14:44:53 +0800
Subject: [PATCH 121/242] fix bug of import _akg failed

---
 mindspore/_akg/__init__.py | 47 +----------------------------
 mindspore/_akg/add_path.py | 61 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 46 deletions(-)
 create mode 100644 mindspore/_akg/add_path.py

diff --git a/mindspore/_akg/__init__.py b/mindspore/_akg/__init__.py
index a343e3532a..d0c1f0ffe4 100644
--- a/mindspore/_akg/__init__.py
+++ b/mindspore/_akg/__init__.py
@@ -13,51 +13,6 @@
 # limitations under the License.
 
 """__init__"""
-from __future__ import absolute_import as _abs
-import sys
-import os
+from . import add_path
 from .op_build import op_build
 from .message import compilewithjson
-
-
-def AKGAddPath():
-    """_akg add path."""
-    pwd = os.path.dirname(os.path.realpath(__file__))
-    tvm_path = os.path.realpath(pwd)
-    if tvm_path not in sys.path:
-        sys.path.insert(0, tvm_path)
-    else:
-        sys.path.remove(tvm_path)
-        sys.path.insert(0, tvm_path)
-
-
-class AKGMetaPathFinder:
-    """class AKGMetaPath finder."""
-
-    def find_module(self, fullname, path=None):
-        """method _akg find module."""
-        if fullname.startswith("_akg.tvm"):
-            rname = fullname[5:]
-            return AKGMetaPathLoader(rname)
-        if fullname.startswith("_akg.topi"):
-            rname = fullname[5:]
-            return AKGMetaPathLoader(rname)
-        return None
-
-
-class AKGMetaPathLoader:
-    """class AKGMetaPathLoader loader."""
-    def __init__(self, rname):
-        self.__rname = rname
-
-    def load_module(self, fullname):
-        if self.__rname in sys.modules:
-            sys.modules.pop(self.__rname)
-        AKGAddPath()
-        __import__(self.__rname, globals(), locals())
-        self.__target_module = sys.modules[self.__rname]
-        sys.modules[fullname] = self.__target_module
-        return self.__target_module
-
-
-sys.meta_path.insert(0, AKGMetaPathFinder())
diff --git a/mindspore/_akg/add_path.py b/mindspore/_akg/add_path.py
new file mode 100644
index 0000000000..a9fd0d4a09
--- /dev/null
+++ b/mindspore/_akg/add_path.py
@@ -0,0 +1,61 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""add tvm path"""
+import sys
+import os
+
+
+def AKGAddPath():
+    """Put the directory containing this file at the front of ``sys.path``.
+
+    If the directory is already listed, its first occurrence is removed before it is re-inserted at index 0.
+    """
+    package_dir = os.path.realpath(os.path.dirname(os.path.realpath(__file__)))
+    if package_dir in sys.path:
+        sys.path.remove(package_dir)
+    sys.path.insert(0, package_dir)
+
+
+class AKGMetaPathFinder:
+    """Meta path finder that claims imports whose name starts with ``_akg.tvm`` or ``_akg.topi``."""
+
+    def find_module(self, fullname, path=None):
+        """Return a loader for the claimed names, or None so that other finders are tried.
+
+        The loader receives `fullname` without its first 5 characters (the ``_akg.`` prefix).
+        `path` is accepted but not used.
+        """
+        if fullname.startswith(("_akg.tvm", "_akg.topi")):
+            return AKGMetaPathLoader(fullname[5:])
+        return None
+
+
+class AKGMetaPathLoader:
+    """Loader that imports the module named `rname` and registers it under the requested name."""
+
+    def __init__(self, rname):
+        self.__rname = rname
+
+    def load_module(self, fullname):
+        """Freshly import `rname` after calling AKGAddPath(), then alias it as `fullname`."""
+        sys.modules.pop(self.__rname, None)  # drop any cached copy so __import__ loads it again
+        AKGAddPath()
+        __import__(self.__rname, globals(), locals())
+        module = sys.modules[self.__rname]
+        self.__target_module = sys.modules[fullname] = module
+        return module
+
+
+sys.meta_path.insert(0, AKGMetaPathFinder())  # index 0: consulted before the finders already registered

From c8221cce4d9d971ea164d5a2f752ed5d7adea3b1 Mon Sep 17 00:00:00 2001
From: wanghua <wanghua36@huawei.com>
Date: Fri, 24 Apr 2020 15:17:35 +0800
Subject: [PATCH 122/242] modify pre-traning script

---
 example/Bert_NEZHA_cnwiki/config.py           |  57 -------
 example/Bert_NEZHA_cnwiki/train.py            |  96 ------------
 .../README.md                                 |  58 ++++---
 example/bert_clue/config.py                   |  89 +++++++++++
 example/bert_clue/dataset.py                  |  58 +++++++
 example/bert_clue/run_distribute_pretrain.sh  |  66 ++++++++
 example/bert_clue/run_pretrain.py             | 144 ++++++++++++++++++
 example/bert_clue/run_standalone_pretrain.sh  |  46 ++++++
 8 files changed, 441 insertions(+), 173 deletions(-)
 delete mode 100644 example/Bert_NEZHA_cnwiki/config.py
 delete mode 100644 example/Bert_NEZHA_cnwiki/train.py
 rename example/{Bert_NEZHA_cnwiki => bert_clue}/README.md (69%)
 create mode 100644 example/bert_clue/config.py
 create mode 100644 example/bert_clue/dataset.py
 create mode 100644 example/bert_clue/run_distribute_pretrain.sh
 create mode 100644 example/bert_clue/run_pretrain.py
 create mode 100644 example/bert_clue/run_standalone_pretrain.sh

diff --git a/example/Bert_NEZHA_cnwiki/config.py b/example/Bert_NEZHA_cnwiki/config.py
deleted file mode 100644
index a704d9a264..0000000000
--- a/example/Bert_NEZHA_cnwiki/config.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-"""
-network config setting, will be used in train.py
-"""
-
-from easydict import EasyDict as edict
-import mindspore.common.dtype as mstype
-from mindspore.model_zoo.Bert_NEZHA import BertConfig
-bert_train_cfg = edict({
-    'epoch_size': 10,
-    'num_warmup_steps': 0,
-    'start_learning_rate': 1e-4,
-    'end_learning_rate': 0.0,
-    'decay_steps': 1000,
-    'power': 10.0,
-    'save_checkpoint_steps': 2000,
-    'keep_checkpoint_max': 10,
-    'checkpoint_prefix': "checkpoint_bert",
-    # please add your own dataset path
-    'DATA_DIR': "/your/path/examples.tfrecord",
-    # please add your own dataset schema path
-    'SCHEMA_DIR': "/your/path/datasetSchema.json"
-})
-bert_net_cfg = BertConfig(
-    batch_size=16,
-    seq_length=128,
-    vocab_size=21136,
-    hidden_size=1024,
-    num_hidden_layers=24,
-    num_attention_heads=16,
-    intermediate_size=4096,
-    hidden_act="gelu",
-    hidden_dropout_prob=0.0,
-    attention_probs_dropout_prob=0.0,
-    max_position_embeddings=512,
-    type_vocab_size=2,
-    initializer_range=0.02,
-    use_relative_positions=True,
-    input_mask_from_dataset=True,
-    token_type_ids_from_dataset=True,
-    dtype=mstype.float32,
-    compute_type=mstype.float16,
-)
diff --git a/example/Bert_NEZHA_cnwiki/train.py b/example/Bert_NEZHA_cnwiki/train.py
deleted file mode 100644
index 2610542a9a..0000000000
--- a/example/Bert_NEZHA_cnwiki/train.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-"""
-NEZHA (NEural contextualiZed representation for CHinese lAnguage understanding) is the Chinese pretrained language
-model currently based on BERT developed by Huawei.
-1. Prepare data
-Following the data preparation as in BERT, run command as below to get dataset for training:
-    python ./create_pretraining_data.py \
-      --input_file=./sample_text.txt \
-      --output_file=./examples.tfrecord \
-      --vocab_file=./your/path/vocab.txt \
-      --do_lower_case=True \
-      --max_seq_length=128 \
-      --max_predictions_per_seq=20 \
-      --masked_lm_prob=0.15 \
-      --random_seed=12345 \
-      --dupe_factor=5
-2. Pretrain
-First, prepare the distributed training environment, then adjust configurations in config.py, finally run train.py.
-"""
-
-import os
-import numpy as np
-from config import bert_train_cfg, bert_net_cfg
-import mindspore.dataset.engine.datasets as de
-import mindspore.dataset.transforms.c_transforms as C
-from mindspore import context
-from mindspore.common.tensor import Tensor
-import mindspore.common.dtype as mstype
-from mindspore.train.model import Model
-from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
-from mindspore.model_zoo.Bert_NEZHA import BertNetworkWithLoss, BertTrainOneStepCell
-from mindspore.nn.optim import Lamb
-_current_dir = os.path.dirname(os.path.realpath(__file__))
-
-def create_train_dataset(batch_size):
-    """create train dataset"""
-    # apply repeat operations
-    repeat_count = bert_train_cfg.epoch_size
-    ds = de.TFRecordDataset([bert_train_cfg.DATA_DIR], bert_train_cfg.SCHEMA_DIR,
-                            columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
-                                          "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"])
-    type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
-    # apply batch operations
-    ds = ds.batch(batch_size, drop_remainder=True)
-    ds = ds.repeat(repeat_count)
-    return ds
-
-def weight_variable(shape):
-    """weight variable"""
-    np.random.seed(1)
-    ones = np.random.uniform(-0.1, 0.1, size=shape).astype(np.float32)
-    return Tensor(ones)
-
-def train_bert():
-    """train bert"""
-    context.set_context(mode=context.GRAPH_MODE)
-    context.set_context(device_target="Ascend")
-    context.set_context(enable_task_sink=True)
-    context.set_context(enable_loop_sink=True)
-    context.set_context(enable_mem_reuse=True)
-    ds = create_train_dataset(bert_net_cfg.batch_size)
-    netwithloss = BertNetworkWithLoss(bert_net_cfg, True)
-    optimizer = Lamb(netwithloss.trainable_params(), decay_steps=bert_train_cfg.decay_steps,
-                     start_learning_rate=bert_train_cfg.start_learning_rate,
-                     end_learning_rate=bert_train_cfg.end_learning_rate, power=bert_train_cfg.power,
-                     warmup_steps=bert_train_cfg.num_warmup_steps, decay_filter=lambda x: False)
-    netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer)
-    netwithgrads.set_train(True)
-    model = Model(netwithgrads)
-    config_ck = CheckpointConfig(save_checkpoint_steps=bert_train_cfg.save_checkpoint_steps,
-                                 keep_checkpoint_max=bert_train_cfg.keep_checkpoint_max)
-    ckpoint_cb = ModelCheckpoint(prefix=bert_train_cfg.checkpoint_prefix, config=config_ck)
-    model.train(ds.get_repeat_count(), ds, callbacks=[LossMonitor(), ckpoint_cb], dataset_sink_mode=False)
-
-if __name__ == '__main__':
-    train_bert()
diff --git a/example/Bert_NEZHA_cnwiki/README.md b/example/bert_clue/README.md
similarity index 69%
rename from example/Bert_NEZHA_cnwiki/README.md
rename to example/bert_clue/README.md
index cd86b3bdd6..55b200e941 100644
--- a/example/Bert_NEZHA_cnwiki/README.md
+++ b/example/bert_clue/README.md
@@ -4,20 +4,26 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base](
 
 ## Requirements
 - Install [MindSpore](https://www.mindspore.cn/install/en).
-- Download the zhwiki dataset from <https://dumps.wikimedia.org/zhwiki> for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wiliextractor). Convert the dataset to TFRecord format and move the files to a specified path.
+- Download the zhwiki dataset from <https://dumps.wikimedia.org/zhwiki> for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor).
+  Convert the dataset to TFRecord format and move the files to a specified path.
 - Download the CLUE dataset from <https://www.cluebenchmarks.com> for fine-tuning and evaluation.
 >  Notes:
    If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file.
 
 ## Running the Example
 ### Pre-Training
-- Set options in `config.py`. Make sure the 'DATA_DIR'(path to the dataset) and 'SCHEMA_DIR'(path to the json schema file) are set to your own path. Click [here](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/loading_the_datasets.html#tfrecord) for more information about dataset and the json schema file.
+- Set options in `config.py`, including lossscale, optimizer and network. Click [here](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/loading_the_datasets.html#tfrecord) for more information about dataset and the json schema file.
 
-- Run `run_pretrain.py` for pre-training of BERT-base and BERT-NEZHA model.
+- Run `run_standalone_pretrain.sh` for non-distributed pre-training of BERT-base and BERT-NEZHA model.
 
-    ``` bash
-    python run_pretrain.py --backend=ms
+    ``` bash   
+    sh run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_PATH
     ```
+- Run `run_distribute_pretrain.sh` for distributed pre-training of BERT-base and BERT-NEZHA model.
+
+    ``` bash   
+    sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH MINDSPORE_PATH
+    ```  
 
 ### Fine-Tuning
 - Set options in `finetune_config.py`. Make sure the 'data_file', 'schema_file' and 'ckpt_file' are set to your own path, set the 'pre_training_ckpt' to save the checkpoint files generated.
@@ -40,30 +46,42 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base](
 ## Usage
 ### Pre-Training
 ``` 
-usage: run_pretrain.py [--backend BACKEND]
-
-optional parameters:
-    --backend, BACKEND            MindSpore backend: ms
+usage: run_pretrain.py  [--distribute DISTRIBUTE] [--epoch_size N] [--device_num N] [--device_id N]
+                        [--enable_task_sink ENABLE_TASK_SINK] [--enable_loop_sink ENABLE_LOOP_SINK]
+                        [--enable_mem_reuse ENABLE_MEM_REUSE] [--enable_save_ckpt ENABLE_SAVE_CKPT]
+                        [--enable_lossscale ENABLE_LOSSSCALE] [--do_shuffle DO_SHUFFLE]
+                        [--enable_data_sink ENABLE_DATA_SINK] [--data_sink_steps N] [--checkpoint_path CHECKPOINT_PATH]
+                        [--save_checkpoint_steps N] [--save_checkpoint_num N] 
+                        [--data_dir DATA_DIR] [--schema_dir SCHEMA_DIR]
+
+options:
+    --distribute               pre-training on several devices: "true"(training by more than 1 device) | "false", default is "false"
+    --epoch_size               epoch size: N, default is 1
+    --device_num               number of used devices: N, default is 1
+    --device_id                device id: N, default is 0
+    --enable_task_sink         enable task sink: "true" | "false", default is "true"
+    --enable_loop_sink         enable loop sink: "true" | "false", default is "true"
+    --enable_mem_reuse         enable memory reuse: "true" | "false", default is "true"
+    --enable_save_ckpt         enable save checkpoint: "true" | "false", default is "true"
+    --enable_lossscale         enable lossscale: "true" | "false", default is "true"
+    --do_shuffle               enable shuffle: "true" | "false", default is "true"
+    --enable_data_sink         enable data sink: "true" | "false", default is "true"
+    --data_sink_steps          set data sink steps: N, default is 1
+    --checkpoint_path          path to save checkpoint files: PATH, default is ""
+    --save_checkpoint_steps    steps for saving checkpoint files: N, default is 1000
+    --save_checkpoint_num      number for saving checkpoint files: N, default is 1
+    --data_dir                 path to dataset directory: PATH, default is ""
+    --schema_dir               path to schema.json file: PATH, default is ""
 ```
-
 ## Options and Parameters
 It contains of parameters of BERT model and options for training, which is set in file `config.py`, `finetune_config.py` and `evaluation_config.py` respectively.
 ### Options:
 ```
 Pre-Training:
     bert_network                    version of BERT model: base | large, default is base
-    epoch_size                      repeat counts of training: N, default is 40
-    dataset_sink_mode               use dataset sink mode or not: True | False, default is True
-    do_shuffle                      shuffle the dataset or not: True | False, default is True
-    do_train_with_lossscale         use lossscale or not: True | False, default is True
     loss_scale_value                initial value of loss scale: N, default is 2^32
     scale_factor                    factor used to update loss scale: N, default is 2
-    scale_window                    steps for once updatation of loss scale: N, default is 1000
-    save_checkpoint_steps           steps to save a checkpoint: N, default is 2000
-    keep_checkpoint_max             numbers to save checkpoint: N, default is 1
-    init_ckpt                       checkpoint file to load: PATH, default is ""
-    data_dir                        dataset file to load: PATH, default is "/your/path/cn-wiki-128"
-    schema_dir                      dataset schema file to load: PATH, default is "your/path/datasetSchema.json"
+    scale_window                    steps for one update of loss scale: N, default is 1000
     optimizer                       optimizer used in the network: AdamWerigtDecayDynamicLR | Lamb | Momentum, default is "Lamb"
 
 Fine-Tuning:
diff --git a/example/bert_clue/config.py b/example/bert_clue/config.py
new file mode 100644
index 0000000000..2d49121c50
--- /dev/null
+++ b/example/bert_clue/config.py
@@ -0,0 +1,89 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+network config setting, will be used in dataset.py, run_pretrain.py
+"""
+from easydict import EasyDict as edict
+import mindspore.common.dtype as mstype
+from mindspore.model_zoo.Bert_NEZHA import BertConfig
+cfg = edict({  # training options imported by run_pretrain.py
+    'bert_network': 'base',  # 'base' picks the first BertConfig below; any other value picks the second
+    'loss_scale_value': 2**32,  # initial value of loss scale
+    'scale_factor': 2,  # factor used to update loss scale
+    'scale_window': 1000,
+    'optimizer': 'Lamb',  # name of the option group below that run_pretrain.py uses to build the optimizer
+    'AdamWeightDecayDynamicLR': edict({
+        'learning_rate': 3e-5,
+        'end_learning_rate': 0.0,
+        'power': 5.0,
+        'weight_decay': 1e-5,
+        'eps': 1e-6,
+    }),
+    'Lamb': edict({
+        'start_learning_rate': 3e-5,
+        'end_learning_rate': 0.0,
+        'power': 10.0,
+        'warmup_steps': 10000,
+        'weight_decay': 0.01,
+        'eps': 1e-6,
+        'decay_filter': lambda x: False,  # returns False for every argument
+    }),
+    'Momentum': edict({
+        'learning_rate': 2e-5,
+        'momentum': 0.9,
+    }),
+})
+if cfg.bert_network == 'base':  # hidden_size 768, 12 attention heads, no relative positions
+    bert_net_cfg = BertConfig(
+        batch_size=16,
+        seq_length=128,
+        vocab_size=21136,
+        hidden_size=768,
+        num_hidden_layers=12,
+        num_attention_heads=12,
+        intermediate_size=3072,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=2,
+        initializer_range=0.02,
+        use_relative_positions=False,
+        input_mask_from_dataset=True,
+        token_type_ids_from_dataset=True,
+        dtype=mstype.float32,
+        compute_type=mstype.float16,
+    )
+else:  # every value other than 'base': hidden_size 1024, 16 attention heads, relative positions enabled
+    bert_net_cfg = BertConfig(
+        batch_size=16,
+        seq_length=128,
+        vocab_size=21136,
+        hidden_size=1024,
+        num_hidden_layers=12,
+        num_attention_heads=16,
+        intermediate_size=4096,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=2,
+        initializer_range=0.02,
+        use_relative_positions=True,
+        input_mask_from_dataset=True,
+        token_type_ids_from_dataset=True,
+        dtype=mstype.float32,
+        compute_type=mstype.float16,
+    )
diff --git a/example/bert_clue/dataset.py b/example/bert_clue/dataset.py
new file mode 100644
index 0000000000..671f0dca0f
--- /dev/null
+++ b/example/bert_clue/dataset.py
@@ -0,0 +1,58 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+Data operations, will be used in run_pretrain.py
+"""
+import os
+import mindspore.common.dtype as mstype
+import mindspore.dataset.engine.datasets as de
+import mindspore.dataset.transforms.c_transforms as C
+from mindspore import log as logger
+from config import bert_net_cfg
+
+
+def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", enable_data_sink="true",
+                        data_sink_steps=1, data_dir=None, schema_dir=None):
+    """
+    Create the sharded, batched and repeated TFRecord dataset used for pre-training.
+
+    Every entry of `data_dir` is passed as a data file. `do_shuffle` and `enable_data_sink` are compared with
+    the string "true". With data sink enabled the dataset size is set to `data_sink_steps * batch_size`; the
+    repeat count is `epoch_size * original_size // new_size`. Returns the resulting dataset.
+    """
+    # os.path.join keeps the paths valid whether or not data_dir ends with a separator.
+    data_files = [os.path.join(data_dir, file_name) for file_name in os.listdir(data_dir)]
+    ds = de.TFRecordDataset(data_files, schema_dir,
+                            columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
+                                          "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
+                            shuffle=(do_shuffle == "true"), num_shards=device_num, shard_id=rank,
+                            shard_equal_rows=True)
+    ori_dataset_size = ds.get_dataset_size()
+    new_size = ori_dataset_size
+    if enable_data_sink == "true":
+        new_size = data_sink_steps * bert_net_cfg.batch_size
+    ds.set_dataset_size(new_size)
+    repeat_count = int(epoch_size * ori_dataset_size // ds.get_dataset_size())
+    # cast the id, position, label and mask columns to int32 (masked_lm_weights is left as is)
+    type_cast_op = C.TypeCast(mstype.int32)
+    for column in ("masked_lm_ids", "masked_lm_positions", "next_sentence_labels",
+                   "segment_ids", "input_mask", "input_ids"):
+        ds = ds.map(input_columns=column, operations=type_cast_op)
+    # apply batch operations
+    ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
+    ds = ds.repeat(repeat_count)
+    logger.info("data size: {}".format(ds.get_dataset_size()))
+    logger.info("repeatcount: {}".format(ds.get_repeat_count()))
+    return ds
diff --git a/example/bert_clue/run_distribute_pretrain.sh b/example/bert_clue/run_distribute_pretrain.sh
new file mode 100644
index 0000000000..93d68d8e9d
--- /dev/null
+++ b/example/bert_clue/run_distribute_pretrain.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH MINDSPORE_PATH"
+echo "for example: sh run_distribute_pretrain.sh 8 40 /path/zh-wiki/ /path/Schema.json /path/hccl.json /path/mindspore"
+echo "It is better to use absolute path."
+echo "=============================================================================================================="
+# Positional arguments, in the order shown by the usage text above.
+EPOCH_SIZE=$2
+DATA_DIR=$3
+SCHEMA_DIR=$4
+MINDSPORE_PATH=$6
+# Put the MindSpore build output on PYTHONPATH and export the HCCL config path and rank size.
+export PYTHONPATH=$MINDSPORE_PATH/build/package:$PYTHONPATH
+export MINDSPORE_HCCL_CONFIG_PATH=$5
+export RANK_SIZE=$1
+# Start one background training process per rank.
+for((i=0;i<RANK_SIZE;i++))
+do
+    export DEVICE_ID=$i
+    start=$((i * 12))  # each rank is pinned to its own range of 12 CPU ids
+    end=$((start + 11))
+    cmdopt=$start"-"$end
+    # Each rank runs inside a fresh LOG<i> directory that holds a copy of the Python scripts.
+    rm -rf LOG$i
+    mkdir ./LOG$i
+    cp  *.py ./LOG$i
+    cd ./LOG$i || exit
+    export RANK_ID=$i
+    echo "start training for rank $i, device $DEVICE_ID"
+    env > env.log
+    taskset -c $cmdopt python ../run_pretrain.py  \
+    --distribute="true" \
+    --epoch_size="$EPOCH_SIZE" \
+    --device_id=$DEVICE_ID \
+    --device_num=$RANK_SIZE \
+    --enable_task_sink="true" \
+    --enable_loop_sink="true" \
+    --enable_mem_reuse="true" \
+    --enable_save_ckpt="true" \
+    --enable_lossscale="true" \
+    --do_shuffle="true" \
+    --enable_data_sink="true" \
+    --data_sink_steps=1 \
+    --checkpoint_path="" \
+    --save_checkpoint_steps=1000 \
+    --save_checkpoint_num=1 \
+    --data_dir="$DATA_DIR" \
+    --schema_dir="$SCHEMA_DIR" > log.txt 2>&1 &
+    cd ../
+done
diff --git a/example/bert_clue/run_pretrain.py b/example/bert_clue/run_pretrain.py
new file mode 100644
index 0000000000..25c78e08d8
--- /dev/null
+++ b/example/bert_clue/run_pretrain.py
@@ -0,0 +1,144 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+#################pre_train bert example on zh-wiki########################
+python run_pretrain.py
+"""
+
+import os
+import argparse
+import mindspore.communication.management as D
+from mindspore import context
+from mindspore.train.model import Model
+from mindspore.train.parallel_utils import ParallelMode
+from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
+from mindspore.train.callback import Callback, ModelCheckpoint, CheckpointConfig
+from mindspore.model_zoo.Bert_NEZHA import BertNetworkWithLoss, BertTrainOneStepCell, BertTrainOneStepWithLossScaleCell
+from mindspore.nn.optim import Lamb, Momentum, AdamWeightDecayDynamicLR
+from dataset import create_bert_dataset
+from config import cfg, bert_net_cfg
+_current_dir = os.path.dirname(os.path.realpath(__file__))
+
+class LossCallBack(Callback):
+    """
+    Append the epoch number, step number and network outputs of training steps to ``./loss.log``.
+    Note:
+        If per_print_times is 0, nothing is written.
+    Args:
+        per_print_times (int): Write a record every `per_print_times` steps. Default: 1.
+    """
+    def __init__(self, per_print_times=1):
+        super(LossCallBack, self).__init__()
+        if not isinstance(per_print_times, int) or per_print_times < 0:
+            raise ValueError("per_print_times must be int and >= 0")
+        self._per_print_times = per_print_times
+    def step_end(self, run_context):
+        """Write one record when the current step number is a multiple of per_print_times."""
+        cb_params = run_context.original_args()
+        if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0:
+            with open("./loss.log", "a+") as f:
+                f.write("epoch: {}, step: {}, outputs are {}\n".format(
+                    cb_params.cur_epoch_num, cb_params.cur_step_num, str(cb_params.net_outputs)))
+
+def run_pretrain():
+    """pre-train bert_clue"""
+    parser = argparse.ArgumentParser(description='bert pre_training')
+    parser.add_argument("--distribute", type=str, default="false", help="Run distribute, default is false.")
+    parser.add_argument("--epoch_size", type=int, default="1", help="Epoch size, default is 1.")
+    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
+    parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
+    parser.add_argument("--enable_task_sink", type=str, default="true", help="Enable task sink, default is true.")
+    parser.add_argument("--enable_loop_sink", type=str, default="true", help="Enable loop sink, default is true.")
+    parser.add_argument("--enable_mem_reuse", type=str, default="true", help="Enable mem reuse, default is true.")
+    parser.add_argument("--enable_save_ckpt", type=str, default="true", help="Enable save checkpoint, default is true.")
+    parser.add_argument("--enable_lossscale", type=str, default="true", help="Use lossscale or not, default is not.")
+    parser.add_argument("--do_shuffle", type=str, default="true", help="Enable shuffle for dataset, default is true.")
+    parser.add_argument("--enable_data_sink", type=str, default="true", help="Enable data sink, default is true.")
+    parser.add_argument("--data_sink_steps", type=int, default="1", help="Sink steps for each epoch, default is 1.")
+    parser.add_argument("--checkpoint_path", type=str, default="", help="Checkpoint file path")
+    parser.add_argument("--save_checkpoint_steps", type=int, default=1000, help="Save checkpoint steps, "
+                                                                                "default is 1000.")
+    parser.add_argument("--save_checkpoint_num", type=int, default=1, help="Save checkpoint numbers, default is 1.")
+    parser.add_argument("--data_dir", type=str, default="", help="Data path, it is better to use absolute path")
+    parser.add_argument("--schema_dir", type=str, default="", help="Schema path, it is better to use absolute path")
+
+    args_opt = parser.parse_args()
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
+    context.set_context(enable_task_sink=(args_opt.enable_task_sink == "true"),
+                        enable_loop_sink=(args_opt.enable_loop_sink == "true"),
+                        enable_mem_reuse=(args_opt.enable_mem_reuse == "true"))
+    context.set_context(reserve_class_name_in_scope=False)
+
+    if args_opt.distribute == "true":
+        device_num = args_opt.device_num
+        context.reset_auto_parallel_context()
+        context.set_context(enable_hccl=True)
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
+                                          device_num=device_num)
+        D.init()
+        rank = args_opt.device_id % device_num
+    else:
+        context.set_context(enable_hccl=False)
+        rank = 0
+        device_num = 1
+
+    ds = create_bert_dataset(args_opt.epoch_size, device_num, rank, args_opt.do_shuffle, args_opt.enable_data_sink,
+                             args_opt.data_sink_steps, args_opt.data_dir, args_opt.schema_dir)
+
+    netwithloss = BertNetworkWithLoss(bert_net_cfg, True)
+
+    if cfg.optimizer == 'Lamb':
+        optimizer = Lamb(netwithloss.trainable_params(), decay_steps=ds.get_dataset_size() * ds.get_repeat_count(),
+                         start_learning_rate=cfg.Lamb.start_learning_rate, end_learning_rate=cfg.Lamb.end_learning_rate,
+                         power=cfg.Lamb.power, warmup_steps=cfg.Lamb.warmup_steps, weight_decay=cfg.Lamb.weight_decay,
+                         eps=cfg.Lamb.eps, decay_filter=cfg.Lamb.decay_filter)
+    elif cfg.optimizer == 'Momentum':
+        optimizer = Momentum(netwithloss.trainable_params(), learning_rate=cfg.Momentum.learning_rate,
+                             momentum=cfg.Momentum.momentum)
+    elif cfg.optimizer == 'AdamWeightDecayDynamicLR':
+        optimizer = AdamWeightDecayDynamicLR(netwithloss.trainable_params(),
+                                             decay_steps=ds.get_dataset_size() * ds.get_repeat_count(),
+                                             learning_rate=cfg.AdamWeightDecayDynamicLR.learning_rate,
+                                             end_learning_rate=cfg.AdamWeightDecayDynamicLR.end_learning_rate,
+                                             power=cfg.AdamWeightDecayDynamicLR.power,
+                                             weight_decay=cfg.AdamWeightDecayDynamicLR.weight_decay,
+                                             eps=cfg.AdamWeightDecayDynamicLR.eps)
+    else:
+        raise ValueError("Don't support optimizer {}, only support [Lamb, Momentum, AdamWeightDecayDynamicLR]".
+                         format(cfg.optimizer))
+    callback = [LossCallBack()]
+    if args_opt.enable_save_ckpt == "true":
+        config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps,
+                                     keep_checkpoint_max=args_opt.save_checkpoint_num)
+        ckpoint_cb = ModelCheckpoint(prefix='checkpoint_bert', config=config_ck)
+        callback.append(ckpoint_cb)
+
+    if args_opt.checkpoint_path:
+        param_dict = load_checkpoint(args_opt.checkpoint_path)
+        load_param_into_net(netwithloss, param_dict)
+
+    if args_opt.enable_lossscale == "true":
+        update_cell = DynamicLossScaleUpdateCell(loss_scale_value=cfg.loss_scale_value,
+                                                 scale_factor=cfg.scale_factor,
+                                                 scale_window=cfg.scale_window)
+        netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer,
+                                                         scale_update_cell=update_cell)
+    else:
+        netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer)
+
+    model = Model(netwithgrads)
+    model.train(ds.get_repeat_count(), ds, callbacks=callback, dataset_sink_mode=(args_opt.enable_data_sink == "true"))
+if __name__ == '__main__':
+    run_pretrain()
diff --git a/example/bert_clue/run_standalone_pretrain.sh b/example/bert_clue/run_standalone_pretrain.sh
new file mode 100644
index 0000000000..aeffca7b04
--- /dev/null
+++ b/example/bert_clue/run_standalone_pretrain.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "sh run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_PATH"
+echo "for example: sh run_standalone_pretrain.sh 0 40 /path/zh-wiki/ /path/Schema.json /path/mindspore"
+echo "=============================================================================================================="
+# Positional arguments, in the order shown in the usage banner above.
+DEVICE_ID=$1
+EPOCH_SIZE=$2
+DATA_DIR=$3
+SCHEMA_DIR=$4
+MINDSPORE_PATH=$5
+export PYTHONPATH="$MINDSPORE_PATH/build/package:$PYTHONPATH"
+# Start pre-training in the background; stdout and stderr are both written to log.txt.
+python run_pretrain.py  \
+    --distribute="false" \
+    --epoch_size="$EPOCH_SIZE" \
+    --device_id="$DEVICE_ID" \
+    --enable_task_sink="true" \
+    --enable_loop_sink="true" \
+    --enable_mem_reuse="true" \
+    --enable_save_ckpt="true" \
+    --enable_lossscale="true" \
+    --do_shuffle="true" \
+    --enable_data_sink="true" \
+    --data_sink_steps=1 \
+    --checkpoint_path="" \
+    --save_checkpoint_steps=1000 \
+    --save_checkpoint_num=1 \
+    --data_dir="$DATA_DIR" \
+    --schema_dir="$SCHEMA_DIR" > log.txt 2>&1 &

From e64c755ad69a41f48d899a709e2bc196dcc34d82 Mon Sep 17 00:00:00 2001
From: kpy <kuangpeiyu@huawei.com>
Date: Fri, 24 Apr 2020 17:51:55 +0800
Subject: [PATCH 123/242] change tensor equal bug

---
 mindspore/ccsrc/ir/meta_tensor.cc |  9 ---------
 mindspore/ccsrc/ir/meta_tensor.h  |  3 ---
 mindspore/common/tensor.py        | 11 +++++++++++
 mindspore/ops/functional.py       |  2 ++
 tests/vm_impl/math_ops_vm_impl.py | 12 ++++++------
 5 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/mindspore/ccsrc/ir/meta_tensor.cc b/mindspore/ccsrc/ir/meta_tensor.cc
index fe41abcef4..af6b4f7ffc 100644
--- a/mindspore/ccsrc/ir/meta_tensor.cc
+++ b/mindspore/ccsrc/ir/meta_tensor.cc
@@ -185,14 +185,6 @@ bool Tensor::operator==(const Tensor &tensor) const {
   return (MetaTensor::operator==(tensor) && data_ == tensor.data_);
 }
 
-bool Tensor::ValueEqualPy(const py::object &other) const {
-  if (!py::isinstance<Tensor>(other)) {
-    MS_LOG(WARNING) << "compare other not a tensor";
-    return false;
-  }
-  return ValueEqual(py::cast<Tensor>(other));
-}
-
 bool Tensor::ValueEqual(const Tensor &other) const {
   auto equal = [&other, this]() -> bool {
     auto np = py::module::import("numpy");
@@ -542,7 +534,6 @@ REGISTER_PYBIND_DEFINE(Tensor, ([](const py::module *m) {
                              )mydelimiter")
                            .def("__str__", &Tensor::ToString)
                            .def("__repr__", &Tensor::ToStringRepr)
-                           .def("__eq__", &Tensor::ValueEqualPy)
                            .def(py::pickle(
                              [](const Tensor &t) {  // __getstate__
                                /* Return a tuple that fully encodes the state of the object */
diff --git a/mindspore/ccsrc/ir/meta_tensor.h b/mindspore/ccsrc/ir/meta_tensor.h
index 1f6c866f11..ff76a1d4f9 100644
--- a/mindspore/ccsrc/ir/meta_tensor.h
+++ b/mindspore/ccsrc/ir/meta_tensor.h
@@ -329,9 +329,6 @@ class Tensor : public MetaTensor {
   // It is different from 'operator==' which just compare shape/type/address, it do real value comparison.
   bool ValueEqual(const Tensor &other) const;
 
-  // It is different from 'operator==' which just compare shape/type/address, it do real value comparison.
-  bool ValueEqualPy(const py::object &other) const;
-
   bool operator==(const Value &other) const override {
     if (other.isa<Tensor>()) {
       auto other_ = static_cast<const Tensor &>(other);
diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py
index 70b8b169ca..5504f2b483 100644
--- a/mindspore/common/tensor.py
+++ b/mindspore/common/tensor.py
@@ -74,6 +74,17 @@ class Tensor(Tensor_):
         out = tensor_operator_registry.get('__add__')(self, other)
         return out
 
+    def __eq__(self, other):
+        if not isinstance(other, Tensor):
+            return False
+        x = self.asnumpy()
+        y = other.asnumpy()
+        out = np.equal(x, y)
+        return Tensor(np.array(out))
+
+    def __hash__(self):
+        return hash(id(self))
+
     def __mul__(self, other):
         check_type('tensor input_data', other, (Tensor, float, int))
         out = tensor_operator_registry.get('__mul__')(self, other)
diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py
index 4135133e85..a2473fe709 100644
--- a/mindspore/ops/functional.py
+++ b/mindspore/ops/functional.py
@@ -144,3 +144,5 @@ stop_gradient = Primitive("stop_gradient")
 tensor_operator_registry.register('__add__', tensor_add)
 tensor_operator_registry.register('__mul__', tensor_mul)
 tensor_operator_registry.register('__div__', tensor_div)
+#ms cannot support Tensor(True) compare
+tensor_operator_registry.register('__eq__', equal)
diff --git a/tests/vm_impl/math_ops_vm_impl.py b/tests/vm_impl/math_ops_vm_impl.py
index 01df0b824e..e42ba92d5e 100644
--- a/tests/vm_impl/math_ops_vm_impl.py
+++ b/tests/vm_impl/math_ops_vm_impl.py
@@ -172,7 +172,7 @@ def vm_impl_equal(self):
         x = x.asnumpy()
         y = y.asnumpy()
         out = vm.equal(x, y)
-        return Tensor(out)
+        return Tensor(np.array(out))
     return vm_impl
 
 
@@ -183,7 +183,7 @@ def vm_impl_not_equal(self):
         x = x.asnumpy()
         y = y.asnumpy()
         out = vm.not_equal(x, y)
-        return Tensor(out)
+        return Tensor(np.array(out))
     return vm_impl
 
 
@@ -194,7 +194,7 @@ def vm_impl_greater(self):
         x = x.asnumpy()
         y = y.asnumpy()
         out = vm.greater(x, y)
-        return Tensor(out)
+        return Tensor(np.array(out))
     return vm_impl
 
 @vm_impl_getters.register(P.Maximum)
@@ -219,17 +219,17 @@ def vm_impl_minimum(self):
     return vm_impl
 
 @vm_impl_getters.register(P.Less)
-def vm_impl_greater(self):
+def vm_impl_less(self):
     """Generate vm_impl function for Less"""
     def vm_impl(x, y):
         x = x.asnumpy()
         y = y.asnumpy()
         out = vm.less(x, y)
-        return Tensor(out)
+        return Tensor(np.array(out))
     return vm_impl
 
 @vm_impl_getters.register(P.ScalarCast)
-def vm_impl_greater(self):
+def vm_impl_scalar_cast(self):
     """Generate vm_impl function for ScalarCast"""
     def vm_impl(x, t):
         np_type = dtype_to_nptype(t)

From ab04b3dc4bcffdb71e97c03a2c133979836696ae Mon Sep 17 00:00:00 2001
From: wenkai <wenkai8@huawei.com>
Date: Mon, 27 Apr 2020 17:15:21 +0800
Subject: [PATCH 124/242] fix np.histograms(bins='auto') sometimes calc very
 small width and very large bucket number, which lead to error/long compute
 time.

---
 mindspore/train/summary/_summary_adapter.py   | 34 ++++++++++++++++++-
 .../train/summary/test_histogram_summary.py   |  3 +-
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/mindspore/train/summary/_summary_adapter.py b/mindspore/train/summary/_summary_adapter.py
index 7db80de693..9669d0f054 100644
--- a/mindspore/train/summary/_summary_adapter.py
+++ b/mindspore/train/summary/_summary_adapter.py
@@ -15,6 +15,7 @@
 """Generate the summary event which conform to proto format."""
 import time
 import socket
+import math
 from enum import Enum, unique
 import numpy as np
 from PIL import Image
@@ -292,6 +293,36 @@ def _get_tensor_summary(tag: str, np_value, summary_tensor):
     return summary_tensor
 
 
+def _calc_histogram_bins(count):
+    """
+    Calculate an experience-based number of bins for a histogram.
+
+    Each bin should hold enough values, so the number of bins is derived from count. For very small counts (1 -
+    10), hand-picked bin numbers are returned. For larger counts, the target is about number_per_bucket (10)
+    values per bin on average. Too many bins slow down processing, so the number of bins is capped at 90.
+
+    Args:
+        count (int): Valid number count for the tensor.
+
+    Returns:
+        int, number of histogram bins (1 when count is 0).
+    """
+    number_per_bucket = 10
+    max_bins = 90
+
+    if not count:
+        return 1
+    if count <= 5:
+        return 2
+    if count <= 10:
+        return 3
+    if count <= 880:
+        # 880 is the last count that stays below the cap: math.ceil(881 / 10) + 1 already equals max_bins (90)
+        return int(math.ceil(count / number_per_bucket) + 1)
+
+    return max_bins
+
+
 def _fill_histogram_summary(tag: str, np_value: np.array, summary_histogram) -> None:
     """
     Package the histogram summary.
@@ -347,7 +378,8 @@ def _fill_histogram_summary(tag: str, np_value: np.array, summary_histogram) ->
 
         return
 
-    counts, edges = np.histogram(np_value, bins='auto', range=(tensor_min, tensor_max))
+    bin_number = _calc_histogram_bins(masked_value.count())
+    counts, edges = np.histogram(np_value, bins=bin_number, range=(tensor_min, tensor_max))
 
     for ind, count in enumerate(counts):
         bucket = summary_histogram.buckets.add()
diff --git a/tests/ut/python/train/summary/test_histogram_summary.py b/tests/ut/python/train/summary/test_histogram_summary.py
index 50204cd757..53c62990b1 100644
--- a/tests/ut/python/train/summary/test_histogram_summary.py
+++ b/tests/ut/python/train/summary/test_histogram_summary.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from mindspore.common.tensor import Tensor
 from mindspore.train.summary.summary_record import SummaryRecord, _cache_summary_tensor_data
+from mindspore.train.summary._summary_adapter import _calc_histogram_bins
 from .summary_reader import SummaryReader
 
 CUR_DIR = os.getcwd()
@@ -139,7 +140,7 @@ def test_histogram_summary_same_value():
         event = reader.read_event()
         LOG.debug(event)
 
-        assert len(event.summary.value[0].histogram.buckets) == 1
+        assert len(event.summary.value[0].histogram.buckets) == _calc_histogram_bins(dim1 * dim2)
 
 
 def test_histogram_summary_high_dims():

From c046874b0389839edf911af618f3d7dabbf30d5b Mon Sep 17 00:00:00 2001
From: zhouyuanshen <zhouyuanshen@huawei.com>
Date: Mon, 27 Apr 2020 10:08:09 +0800
Subject: [PATCH 125/242] fix bug in infer_dtype function of hcom operations

---
 mindspore/ops/operations/comm_ops.py       | 17 ++++++++++-------
 tests/ut/python/communication/test_comm.py |  2 +-
 tests/ut/python/parallel/test_bool_grad.py |  5 +++--
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/mindspore/ops/operations/comm_ops.py b/mindspore/ops/operations/comm_ops.py
index 969091de97..5fb5f3ed95 100644
--- a/mindspore/ops/operations/comm_ops.py
+++ b/mindspore/ops/operations/comm_ops.py
@@ -45,7 +45,6 @@ class AllReduce(PrimitiveWithInfer):
 
     Note:
         The operation of AllReduce does not support "prod" currently.
-        The input of AllReduce does not support dtype "Bool".
         Tensor must have same shape and format in all processes participating in the collective.
 
     Args:
@@ -103,7 +102,7 @@ class AllReduce(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, x_dtype):
-        if x_dtype == mstype.bool_:
+        if x_dtype.element_type() == mstype.bool_:
             raise TypeError("AllReduce does not support 'Bool' as the dtype of input!")
         return x_dtype
 
@@ -161,7 +160,7 @@ class AllGather(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, x_dtype):
-        if x_dtype == mstype.bool_:
+        if x_dtype.element_type() == mstype.bool_:
             raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!")
         return x_dtype
 
@@ -176,6 +175,7 @@ class ReduceScatter(PrimitiveWithInfer):
     Note:
         The back propagation of the op is not surported yet. Stay tuned for more.
         Tensor must have the same shape and format in all processes participating in the collective.
+
     Args:
         op (str): Specifies an operation used for element-wise reductions,
                   like sum, max, avg. Default: ReduceOp.SUM.
@@ -218,7 +218,7 @@ class ReduceScatter(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, x_dtype):
-        if x_dtype == mstype.bool_:
+        if x_dtype.element_type() == mstype.bool_:
             raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!")
         return x_dtype
 
@@ -275,8 +275,11 @@ class Broadcast(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, x_dtype):
-        if x_dtype == mstype.bool_:
-            raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!")
+        if not isinstance(x_dtype, tuple):
+            raise TypeError(f"{self.name}'s input should be a tuple!")
+        for _ele in x_dtype:
+            if _ele.element_type() == mstype.bool_:
+                raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!")
         return x_dtype
 
 
@@ -318,7 +321,7 @@ class _AlltoAll(PrimitiveWithInfer):
         return x_shape
 
     def infer_dtype(self, x_dtype):
-        if x_dtype == mstype.bool_:
+        if x_dtype.element_type() == mstype.bool_:
             raise TypeError(f"{self.name} does not support 'Bool' as the dtype of input!")
         return x_dtype
 
diff --git a/tests/ut/python/communication/test_comm.py b/tests/ut/python/communication/test_comm.py
index 38fd7199fd..885c8fa9e3 100644
--- a/tests/ut/python/communication/test_comm.py
+++ b/tests/ut/python/communication/test_comm.py
@@ -55,7 +55,7 @@ class BroadCastNet(nn.Cell):
         self.broadcast = Broadcast(0)
 
     def construct(self, x):
-        x = self.broadcast((x))
+        x, = self.broadcast((x,))
         x = self.dense(x)
         return x
 
diff --git a/tests/ut/python/parallel/test_bool_grad.py b/tests/ut/python/parallel/test_bool_grad.py
index f3cdfc8030..491707103b 100644
--- a/tests/ut/python/parallel/test_bool_grad.py
+++ b/tests/ut/python/parallel/test_bool_grad.py
@@ -52,7 +52,7 @@ class CommonNet(nn.Cell):
     def __init__(self):
         super(CommonNet, self).__init__()
         self.weight = Parameter(Tensor(np.ones([256, 64]), dtype=ms.float32), name="mul_weight")
-        self.logicalnot = P.LogicalNot().set_strategy(((4,1),))
+        self.logicalnot = P.LogicalNot().set_strategy(((4,2),))
         self.equal = P.Equal().set_strategy(((4,2),(4,2)))
 
     def construct(self, x, label):
@@ -78,4 +78,5 @@ def common_net():
 
 
 def test_bool_grad():
-    common_net()
\ No newline at end of file
+    common_net()
+

From 8f48db291a6524b891ed5420f48b5c80ec493830 Mon Sep 17 00:00:00 2001
From: liubuyu <liubuyu1@huawei.com>
Date: Mon, 27 Apr 2020 18:26:57 +0800
Subject: [PATCH 126/242] refresh parameter format

---
 .../ascend/ascend_backend_optimization.cc     |  2 +
 .../ir_fusion/refresh_parameter_format.cc     | 71 +++++++++++++++++++
 .../ir_fusion/refresh_parameter_format.h      | 40 +++++++++++
 3 files changed, 113 insertions(+)
 create mode 100644 mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc
 create mode 100644 mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h

diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index ead48a7059..28b57359c5 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -38,6 +38,7 @@
 #include "pre_activate/ascend/ir_fusion/adam_apply_one_fusion.h"
 #include "pre_activate/ascend/ir_fusion/adam_apply_one_with_decay_rule.h"
 #include "pre_activate/ascend/ir_fusion/parameter_and_transop_fusion.h"
+#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h"
 #include "pre_activate/ascend/ir_fusion/transpose_transdata_fusion.h"
 #include "pre_activate/ascend/ir_fusion/transdata_split.h"
 #include "pre_activate/ascend/ir_fission/topk_split.h"
@@ -267,6 +268,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
   other_pm->AddPass(std::make_shared<AllReduceFusion>());
   other_pm->AddPass(std::make_shared<AllGatherFusion>());
   other_pm->AddPass(std::make_shared<ParameterTransOpFusion>());
+  other_pm->AddPass(std::make_shared<RefreshParameterFormat>());
   other_pm->AddPass(std::make_shared<BufferFusion>());
   other_pm->AddPass(std::make_shared<GetitemTuple>());
   other_pm->AddPass(std::make_shared<CommonSubexpressionElimination>());
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc
new file mode 100644
index 0000000000..857670a384
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.cc
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pre_activate/ascend/ir_fusion/refresh_parameter_format.h"
+#include "session/anf_runtime_algorithm.h"
+#include "utils/utils.h"
+#include "operator/ops.h"
+#include "device/kernel_info.h"
+#include "pre_activate/common/helper.h"
+#include "pre_activate/common/optimizer.h"
+#include "pre_activate/ascend/ascend_helper.h"
+
+namespace mindspore {
+namespace opt {
+void DoRefresh(const CNodePtr &cnode) {
+  if (cnode == nullptr) {
+    MS_LOG(EXCEPTION) << "node is nullptr";
+  }
+  for (size_t input_index = 0; input_index < AnfAlgo::GetInputTensorNum(cnode); input_index++) {
+    auto input_kernel_node = AnfAlgo::GetInputNode(cnode, input_index);
+    if (input_kernel_node == nullptr || !input_kernel_node->isa<Parameter>()) {
+      continue;  // only Parameter inputs are refreshed; a null input is skipped instead of dereferenced
+    }
+    auto cnode_input_format = AnfAlgo::GetInputFormat(cnode, input_index);
+    if (AnfAlgo::GetOutputFormat(input_kernel_node, 0) != cnode_input_format) {
+      // Rebuild the parameter's kernel build info with the format cnode expects; the device data type is kept.
+      auto builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
+      builder->SetOutputsFormat({cnode_input_format});
+      builder->SetOutputsDeviceType({AnfAlgo::GetOutputDeviceDataType(input_kernel_node, 0)});
+      AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), input_kernel_node.get());
+    }
+  }
+}
+
+
+bool RefreshParameterFormat::Run(const FuncGraphPtr &func_graph) {
+  if (func_graph == nullptr) {
+    MS_LOG(ERROR) << "func_graph is nullptr.";
+    return false;
+  }
+  std::vector<AnfNodePtr> node_list = TopoSort(func_graph->get_return());
+  for (auto node : node_list) {
+    if (node == nullptr || !node->isa<CNode>()) {
+      continue;
+    }
+    auto cnode = node->cast<CNodePtr>();
+    if (cnode == nullptr) {
+      continue;
+    }
+    auto node_name = AnfAlgo::GetCNodeName(cnode);
+    if (node_name == kBNTrainingUpdateOpName) {
+      DoRefresh(cnode);
+    }
+  }
+  return true;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h
new file mode 100644
index 0000000000..0ba688b134
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/refresh_parameter_format.h
@@ -0,0 +1,40 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_
+
+#include <vector>
+#include <memory>
+#include <utility>
+#include "ir/anf.h"
+#include "pre_activate/common/pass.h"
+
+namespace mindspore {
+namespace opt {
+class RefreshParameterFormat : public Pass {
+ public:
+  explicit RefreshParameterFormat(size_t groups = 1) : Pass("refresh_parameter_format"), groups_(groups) {}
+  ~RefreshParameterFormat() override = default;
+  bool Run(const FuncGraphPtr &graph) override;
+
+ private:
+  size_t groups_ = 1;
+};
+}  // namespace opt
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FUSION_REFRESH_PARAMETER_FORMAT_H_

From 09b2dcb3fbf2a7ca8cda5deaa364b15e2ac5d305 Mon Sep 17 00:00:00 2001
From: wandongdong <wandongdong1@huawei.com>
Date: Fri, 24 Apr 2020 21:15:13 +0800
Subject: [PATCH 127/242] add mobilenetv2

---
 example/mobilenetv2_imagenet2012/README.md    | 101 +++++++
 example/mobilenetv2_imagenet2012/config.py    |  35 +++
 example/mobilenetv2_imagenet2012/dataset.py   |  84 ++++++
 example/mobilenetv2_imagenet2012/eval.py      |  56 ++++
 example/mobilenetv2_imagenet2012/launch.py    | 150 +++++++++
 .../mobilenetv2_imagenet2012/lr_generator.py  |  54 ++++
 example/mobilenetv2_imagenet2012/run_infer.sh |  33 ++
 example/mobilenetv2_imagenet2012/run_train.sh |  33 ++
 example/mobilenetv2_imagenet2012/train.py     | 149 +++++++++
 mindspore/model_zoo/mobilenet.py              | 284 ++++++++++++++++++
 10 files changed, 979 insertions(+)
 create mode 100644 example/mobilenetv2_imagenet2012/README.md
 create mode 100644 example/mobilenetv2_imagenet2012/config.py
 create mode 100644 example/mobilenetv2_imagenet2012/dataset.py
 create mode 100644 example/mobilenetv2_imagenet2012/eval.py
 create mode 100644 example/mobilenetv2_imagenet2012/launch.py
 create mode 100644 example/mobilenetv2_imagenet2012/lr_generator.py
 create mode 100644 example/mobilenetv2_imagenet2012/run_infer.sh
 create mode 100644 example/mobilenetv2_imagenet2012/run_train.sh
 create mode 100644 example/mobilenetv2_imagenet2012/train.py
 create mode 100644 mindspore/model_zoo/mobilenet.py

diff --git a/example/mobilenetv2_imagenet2012/README.md b/example/mobilenetv2_imagenet2012/README.md
new file mode 100644
index 0000000000..bb5288908d
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/README.md
@@ -0,0 +1,101 @@
+# MobileNetV2 Example
+
+## Description
+
+This is an example of training MobileNetV2 with ImageNet2012 dataset in MindSpore. 
+
+## Requirements
+
+* Install [MindSpore](https://www.mindspore.cn/install/en). 
+
+* Download the dataset [ImageNet2012](http://www.image-net.org/). 
+
+> Unzip the ImageNet2012 dataset to any path you want and the folder structure should be as follows:
+> ```
+> .  
+> ├── train  # train dataset
+> └── val   # infer dataset
+> ```
+
+## Example structure
+
+``` shell
+.
+├── config.py               # parameter configuration
+├── dataset.py              # data preprocessing
+├── eval.py                 # infer script
+├── launch.py               # launcher for distributed training
+├── lr_generator.py         # generate learning rate for each step
+├── run_infer.sh            # launch inference
+├── run_train.sh            # launch training
+└── train.py                # train script
+```
+
+## Parameter configuration
+
+Parameters for both training and inference can be set in 'config.py'. 
+
+``` 
+"num_classes": 1000,                    # dataset class num
+"image_height": 224,                    # image height
+"image_width": 224,                     # image width
+"batch_size": 256,                      # training or infering batch size
+"epoch_size": 200,                      # total training epochs, including warmup_epochs
+"warmup_epochs": 4,                     # warmup epochs
+"lr": 0.4,                              # base learning rate
+"momentum": 0.9,                        # momentum
+"weight_decay": 4e-5,                   # weight decay
+"loss_scale": 1024,                     # loss scale
+"save_checkpoint": True,                # whether save checkpoint
+"save_checkpoint_epochs": 1,            # the epoch interval between two checkpoints
+"keep_checkpoint_max": 200,             # only keep the last keep_checkpoint_max checkpoint
+"save_checkpoint_path": "./checkpoint"  # path to save checkpoint
+```
+
+## Running the example
+
+### Train
+
+#### Usage
+Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
+
+#### Launch
+
+``` 
+# training example
+sh run_train.sh 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet
+```
+
+#### Result
+
+Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`, as shown below.
+
+``` 
+epoch: [  0/200], step:[  624/  625], loss:[5.258/5.258], time:[140412.236], lr:[0.100]
+epoch time: 140522.500, per step time: 224.836, avg loss: 5.258
+epoch: [  1/200], step:[  624/  625], loss:[3.917/3.917], time:[138221.250], lr:[0.200]
+epoch time: 138331.250, per step time: 221.330, avg loss: 3.917
+```
+
+### Infer
+
+#### Usage
+
+Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]
+
+#### Launch
+
+``` 
+# infer example
+sh run_infer.sh ~/imagenet ~/train/mobilenet-200_625.ckpt
+```
+
+> checkpoint can be produced in training process. 
+
+#### Result
+
+Inference results will be stored in the example path; you can find results like the following in `val.log`.
+
+``` 
+result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
+```
diff --git a/example/mobilenetv2_imagenet2012/config.py b/example/mobilenetv2_imagenet2012/config.py
new file mode 100644
index 0000000000..32df4eabc9
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/config.py
@@ -0,0 +1,35 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+network config setting, will be used in train.py and eval.py
+"""
+from easydict import EasyDict as ed
+
+config = ed({
+    "num_classes": 1000,
+    "image_height": 224,
+    "image_width": 224,
+    "batch_size": 256,
+    "epoch_size": 200,
+    "warmup_epochs": 4,
+    "lr": 0.4,
+    "momentum": 0.9,
+    "weight_decay": 4e-5,
+    "loss_scale": 1024,
+    "save_checkpoint": True,
+    "save_checkpoint_epochs": 1,
+    "keep_checkpoint_max": 200,
+    "save_checkpoint_path": "./checkpoint",
+})
diff --git a/example/mobilenetv2_imagenet2012/dataset.py b/example/mobilenetv2_imagenet2012/dataset.py
new file mode 100644
index 0000000000..9df34d51dc
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/dataset.py
@@ -0,0 +1,84 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+create train or eval dataset.
+"""
+import os
+import mindspore.common.dtype as mstype
+import mindspore.dataset.engine as de
+import mindspore.dataset.transforms.vision.c_transforms as C
+import mindspore.dataset.transforms.c_transforms as C2
+from config import config
+
+
+def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
+    """
+    create a train or eval dataset
+
+    Args:
+        dataset_path(string): the path of dataset.
+        do_train(bool): whether dataset is used for train or eval.
+        repeat_num(int): the repeat times of dataset. Default: 1
+        batch_size(int): the batch size of dataset. Default: 32
+
+    Returns:
+        dataset
+    """
+    rank_size = int(os.getenv("RANK_SIZE"))
+    rank_id = int(os.getenv("RANK_ID"))
+
+    if rank_size == 1:
+        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True)
+    else:
+        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=16, shuffle=True,
+                                     num_shards=rank_size, shard_id=rank_id)
+
+    resize_height = config.image_height
+    resize_width = config.image_width
+    rescale = 1.0 / 255.0
+    shift = 0.0
+    buffer_size = 1000
+
+    # define map operations
+    decode_op = C.Decode()
+    resize_crop_op = C.RandomResizedCrop(resize_height, scale=(0.2, 1.0))
+    horizontal_flip_op = C.RandomHorizontalFlip()
+
+    resize_op = C.Resize((256, 256))
+    center_crop = C.CenterCrop(resize_width)
+    rescale_op = C.Rescale(rescale, shift)
+    normalize_op = C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    change_swap_op = C.HWC2CHW()
+
+    if do_train:
+        trans = [decode_op, resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op]
+    else:
+        trans = [decode_op, resize_op, center_crop, rescale_op, normalize_op, change_swap_op]
+
+    type_cast_op = C2.TypeCast(mstype.int32)
+
+    ds = ds.map(input_columns="image", operations=trans)
+    ds = ds.map(input_columns="label", operations=type_cast_op)
+
+    # apply shuffle operations
+    ds = ds.shuffle(buffer_size=buffer_size)
+
+    # apply batch operations
+    ds = ds.batch(batch_size, drop_remainder=True)
+
+    # apply dataset repeat operation
+    ds = ds.repeat(repeat_num)
+
+    return ds
diff --git a/example/mobilenetv2_imagenet2012/eval.py b/example/mobilenetv2_imagenet2012/eval.py
new file mode 100644
index 0000000000..6c51fc042b
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/eval.py
@@ -0,0 +1,56 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+eval.
+"""
+import os
+import argparse
+from dataset import create_dataset
+from config import config
+from mindspore import context
+from mindspore.model_zoo.mobilenet import mobilenet_v2
+from mindspore.train.model import Model
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
+
+parser = argparse.ArgumentParser(description='Image classification')
+parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+args_opt = parser.parse_args()
+
+device_id = int(os.getenv('DEVICE_ID'))
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)
+context.set_context(enable_task_sink=True)
+context.set_context(enable_loop_sink=True)
+context.set_context(enable_mem_reuse=True)
+
+if __name__ == '__main__':
+    context.set_context(enable_hccl=False)
+
+    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+    net = mobilenet_v2(num_classes=config.num_classes)  # keep the head size in sync with train.py
+
+    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
+    step_size = dataset.get_dataset_size()
+
+    if args_opt.checkpoint_path:
+        param_dict = load_checkpoint(args_opt.checkpoint_path)
+        load_param_into_net(net, param_dict)
+    net.set_train(False)
+
+    model = Model(net, loss_fn=loss, metrics={'acc'})
+    res = model.eval(dataset)
+    print("result:", res, "ckpt=", args_opt.checkpoint_path)
diff --git a/example/mobilenetv2_imagenet2012/launch.py b/example/mobilenetv2_imagenet2012/launch.py
new file mode 100644
index 0000000000..5a8977c64b
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/launch.py
@@ -0,0 +1,150 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""launch train script"""
+import os
+import sys
+import subprocess
+import json
+from argparse import ArgumentParser
+
+
+def parse_args():
+    """
+    parse args .
+
+    Args:
+
+    Returns:
+        args.
+
+    Examples:
+        >>> parse_args()
+    """
+    parser = ArgumentParser(description="mindspore distributed training launch "
+                                        "helper utilty that will spawn up "
+                                        "multiple distributed processes")
+    parser.add_argument("--nproc_per_node", type=int, default=1,
+                        help="The number of processes to launch on each node, "
+                             "for D training, this is recommended to be set "
+                             "to the number of D in your system so that "
+                             "each process can be bound to a single D.")
+    parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
+                        help="will use the visible devices sequentially")
+    parser.add_argument("--server_id", type=str, default="",
+                        help="server ip")
+    parser.add_argument("--training_script", type=str,
+                        help="The full path to the single D training "
+                             "program/script to be launched in parallel, "
+                             "followed by all the arguments for the "
+                             "training script")
+    # rest from the training program
+    args, unknown = parser.parse_known_args()
+    args.training_script_args = unknown
+    return args
+
+
+def main():
+    """Build the HCCL rank table from /etc/hccn.conf and spawn one training process per device."""
+    print("start", __file__)
+    args = parse_args()
+    print(args)
+    visible_devices = args.visible_devices.split(',')
+    assert os.path.isfile(args.training_script)
+    assert len(visible_devices) >= args.nproc_per_node
+    print('visible_devices:{}'.format(visible_devices))
+    if not args.server_id:
+        print('pleaser input server ip!!!')
+        sys.exit(1)  # a missing server ip is a usage error, so report failure to the caller
+    print('server_id:{}'.format(args.server_id))
+
+    # construct hccn_table
+    device_ips = {}
+    with open('/etc/hccn.conf', 'r') as hccn_file:  # close the config file deterministically
+        for hccn_item in hccn_file:
+            hccn_item = hccn_item.strip()
+            if hccn_item.startswith('address_'):
+                device_id, device_ip = hccn_item.split('=')
+                device_id = device_id.split('_')[1]
+                device_ips[device_id] = device_ip
+                print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
+    hccn_table = {}
+    hccn_table['board_id'] = '0x0000'
+    hccn_table['chip_info'] = '910'
+    hccn_table['deploy_mode'] = 'lab'
+    hccn_table['group_count'] = '1'
+    hccn_table['group_list'] = []
+    instance_list = []
+    usable_dev = ''
+    for instance_id in range(args.nproc_per_node):
+        instance = {}
+        instance['devices'] = []
+        device_id = visible_devices[instance_id]
+        device_ip = device_ips[device_id]
+        usable_dev += str(device_id)
+        instance['devices'].append({
+            'device_id': device_id,
+            'device_ip': device_ip,
+        })
+        instance['rank_id'] = str(instance_id)
+        instance['server_id'] = args.server_id
+        instance_list.append(instance)
+    hccn_table['group_list'].append({
+        'device_num': str(args.nproc_per_node),
+        'server_num': '1',
+        'group_name': '',
+        'instance_count': str(args.nproc_per_node),
+        'instance_list': instance_list,
+    })
+    hccn_table['para_plane_nic_location'] = 'device'
+    hccn_table['para_plane_nic_name'] = []
+    for instance_id in range(args.nproc_per_node):
+        eth_id = visible_devices[instance_id]
+        hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id))
+    hccn_table['para_plane_nic_num'] = str(args.nproc_per_node)
+    hccn_table['status'] = 'completed'
+
+    # save hccn_table to file
+    # the table goes into the current working directory, which needs no existence check
+    table_path = os.getcwd()
+    table_fn = os.path.join(table_path,
+                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
+    with open(table_fn, 'w') as table_fp:
+        json.dump(hccn_table, table_fp, indent=4)
+    sys.stdout.flush()
+
+    # spawn the processes
+    current_env = os.environ.copy()
+    current_env["RANK_SIZE"] = str(args.nproc_per_node)
+    if args.nproc_per_node > 1:
+        current_env["MINDSPORE_HCCL_CONFIG_PATH"] = table_fn
+    processes = []
+    cmds = []
+    for rank_id in range(0, args.nproc_per_node):
+        current_env["RANK_ID"] = str(rank_id)
+        current_env["DEVICE_ID"] = visible_devices[rank_id]
+        cmd = [sys.executable, "-u"]
+        cmd.append(args.training_script)
+        cmd.extend(args.training_script_args)
+        process = subprocess.Popen(cmd, env=current_env)
+        processes.append(process)
+        cmds.append(cmd)
+    for process, cmd in zip(processes, cmds):
+        process.wait()
+        if process.returncode != 0:
+            raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/example/mobilenetv2_imagenet2012/lr_generator.py b/example/mobilenetv2_imagenet2012/lr_generator.py
new file mode 100644
index 0000000000..68bbfe3158
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/lr_generator.py
@@ -0,0 +1,54 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""learning rate generator"""
+import math
+import numpy as np
+
+
+def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
+    """
+    generate learning rate array
+
+    Args:
+       global_step(int): step index to resume from; earlier steps are dropped from the returned array
+       lr_init(float): init learning rate
+       lr_end(float): end learning rate
+       lr_max(float): max learning rate
+       warmup_epochs(int): number of warmup epochs
+       total_epochs(int): total epoch of training
+       steps_per_epoch(int): steps of one epoch
+
+    Returns:
+       np.array, learning rate array
+    """
+    lr_each_step = []
+    total_steps = steps_per_epoch * total_epochs
+    warmup_steps = steps_per_epoch * warmup_epochs
+    for i in range(total_steps):
+        if i < warmup_steps:
+            lr = lr_init + (lr_max - lr_init) * i / warmup_steps
+        else:
+            lr = lr_end + \
+                 (lr_max - lr_end) * \
+                 (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2.
+        if lr < 0.0:
+            lr = 0.0
+        lr_each_step.append(lr)
+
+    current_step = global_step
+    lr_each_step = np.array(lr_each_step).astype(np.float32)
+    learning_rate = lr_each_step[current_step:]
+
+    return learning_rate
diff --git a/example/mobilenetv2_imagenet2012/run_infer.sh b/example/mobilenetv2_imagenet2012/run_infer.sh
new file mode 100644
index 0000000000..dc1e4d0b5d
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/run_infer.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+if [ $# != 2 ]
+then
+    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
+exit 1
+fi
+
+if [ ! -d $1 ]
+then
+    echo "error: DATASET_PATH=$1 is not a directory"
+exit 1
+fi
+
+if [ ! -f $2 ]
+then
+    echo "error: CHECKPOINT_PATH=$2 is not a file"
+exit 1
+fi
+
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export PYTHONPATH=${BASEPATH}:$PYTHONPATH
+export DEVICE_ID=0
+export RANK_ID=0
+export RANK_SIZE=1
+if [ -d "eval" ];
+then
+    rm -rf ./eval
+fi
+mkdir ./eval
+cd ./eval || exit
+python ${BASEPATH}/eval.py \
+        --checkpoint_path=$2 \
+        --dataset_path=$1 &> infer.log &  # dataset val folder path
diff --git a/example/mobilenetv2_imagenet2012/run_train.sh b/example/mobilenetv2_imagenet2012/run_train.sh
new file mode 100644
index 0000000000..3f92b4f172
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/run_train.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+if [ $# != 4 ]
+then
+    echo "Usage: sh run_train.sh [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]"
+exit 1
+fi
+
+if [ $1 -lt 1 ] || [ $1 -gt 8 ]  # reject DEVICE_NUM outside 1-8; '&&' could never be true
+then
+    echo "error: DEVICE_NUM=$1 is not in (1-8)"
+exit 1
+fi
+
+if [ ! -d $4 ]
+then
+    echo "error: DATASET_PATH=$4 is not a directory"
+exit 1
+fi
+
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export PYTHONPATH=${BASEPATH}:$PYTHONPATH
+if [ -d "train" ];
+then
+    rm -rf ./train
+fi
+mkdir ./train
+cd ./train || exit
+python ${BASEPATH}/launch.py \
+        --nproc_per_node=$1 \
+        --visible_devices=$3 \
+        --server_id=$2 \
+        --training_script=${BASEPATH}/train.py \
+        --dataset_path=$4 &> train.log &  # dataset train folder
diff --git a/example/mobilenetv2_imagenet2012/train.py b/example/mobilenetv2_imagenet2012/train.py
new file mode 100644
index 0000000000..584e89fe43
--- /dev/null
+++ b/example/mobilenetv2_imagenet2012/train.py
@@ -0,0 +1,149 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""train_imagenet."""
+import os
+import time
+import argparse
+import random
+import numpy as np
+from dataset import create_dataset
+from lr_generator import get_lr
+from config import config
+from mindspore import context
+from mindspore import Tensor
+from mindspore.model_zoo.mobilenet import mobilenet_v2
+from mindspore.parallel._auto_parallel_context import auto_parallel_context
+from mindspore.nn.optim.momentum import Momentum
+from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
+
+from mindspore.train.model import Model, ParallelMode
+
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
+from mindspore.train.loss_scale_manager import FixedLossScaleManager
+import mindspore.dataset.engine as de
+from mindspore.communication.management import init
+
+random.seed(1)
+np.random.seed(1)
+de.config.set_seed(1)
+
+parser = argparse.ArgumentParser(description='Image classification')
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+args_opt = parser.parse_args()
+
+device_id = int(os.getenv('DEVICE_ID'))
+rank_id = int(os.getenv('RANK_ID'))
+rank_size = int(os.getenv('RANK_SIZE'))
+run_distribute = rank_size > 1
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False)
+context.set_context(enable_task_sink=True)
+context.set_context(enable_loop_sink=True)
+context.set_context(enable_mem_reuse=True)
+
+
+class Monitor(Callback):
+    """
+    Monitor loss and time.
+
+    Args:
+        lr_init (numpy array): train lr
+
+    Returns:
+        None.
+
+    Examples:
+        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
+    """
+
+    def __init__(self, lr_init=None):
+        super(Monitor, self).__init__()
+        self.lr_init = lr_init
+        self.lr_init_len = len(lr_init)
+
+    def epoch_begin(self, run_context):
+        self.losses = []
+        self.epoch_time = time.time()
+
+    def epoch_end(self, run_context):
+        cb_params = run_context.original_args()
+
+        epoch_mseconds = (time.time() - self.epoch_time) * 1000
+        per_step_mseconds = epoch_mseconds / cb_params.batch_num
+        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds,
+                                                                                      per_step_mseconds,
+                                                                                      np.mean(self.losses)
+                                                                                      ), flush=True)
+
+    def step_begin(self, run_context):
+        self.step_time = time.time()
+
+    def step_end(self, run_context):
+        cb_params = run_context.original_args()
+        step_mseconds = (time.time() - self.step_time) * 1000
+        step_loss = cb_params.net_outputs
+
+        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
+            step_loss = step_loss[0]
+        if isinstance(step_loss, Tensor):
+            step_loss = np.mean(step_loss.asnumpy())
+
+        self.losses.append(step_loss)
+        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
+
+        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
+            cb_params.cur_epoch_num - 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
+            np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]), flush=True)
+
+
+if __name__ == '__main__':
+    if run_distribute:
+        context.set_context(enable_hccl=True)
+        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          parameter_broadcast=True, mirror_mean=True)
+        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        init()
+    else:
+        context.set_context(enable_hccl=False)
+
+    epoch_size = config.epoch_size
+    net = mobilenet_v2(num_classes=config.num_classes)
+    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+
+    print("train args: ", args_opt, "\ncfg: ", config,
+          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))
+
+    dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=True,
+                             repeat_num=epoch_size, batch_size=config.batch_size)
+    step_size = dataset.get_dataset_size()
+
+    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
+    lr = Tensor(get_lr(global_step=0, lr_init=0, lr_end=0, lr_max=config.lr,
+                       warmup_epochs=config.warmup_epochs, total_epochs=epoch_size, steps_per_epoch=step_size))
+    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
+                   config.weight_decay, config.loss_scale)
+
+    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, amp_level='O0',
+                  keep_batchnorm_fp32=False)
+
+    cb = None
+    if rank_id == 0:
+        cb = [Monitor(lr_init=lr.asnumpy())]
+        if config.save_checkpoint:
+            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
+                                         keep_checkpoint_max=config.keep_checkpoint_max)
+            ckpt_cb = ModelCheckpoint(prefix="mobilenet", directory=config.save_checkpoint_path, config=config_ck)
+            cb += [ckpt_cb]
+    model.train(epoch_size, dataset, callbacks=cb)
diff --git a/mindspore/model_zoo/mobilenet.py b/mindspore/model_zoo/mobilenet.py
new file mode 100644
index 0000000000..1d4f1b10b5
--- /dev/null
+++ b/mindspore/model_zoo/mobilenet.py
@@ -0,0 +1,284 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""MobileNetV2 model define"""
+import numpy as np
+import mindspore.nn as nn
+from mindspore.ops import operations as P
+from mindspore.ops.operations import TensorAdd
+from mindspore import Parameter, Tensor
+from mindspore.common.initializer import initializer
+
+__all__ = ['MobileNetV2', 'mobilenet_v2']
+
+
+def _make_divisible(v, divisor, min_value=None):
+    """
+    This function is taken from the original tf repo.
+    It ensures that all layers have a channel number that is divisible by 8
+    It can be seen here:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+    :param v:
+    :param divisor:
+    :param min_value:
+    :return:
+    """
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that round down does not go down by more than 10%.
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+class GlobalAvgPooling(nn.Cell):
+    """
+    Global avg pooling definition.
+
+    Args:
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> GlobalAvgPooling()
+    """
+    def __init__(self):
+        super(GlobalAvgPooling, self).__init__()
+        self.mean = P.ReduceMean(keep_dims=False)
+
+    def construct(self, x):
+        x = self.mean(x, (2, 3))
+        return x
+
+
+class DepthwiseConv(nn.Cell):
+    """
+    Depthwise Convolution wrapper definition.
+
+    Args:
+        in_planes (int): Input channel.
+        kernel_size (int): Input kernel size.
+        stride (int): Stride size.
+        pad_mode (str): pad mode in (pad, same, valid)
+        channel_multiplier (int): Output channel multiplier
+        has_bias (bool): has bias or not
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1)
+    """
+    def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False):
+        super(DepthwiseConv, self).__init__()
+        self.has_bias = has_bias
+        self.in_channels = in_planes
+        self.channel_multiplier = channel_multiplier
+        self.out_channels = in_planes * channel_multiplier
+        self.kernel_size = (kernel_size, kernel_size)
+        self.depthwise_conv = P.DepthwiseConv2dNative(channel_multiplier=channel_multiplier, kernel_size=kernel_size,
+                                                      stride=stride, pad_mode=pad_mode, pad=pad)
+        self.bias_add = P.BiasAdd()
+        weight_shape = [channel_multiplier, in_planes, *self.kernel_size]
+        self.weight = Parameter(initializer('ones', weight_shape), name='weight')
+
+        if has_bias:
+            bias_shape = [channel_multiplier * in_planes]
+            self.bias = Parameter(initializer('zeros', bias_shape), name='bias')
+        else:
+            self.bias = None
+
+    def construct(self, x):
+        output = self.depthwise_conv(x, self.weight)
+        if self.has_bias:
+            output = self.bias_add(output, self.bias)
+        return output
+
+
+class ConvBNReLU(nn.Cell):
+    """
+    Convolution/Depthwise fused with Batchnorm and ReLU block definition.
+
+    Args:
+        in_planes (int): Input channel.
+        out_planes (int): Output channel.
+        kernel_size (int): Input kernel size.
+        stride (int): Stride size for the first convolutional layer. Default: 1.
+        groups (int): channel group. Convolution is 1 while Depthiwse is input channel. Default: 1.
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
+    """
+    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
+        super(ConvBNReLU, self).__init__()
+        padding = (kernel_size - 1) // 2
+        if groups == 1:
+            conv = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad',
+                             padding=padding)
+        else:
+            conv = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=padding)
+        layers = [conv, nn.BatchNorm2d(out_planes), nn.ReLU6()]
+        self.features = nn.SequentialCell(layers)
+
+    def construct(self, x):
+        output = self.features(x)
+        return output
+
+
+class InvertedResidual(nn.Cell):
+    """
+    Mobilenetv2 residual block definition.
+
+    Args:
+        inp (int): Input channel.
+        oup (int): Output channel.
+        stride (int): Stride size for the first convolutional layer. Default: 1.
+        expand_ratio (int): expand ration of input channel
+
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> InvertedResidual(3, 256, 1, 1)
+    """
+    def __init__(self, inp, oup, stride, expand_ratio):
+        super(InvertedResidual, self).__init__()
+        assert stride in [1, 2]
+
+        hidden_dim = int(round(inp * expand_ratio))
+        self.use_res_connect = stride == 1 and inp == oup
+
+        layers = []
+        if expand_ratio != 1:
+            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
+        layers.extend([
+            # dw
+            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
+            # pw-linear
+            nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False),
+            nn.BatchNorm2d(oup),
+        ])
+        self.conv = nn.SequentialCell(layers)
+        self.add = TensorAdd()
+        self.cast = P.Cast()
+
+    def construct(self, x):
+        identity = x
+        x = self.conv(x)
+        if self.use_res_connect:
+            return self.add(identity, x)
+        return x
+
+
+class MobileNetV2(nn.Cell):
+    """
+    MobileNetV2 architecture.
+
+    Args:
+        num_classes (int): number of classes. Default is 1000.
+        width_mult (float): Channels multiplier for round to 8/16 and others. Default is 1.
+        has_dropout (bool): Is dropout used. Default is false
+        inverted_residual_setting (list): Inverted residual settings. Default is None
+        round_nearest (int): Channels are rounded to a multiple of this value. Default is 8
+    Returns:
+        Tensor, output tensor.
+
+    Examples:
+        >>> MobileNetV2(num_classes=1000)
+    """
+    def __init__(self, num_classes=1000, width_mult=1.,
+                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
+        super(MobileNetV2, self).__init__()
+        block = InvertedResidual
+        input_channel = 32
+        last_channel = 1280
+        # setting of inverted residual blocks
+        self.cfgs = inverted_residual_setting
+        if inverted_residual_setting is None:
+            self.cfgs = [
+                # t, c, n, s
+                [1, 16, 1, 1],
+                [6, 24, 2, 2],
+                [6, 32, 3, 2],
+                [6, 64, 4, 2],
+                [6, 96, 3, 1],
+                [6, 160, 3, 2],
+                [6, 320, 1, 1],
+            ]
+
+        # building first layer
+        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
+        self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
+        features = [ConvBNReLU(3, input_channel, stride=2)]
+        # building inverted residual blocks
+        for t, c, n, s in self.cfgs:
+            output_channel = _make_divisible(c * width_mult, round_nearest)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
+                input_channel = output_channel
+        # building last several layers
+        features.append(ConvBNReLU(input_channel, self.out_channels, kernel_size=1))
+        # wrap the layers in nn.SequentialCell
+        self.features = nn.SequentialCell(features)
+        # mobilenet head
+        head = ([GlobalAvgPooling(), nn.Dense(self.out_channels, num_classes, has_bias=True)] if not has_dropout else
+                [GlobalAvgPooling(), nn.Dropout(0.2), nn.Dense(self.out_channels, num_classes, has_bias=True)])
+        self.head = nn.SequentialCell(head)
+
+        self._initialize_weights()
+
+    def construct(self, x):
+        x = self.features(x)
+        x = self.head(x)
+        return x
+
+    def _initialize_weights(self):
+        """
+        Initialize weights.
+
+        Args:
+
+        Returns:
+            None.
+
+        Examples:
+            >>> _initialize_weights()
+        """
+        for _, m in self.cells_and_names():
+            if isinstance(m, (nn.Conv2d, DepthwiseConv)):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
+                                                                    m.weight.data.shape()).astype("float32")))
+                if m.bias is not None:
+                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape(), dtype="float32")))
+                m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape(), dtype="float32")))
+            elif isinstance(m, nn.Dense):
+                m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape()).astype("float32")))
+                if m.bias is not None:
+                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape(), dtype="float32")))
+
+
+def mobilenet_v2(**kwargs):
+    """
+    Constructs a MobileNet V2 model
+    """
+    return MobileNetV2(**kwargs)

From 70c80c05951b37c74837e07e948da80ea41aee12 Mon Sep 17 00:00:00 2001
From: buxue <yiren19920727@163.com>
Date: Mon, 27 Apr 2020 17:41:15 +0800
Subject: [PATCH 128/242] dock FloorMod GreaterEqual NotEqual ScatterNdUpdate

---
 mindspore/ops/_op_impl/tbe/__init__.py        |  6 ++-
 .../ops/_op_impl/tbe/{fill_d.py => fill.py}   |  4 +-
 mindspore/ops/_op_impl/tbe/floor_mod.py       | 38 ++++++++++++++++
 mindspore/ops/_op_impl/tbe/greater_equal.py   | 45 +++++++++++++++++++
 mindspore/ops/_op_impl/tbe/not_equal.py       | 45 +++++++++++++++++++
 mindspore/ops/_op_impl/tbe/scatter_nd.py      |  2 +-
 .../ops/_op_impl/tbe/scatter_nd_update.py     | 42 +++++++++++++++++
 7 files changed, 178 insertions(+), 4 deletions(-)
 rename mindspore/ops/_op_impl/tbe/{fill_d.py => fill.py} (97%)
 create mode 100644 mindspore/ops/_op_impl/tbe/floor_mod.py
 create mode 100644 mindspore/ops/_op_impl/tbe/greater_equal.py
 create mode 100644 mindspore/ops/_op_impl/tbe/not_equal.py
 create mode 100644 mindspore/ops/_op_impl/tbe/scatter_nd_update.py

diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py
index 8030aac5c6..f9240ee325 100644
--- a/mindspore/ops/_op_impl/tbe/__init__.py
+++ b/mindspore/ops/_op_impl/tbe/__init__.py
@@ -142,8 +142,12 @@ from .smooth_l1_loss_grad import _smooth_l1_loss_grad_tbe
 from .fused_mul_add import _fused_mul_add_tbe
 from .fused_mul_add_n import _fused_mul_add_n_tbe
 from .fused_mul_apply_momentum import _fused_mul_apply_momentum_tbe
-from .fill_d import _fill_d_op_tbe
+from .fill import _fill_op_tbe
 from .erf import _erf_op_tbe
 from .depthwise_conv2d import _depthwise_conv2d_tbe
 from .depthwise_conv2d_backprop_filter import _depthwise_conv2d_backprop_filter_tbe
 from .depthwise_conv2d_backprop_input import _depthwise_conv2d_backprop_input_tbe
+from .greater_equal import _greater_equal_tbe
+from .not_equal import _not_equal_tbe
+from .floor_mod import _floor_mod_tbe
+from .scatter_nd_update import _scatter_nd_update_tbe
diff --git a/mindspore/ops/_op_impl/tbe/fill_d.py b/mindspore/ops/_op_impl/tbe/fill.py
similarity index 97%
rename from mindspore/ops/_op_impl/tbe/fill_d.py
rename to mindspore/ops/_op_impl/tbe/fill.py
index 97c6b73cf5..90301f123b 100644
--- a/mindspore/ops/_op_impl/tbe/fill_d.py
+++ b/mindspore/ops/_op_impl/tbe/fill.py
@@ -16,7 +16,7 @@
 """FillD op"""
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 
-fill_d_op_info = TBERegOp("FillD") \
+fill_d_op_info = TBERegOp("Fill") \
     .fusion_type("ELEMWISE") \
     .async_flag(False) \
     .binfile_name("fill_d.so") \
@@ -50,6 +50,6 @@ fill_d_op_info = TBERegOp("FillD") \
 
 
 @op_info_register(fill_d_op_info)
-def _fill_d_op_tbe():
+def _fill_op_tbe():
     """FillD TBE register"""
     return
diff --git a/mindspore/ops/_op_impl/tbe/floor_mod.py b/mindspore/ops/_op_impl/tbe/floor_mod.py
new file mode 100644
index 0000000000..031f160e0a
--- /dev/null
+++ b/mindspore/ops/_op_impl/tbe/floor_mod.py
@@ -0,0 +1,38 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""FloorMod op"""
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+
+floor_mod_op_info = TBERegOp("FloorMod") \
+    .fusion_type("ELEMWISE") \
+    .async_flag(False) \
+    .binfile_name("floor_mod.so") \
+    .compute_cost(10) \
+    .kernel_name("floor_mod") \
+    .partial_flag(True) \
+    .input(0, "x1", False, "required", "all") \
+    .input(1, "x2", False, "required", "all") \
+    .output(0, "y", False, "required", "all") \
+    .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \
+    .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \
+    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \
+    .get_op_info()
+
+
+@op_info_register(floor_mod_op_info)
+def _floor_mod_tbe():
+    """FloorMod TBE register"""
+    return
diff --git a/mindspore/ops/_op_impl/tbe/greater_equal.py b/mindspore/ops/_op_impl/tbe/greater_equal.py
new file mode 100644
index 0000000000..5609f15f18
--- /dev/null
+++ b/mindspore/ops/_op_impl/tbe/greater_equal.py
@@ -0,0 +1,45 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""GreaterEqual op"""
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+
+greater_equal_op_info = TBERegOp("GreaterEqual") \
+    .fusion_type("OPAQUE") \
+    .async_flag(False) \
+    .binfile_name("greater_equal.so") \
+    .compute_cost(10) \
+    .kernel_name("greater_equal") \
+    .partial_flag(True) \
+    .input(0, "x1", False, "required", "all") \
+    .input(1, "x2", False, "required", "all") \
+    .output(0, "y", False, "required", "all") \
+    .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.BOOL_5HD) \
+    .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.BOOL_5HD) \
+    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.BOOL_5HD) \
+    .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.BOOL_5HD) \
+    .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.BOOL_5HD) \
+    .get_op_info()
+
+
+@op_info_register(greater_equal_op_info)
+def _greater_equal_tbe():
+    """GreaterEqual TBE register"""
+    return
diff --git a/mindspore/ops/_op_impl/tbe/not_equal.py b/mindspore/ops/_op_impl/tbe/not_equal.py
new file mode 100644
index 0000000000..bd801d9a40
--- /dev/null
+++ b/mindspore/ops/_op_impl/tbe/not_equal.py
@@ -0,0 +1,45 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""NotEqual op"""
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+
+not_equal_op_info = TBERegOp("NotEqual") \
+    .fusion_type("ELEMWISE") \
+    .async_flag(False) \
+    .binfile_name("not_equal.so") \
+    .compute_cost(10) \
+    .kernel_name("not_equal") \
+    .partial_flag(True) \
+    .input(0, "x1", False, "required", "all") \
+    .input(1, "x2", False, "required", "all") \
+    .output(0, "y", False, "required", "all") \
+    .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.BOOL_5HD) \
+    .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.BOOL_5HD) \
+    .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.I32_5HD, DataType.I32_5HD, DataType.BOOL_5HD) \
+    .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.BOOL_5HD) \
+    .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.BOOL_Default) \
+    .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.BOOL_5HD) \
+    .get_op_info()
+
+
+@op_info_register(not_equal_op_info)
+def _not_equal_tbe():
+    """NotEqual TBE register"""
+    return
diff --git a/mindspore/ops/_op_impl/tbe/scatter_nd.py b/mindspore/ops/_op_impl/tbe/scatter_nd.py
index 6c9eae3ad4..168b34582f 100644
--- a/mindspore/ops/_op_impl/tbe/scatter_nd.py
+++ b/mindspore/ops/_op_impl/tbe/scatter_nd.py
@@ -37,5 +37,5 @@ scatter_nd_op_info = TBERegOp("ScatterNd") \
 
 @op_info_register(scatter_nd_op_info)
 def _scatter_nd_tbe():
-    """Conv2D TBE register"""
+    """ScatterNd TBE register"""
     return
diff --git a/mindspore/ops/_op_impl/tbe/scatter_nd_update.py b/mindspore/ops/_op_impl/tbe/scatter_nd_update.py
new file mode 100644
index 0000000000..df0996f26f
--- /dev/null
+++ b/mindspore/ops/_op_impl/tbe/scatter_nd_update.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""ScatterNdUpdate op"""
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+
+scatter_nd_update_op_info = TBERegOp("ScatterNdUpdate") \
+    .fusion_type("ELEMWISE") \
+    .async_flag(False) \
+    .binfile_name("scatter_nd_update.so") \
+    .compute_cost(10) \
+    .kernel_name("scatter_nd_update") \
+    .partial_flag(True) \
+    .attr("use_locking", "optional", "bool", "all") \
+    .input(0, "var", False, "required", "all") \
+    .input(1, "indices", False, "required", "all") \
+    .input(2, "updates", False, "required", "all") \
+    .output(0, "var", False, "required", "all") \
+    .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default, DataType.F16_Default) \
+    .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default, DataType.F32_Default) \
+    .dtype_format(DataType.I8_Default, DataType.I32_Default, DataType.I8_Default, DataType.I8_Default) \
+    .dtype_format(DataType.U8_Default, DataType.I32_Default, DataType.U8_Default, DataType.U8_Default) \
+    .dtype_format(DataType.BOOL_Default, DataType.I32_Default, DataType.BOOL_Default, DataType.BOOL_Default) \
+    .get_op_info()
+
+
+@op_info_register(scatter_nd_update_op_info)
+def _scatter_nd_update_tbe():
+    """ScatterNdUpdate TBE register"""
+    return

From 53b45295585196307af039fdd03f6bac9213c7fb Mon Sep 17 00:00:00 2001
From: wilfChen <chenweifeng720@huawei.com>
Date: Mon, 27 Apr 2020 20:06:47 +0800
Subject: [PATCH 129/242] Gpu support LayerNorm kernel

---
 .../gpu/cuda_impl/layer_norm_grad_impl.cu     | 205 ++++++++++++++++++
 .../gpu/cuda_impl/layer_norm_grad_impl.cuh    |  26 +++
 .../kernel/gpu/cuda_impl/layer_norm_impl.cu   | 148 +++++++++++++
 .../kernel/gpu/cuda_impl/layer_norm_impl.cuh  |  26 +++
 .../kernel/gpu/nn/layer_norm_gpu_kernel.cc    |  31 +++
 .../kernel/gpu/nn/layer_norm_gpu_kernel.h     | 103 +++++++++
 .../gpu/nn/layer_norm_grad_gpu_kernel.cc      |  33 +++
 .../gpu/nn/layer_norm_grad_gpu_kernel.h       | 107 +++++++++
 tests/st/ops/gpu/test_layer_norm_grad_op.py   | 140 ++++++++++++
 tests/st/ops/gpu/test_layer_norm_op.py        | 134 ++++++++++++
 10 files changed, 953 insertions(+)
 create mode 100644 mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu
 create mode 100644 mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh
 create mode 100644 mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu
 create mode 100644 mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh
 create mode 100644 mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc
 create mode 100644 mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h
 create mode 100644 mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc
 create mode 100644 mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h
 create mode 100644 tests/st/ops/gpu/test_layer_norm_grad_op.py
 create mode 100644 tests/st/ops/gpu/test_layer_norm_op.py

diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu
new file mode 100644
index 0000000000..f8377fd721
--- /dev/null
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cu
@@ -0,0 +1,205 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <cuda_runtime.h>
+#include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh"
+
+constexpr int NUM_PER_THREAD_REDUCE = 4;
+constexpr int WARP_SIZE = 32;
+
+template <typename T>
+inline __device__ void GammaAndBetaThreadReduce(const int& col, const int& row_dim, const int& col_dim,
+                                                const T& epsilon, const T* dy, const T* x, const T* mean, const T* var,
+                                                T* dg, T* db) {
+  int loop_num = (row_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE;
+  for (int i = threadIdx.x; i < loop_num; i += blockDim.x) {
+    for (int j = 0; j < NUM_PER_THREAD_REDUCE; j++) {
+      int row = NUM_PER_THREAD_REDUCE * i + j;
+      if (row >= row_dim) {
+        return;
+      }
+
+      int pos = row * col_dim + col;
+      dg[0] += dy[pos] * pow(var[row] + epsilon, -0.5) * (x[pos] - mean[row]);
+      db[0] += dy[pos];
+    }
+  }
+}
+
+template <typename T>
+inline __device__ void GammaAndBetaWarpReduce(T* dg, T* db) {
+  for (int delta = (WARP_SIZE >> 1); delta > 0; delta >>= 1) {
+    dg[0] += __shfl_down_sync(0xffffffff, dg[0], delta);
+    db[0] += __shfl_down_sync(0xffffffff, db[0], delta);
+  }
+}
+
+template <typename T>
+inline __device__ void GammaAndBetaBlockReduce(const int& col, const int& row_dim, T* dg, T* db, T* dg_addr,
+                                               T* db_addr) {
+  // No early exit for threadIdx.x >= row_dim: every thread has to reach each __syncthreads() below,
+  // and every warp leader has to publish its partial sums (zero when its warp saw no row) so the tree
+  // reduction never reads an unwritten slot. share_mem must hold 2 * blockDim.x / WARP_SIZE values.
+
+  // load data to share memory
+  // thread(0, 32, 64, 96, ...) keep the data
+  extern __shared__ T share_mem[];
+  if (threadIdx.x % WARP_SIZE == 0) {
+    int offset = threadIdx.x / WARP_SIZE * 2;
+    share_mem[offset] = dg[0];
+    share_mem[offset + 1] = db[0];
+  }
+  __syncthreads();
+
+  for (int stride = blockDim.x / WARP_SIZE / 2; stride > 0; stride >>= 1) {
+    if (threadIdx.x < stride) {
+      int offset = (threadIdx.x + stride) * 2;
+      share_mem[threadIdx.x * 2] += share_mem[offset];
+      share_mem[threadIdx.x * 2 + 1] += share_mem[offset + 1];
+    }
+    __syncthreads();  // publish this round's sums before the next, narrower round reads them
+  }
+
+  if (threadIdx.x == 0) {
+    dg_addr[col] = share_mem[0];
+    db_addr[col] = share_mem[1];
+  }
+}
+
+template <typename T>
+__global__ void GammaAndBetaPropKernel(const int row_dim, const int col_dim, const T epsilon, const T* dy, const T* x,
+                                       const T* mean_addr, const T* var_addr, T* dg_addr, T* db_addr) {
+  // row: [0:param_axis]
+  // col: [param_axis:]
+  // dg[i][j] = dy[i][j] * pow(var[i] + epsilon, -0.5) * (x[i][j] - mean[i])
+  // dg[j] = \Sigma_{i}dg[i][j], db[j] = \Sigma_{i}dy[i][j]
+  for (int col = blockIdx.x; col < col_dim; col += gridDim.x) {
+    T dg = 0;
+    T db = 0;
+    GammaAndBetaThreadReduce(col, row_dim, col_dim, epsilon, dy, x, mean_addr, var_addr, &dg, &db);
+    GammaAndBetaWarpReduce(&dg, &db);
+    GammaAndBetaBlockReduce(col, row_dim, &dg, &db, dg_addr, db_addr);
+  }
+}
+
+template <typename T>
+inline __device__ void InputThreadReduce(const int& row, const int& col_dim, const int& param_dim, const T& epsilon,
+                                         T* sum1, T* sum2, T* sum3, const T* dy, const T* x, const T* mean,
+                                         const T* var, const T* gamma) {
+  int loop_num = (col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE;
+  for (int i = threadIdx.x; i < loop_num; i += blockDim.x) {
+    for (int j = 0; j < NUM_PER_THREAD_REDUCE; j++) {
+      int col = NUM_PER_THREAD_REDUCE * i + j;
+      if (col >= col_dim) {
+        return;
+      }
+
+      int pos = row * col_dim + col;
+      int gamma_offset = pos % param_dim;
+      T v1 = dy[pos] * gamma[gamma_offset];
+      T v2 = x[pos] - mean[row];
+
+      sum1[0] += -0.5 * v1 * v2 * pow(var[row] + epsilon, -1.5);
+      sum2[0] += v1;
+      sum3[0] += -2.0 * v2;
+    }
+  }
+}
+
+template <typename T>
+inline __device__ void InputWarpReduce(T* sum1, T* sum2, T* sum3) {
+  for (int delta = (WARP_SIZE >> 1); delta > 0; delta >>= 1) {
+    sum1[0] += __shfl_down_sync(0xffffffff, sum1[0], delta);
+    sum2[0] += __shfl_down_sync(0xffffffff, sum2[0], delta);
+    sum3[0] += __shfl_down_sync(0xffffffff, sum3[0], delta);
+  }
+}
+
+template <typename T>
+inline __device__ void InputBlockReduce(const int& col_dim, T* sum1, T* sum2, T* sum3, T* share_mem) {
+  // No early exit for threadIdx.x >= col_dim: every thread has to reach each __syncthreads() below,
+  // and every warp leader has to publish its partial sums (zero when its warp saw no column) so the
+  // tree reduction never reads an unwritten slot. share_mem must hold 3 * blockDim.x / WARP_SIZE values.
+
+  // load data to share memory
+  // thread(0, 32, 64, 96, ...) keep the data
+  if (threadIdx.x % WARP_SIZE == 0) {
+    int offset = threadIdx.x / WARP_SIZE * 3;
+    share_mem[offset] = sum1[0];
+    share_mem[offset + 1] = sum2[0];
+    share_mem[offset + 2] = sum3[0];
+  }
+  __syncthreads();
+
+  for (int stride = blockDim.x / WARP_SIZE / 2; stride > 0; stride >>= 1) {
+    if (threadIdx.x < stride) {
+      int offset = (threadIdx.x + stride) * 3;
+      share_mem[threadIdx.x * 3] += share_mem[offset];
+      share_mem[threadIdx.x * 3 + 1] += share_mem[offset + 1];
+      share_mem[threadIdx.x * 3 + 2] += share_mem[offset + 2];
+    }
+    __syncthreads();  // publish this round's sums before the next, narrower round reads them
+  }
+}
+
+template <typename T>
+inline __device__ void InputProp(const int& row, const int& col_dim, const int& param_dim, const T& epsilon,
+                                 const T* dy, const T* x, const T* mean, const T* var, const T* gamma, T* dx,
+                                 const T* share_mem) {
+  for (int col = threadIdx.x; col < col_dim; col += blockDim.x) {
+    int pos = (row * col_dim + col);
+    int gamma_offset = pos % param_dim;
+    T v1 = dy[pos] * gamma[gamma_offset];
+    T v2 = x[pos] - mean[row];
+    T v3 = pow(var[row] + epsilon, -0.5);
+    dx[pos] = v1 * v3 + share_mem[0] * (2.0 / col_dim) * v2 +
+              (-1.0 * v3 * share_mem[1] + (1.0 / col_dim) * share_mem[0] * share_mem[2]) * (1.0 / col_dim);
+  }
+}
+
+template <typename T>
+__global__ void InputPropKernel(const int row_dim, const int col_dim, const int param_dim, const T epsilon, const T* dy,
+                                const T* x, const T* mean, const T* var, const T* gamma, T* dx) {
+  for (int row = blockIdx.x; row < row_dim; row += gridDim.x) {
+    T sum1 = 0;
+    T sum2 = 0;
+    T sum3 = 0;
+    extern __shared__ T share_mem[];
+    InputThreadReduce(row, col_dim, param_dim, epsilon, &sum1, &sum2, &sum3, dy, x, mean, var, gamma);
+    InputWarpReduce(&sum1, &sum2, &sum3);
+    InputBlockReduce(col_dim, &sum1, &sum2, &sum3, share_mem);
+    InputProp(row, col_dim, param_dim, epsilon, dy, x, mean, var, gamma, dx, share_mem);
+  }
+}
+
+template <typename T>
+void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* dy,
+                   const T* x, const T* mean, const T* var, const T* gamma, T* dx, T* dg, T* db, cudaStream_t stream) {
+  // Both kernels keep one group of partial sums per warp in shared memory, so size it from the block size.
+  constexpr int kThreadNum = 256;
+  constexpr int kWarpNum = kThreadNum / WARP_SIZE;
+  InputPropKernel<<<row_dim, kThreadNum, kWarpNum * 3 * sizeof(T), stream>>>(row_dim, col_dim, param_dim, epsilon, dy,
+                                                                             x, mean, var, gamma, dx);
+
+  GammaAndBetaPropKernel<<<col_dim, kThreadNum, kWarpNum * 2 * sizeof(T), stream>>>(row_dim, col_dim, epsilon, dy, x,
+                                                                                    mean, var, dg, db);
+}
+
+template void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const float& epsilon,
+                            const float* dy, const float* x, const float* mean, const float* var, const float* gamma,
+                            float* dx, float* dg, float* db, cudaStream_t stream);
diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh
new file mode 100644
index 0000000000..9f7d57cdb9
--- /dev/null
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_
+
+#include "device/gpu/cuda_common.h"
+
+template <typename T>
+void LayerNormGrad(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* dy,
+                   const T* x, const T* mean, const T* var, const T* gamma, T* dx, T* dg, T* db, cudaStream_t stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_GRAD_H_
diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu
new file mode 100644
index 0000000000..db33673744
--- /dev/null
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cu
@@ -0,0 +1,148 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <cuda_runtime.h>
+#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh"
+
+constexpr int NUM_PER_THREAD_REDUCE = 4;
+constexpr int WARP_SIZE = 32;
+
+template <typename T>
+inline __device__ void MeanAndVarAccumulation(T* mean, T* var, T* num, const T& val) {
+  // Welford Algorithm:
+  // \mu_k = \mu_{k-1} + (x_k - \mu_{k-1})/k
+  // \sigma_k^2 = \sigma_{k-1}^2 + (x_k - \mu_{k-1}) * (x_k - \mu_k)
+  num[0]++;
+  T mean_new = mean[0] + (val - mean[0]) / num[0];
+  var[0] = var[0] + (val - mean[0]) * (val - mean_new);
+  mean[0] = mean_new;
+}
+
+template <typename T>
+inline __device__ void MeanAndVarMerge(T* m1, T* v1, T* n1, const T& m2, const T& v2, const T& n2) {
+  if (n2 == 0) {
+    return;
+  }
+
+  T count = n1[0] + n2;
+  v1[0] = v1[0] + v2 + (m1[0] - m2) * (m1[0] - m2) * n1[0] * n2 / count;
+  m1[0] = (n1[0] * m1[0] + n2 * m2) / count;
+  n1[0] = count;
+}
+
+template <typename T>
+inline __device__ void ThreadReduce(const int& col_dim, const T* block_addr, T* mean, T* var, T* num) {
+  int loop_num = (col_dim + NUM_PER_THREAD_REDUCE - 1) / NUM_PER_THREAD_REDUCE;
+  for (int i = threadIdx.x; i < loop_num; i += blockDim.x) {
+    for (int j = 0; j < NUM_PER_THREAD_REDUCE; j++) {
+      int pos = NUM_PER_THREAD_REDUCE * i + j;
+      if (pos >= col_dim) {
+        return;
+      }
+      MeanAndVarAccumulation(mean, var, num, block_addr[pos]);
+    }
+  }
+}
+
+template <typename T>
+inline __device__ void WarpReduce(T* mean, T* var, T* num) {
+  for (int delta = (WARP_SIZE >> 1); delta > 0; delta >>= 1) {
+    T mean_other = __shfl_down_sync(0xffffffff, mean[0], delta);
+    T var_other = __shfl_down_sync(0xffffffff, var[0], delta);
+    T num_other = __shfl_down_sync(0xffffffff, num[0], delta);
+    MeanAndVarMerge(mean, var, num, mean_other, var_other, num_other);
+  }
+}
+
+template <typename T>
+inline __device__ void BlockReduce(const int& col_dim, T* mean, T* var, T* num, T* mean_addr, T* var_addr,
+                                   T* share_mem) {
+  // No early exit for threadIdx.x >= col_dim: every thread has to reach each __syncthreads() below,
+  // and every warp leader has to publish its partial (num == 0 when its warp saw no element, which
+  // MeanAndVarMerge skips). share_mem must hold 3 * blockDim.x / WARP_SIZE values.
+
+  // load data to share memory
+  // thread(0, 32, 64, 96, ...) keep the data
+  if (threadIdx.x % WARP_SIZE == 0) {
+    int offset = threadIdx.x / WARP_SIZE * 3;
+    share_mem[offset] = mean[0];
+    share_mem[offset + 1] = var[0];
+    share_mem[offset + 2] = num[0];
+  }
+  __syncthreads();
+
+  for (int stride = blockDim.x / WARP_SIZE / 2; stride > 0; stride >>= 1) {
+    if (threadIdx.x < stride) {
+      int offset = (threadIdx.x + stride) * 3;
+      MeanAndVarMerge(&share_mem[threadIdx.x * 3], &share_mem[threadIdx.x * 3 + 1], &share_mem[threadIdx.x * 3 + 2],
+                      share_mem[offset], share_mem[offset + 1], share_mem[offset + 2]);
+    }
+    __syncthreads();  // publish this round's merge before the next, narrower round reads it
+  }
+
+  if (threadIdx.x == 0) {
+    mean_addr[blockIdx.x] = share_mem[0];  // todo: blockDim.x < row
+    share_mem[1] /= col_dim;
+    var_addr[blockIdx.x] = share_mem[1];
+  }
+}
+
+template <typename T>
+inline __device__ void LayerNorm(const int& row, const int& col_dim, const int& param_dim, const T* x,
+                                 const T* share_mem, const T* gamma, const T* beta, const T epsilon, T* y) {
+  for (int col = threadIdx.x; col < col_dim; col += blockDim.x) {
+    int pos = row * col_dim + col;
+    int i = pos % param_dim;
+    y[pos] = (x[pos] - share_mem[0]) / sqrt(share_mem[1] + epsilon) * gamma[i] + beta[i];
+  }
+}
+
+template <typename T>
+__global__ void LayerNormKernel(const int row_dim, const int col_dim, const int param_dim, const T epsilon, const T* x,
+                                const T* gamma, const T* beta, T* y, T* mean_addr, T* var_addr) {
+  for (auto row = blockIdx.x; row < row_dim; row += gridDim.x) {
+    T mean = 0;
+    T var = 0;
+    T num = 0;
+    const T* block_addr = x + row * col_dim;
+    extern __shared__ T share_mem[];
+
+    ThreadReduce(col_dim, block_addr, &mean, &var, &num);
+    WarpReduce(&mean, &var, &num);
+    BlockReduce(col_dim, &mean, &var, &num, mean_addr, var_addr, share_mem);
+
+    __syncthreads();
+    LayerNorm(row, col_dim, param_dim, x, share_mem, gamma, beta, epsilon, y);
+  }
+}
+
+template <typename T>
+void LayerNorm(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* x,
+               const T* gamma, const T* beta, T* y, T* mean, T* var, cudaStream_t stream) {
+  const dim3 block(row_dim);
+  const dim3 thread(256);
+  // keep the mean/var/num after warp reduce: one triple per warp of the block, whatever col_dim is,
+  // because the block-level reduction always walks blockDim.x / WARP_SIZE slots
+  int share_mem = thread.x / WARP_SIZE * 3 * sizeof(T);
+  LayerNormKernel<<<block, thread, share_mem, stream>>>(row_dim, col_dim, param_dim, epsilon, x, gamma, beta, y, mean,
+                                                        var);
+}
+
+template void LayerNorm(const int& row_dim, const int& col_dim, const int& param_dim, const float& epsilon,
+                        const float* x, const float* gamma, const float* beta, float* y, float* mean, float* var,
+                        cudaStream_t stream);
diff --git a/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh
new file mode 100644
index 0000000000..4832b08746
--- /dev/null
+++ b/mindspore/ccsrc/kernel/gpu/cuda_impl/layer_norm_impl.cuh
@@ -0,0 +1,26 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_
+
+#include "device/gpu/cuda_common.h"
+
+template <typename T>
+void LayerNorm(const int& row_dim, const int& col_dim, const int& param_dim, const T& epsilon, const T* x,
+               const T* gamma, const T* beta, T* y, T* mean, T* var, cudaStream_t stream);
+
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_IMPL_LAYER_NORM_H_
diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc
new file mode 100644
index 0000000000..e67b745ab3
--- /dev/null
+++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.cc
@@ -0,0 +1,31 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/gpu/nn/layer_norm_gpu_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_ONE(LayerNorm,
+                      KernelAttr()
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddInputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32)
+                        .AddOutputAttr(kNumberTypeFloat32),
+                      LayerNormGpuKernel, float)
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h
new file mode 100644
index 0000000000..e80cd091e5
--- /dev/null
+++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_gpu_kernel.h
@@ -0,0 +1,103 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GPU_KERNEL_H_
+
+#include <vector>
+#include "kernel/gpu/gpu_kernel.h"
+#include "kernel/gpu/gpu_kernel_factory.h"
+#include "kernel/gpu/cuda_impl/layer_norm_impl.cuh"
+
+namespace mindspore {
+namespace kernel {
+template <typename T>  // T: element type used for every input and output buffer
+class LayerNormGpuKernel : public GpuKernel {  // host-side wrapper that forwards to the CUDA LayerNorm() launcher
+ public:
+  LayerNormGpuKernel() : input_row_(1), input_col_(1), param_dim_(1) {}  // 1 = identity for the products in Init()
+  ~LayerNormGpuKernel() override = default;
+
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+              const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
+    auto x = GetDeviceAddress<T>(inputs, 0);  // inputs, in order: x, gamma, beta
+    auto gamma = GetDeviceAddress<T>(inputs, 1);
+    auto beta = GetDeviceAddress<T>(inputs, 2);
+    auto y = GetDeviceAddress<T>(outputs, 0);  // outputs, in order: y, mean, variance
+    auto mean = GetDeviceAddress<T>(outputs, 1);
+    auto variance = GetDeviceAddress<T>(outputs, 2);
+
+    T epsilon = 10e-12;  // NOTE(review): 10e-12 equals 1e-11, not 1e-12; confirm the intended value
+    LayerNorm(input_row_, input_col_, param_dim_, epsilon, x, gamma, beta, y, mean, variance,
+              reinterpret_cast<cudaStream_t>(stream_ptr));  // stream_ptr is reinterpreted as the cudaStream_t
+    return true;  // no status from the launch is checked here
+  }
+  bool Init(const CNodePtr &kernel_node) override {
+    int begin_norm_axis = GetAttr<int>(kernel_node, "begin_norm_axis");
+    int begin_params_axis = GetAttr<int>(kernel_node, "begin_params_axis");
+
+    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // shape for input 0 (x in Launch)
+    if (begin_norm_axis < 0) {  // a negative axis is offset by the rank
+      begin_norm_axis += input_shape.size();
+    }
+
+    if (begin_params_axis < 0) {  // same adjustment for the parameter axis
+      begin_params_axis += input_shape.size();
+    }
+
+    for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) {  // rows: product of dims before begin_norm_axis
+      input_row_ *= input_shape[i];
+    }
+
+    for (size_t i = begin_norm_axis; i < input_shape.size(); i++) {  // cols: product of dims from begin_norm_axis on
+      input_col_ *= input_shape[i];
+    }
+
+    for (size_t i = begin_params_axis; i < input_shape.size(); i++) {  // product of dims from begin_params_axis on
+      param_dim_ *= input_shape[i];
+    }
+
+    InitSizeLists();
+    return true;  // NOTE(review): members are never reset, so a second Init() would compound the products
+  }
+
+ protected:
+  void InitSizeLists() override {  // byte sizes, pushed in the same index order Launch reads them
+    input_size_list_.push_back(input_row_ * input_col_ * sizeof(T));  // x
+    input_size_list_.push_back(param_dim_ * sizeof(T));  // gamma
+    input_size_list_.push_back(param_dim_ * sizeof(T));  // beta
+
+    output_size_list_.push_back(input_row_ * input_col_ * sizeof(T));  // y
+    output_size_list_.push_back(input_row_ * sizeof(T));  // mean: one value per row
+    output_size_list_.push_back(input_row_ * sizeof(T));  // variance: one value per row
+    return;
+  }
+
+ private:
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;  // never filled in this class
+
+  int input_row_;  // product of input dims before begin_norm_axis
+  int input_col_;  // product of input dims from begin_norm_axis onward
+  int param_dim_;  // product of input dims from begin_params_axis onward (gamma/beta element count)
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GPU_KERNEL_H_
diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc
new file mode 100644
index 0000000000..e268161349
--- /dev/null
+++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.cc
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/gpu/nn/layer_norm_grad_gpu_kernel.h"
+
+namespace mindspore {
+namespace kernel {
+MS_REG_GPU_KERNEL_ONE(LayerNormGrad,  // the only LayerNormGrad registration in this file: all-float32
+                      KernelAttr()  // attrs presumably map positionally to the indices read in Launch - confirm
+                        .AddInputAttr(kNumberTypeFloat32)  // dy
+                        .AddInputAttr(kNumberTypeFloat32)  // x
+                        .AddInputAttr(kNumberTypeFloat32)  // var
+                        .AddInputAttr(kNumberTypeFloat32)  // mean
+                        .AddInputAttr(kNumberTypeFloat32)  // gamma
+                        .AddOutputAttr(kNumberTypeFloat32)  // dx
+                        .AddOutputAttr(kNumberTypeFloat32)  // dg
+                        .AddOutputAttr(kNumberTypeFloat32),  // db
+                      LayerNormGradGpuKernel, float)
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h
new file mode 100644
index 0000000000..84049206db
--- /dev/null
+++ b/mindspore/ccsrc/kernel/gpu/nn/layer_norm_grad_gpu_kernel.h
@@ -0,0 +1,107 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GRAD_GPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GRAD_GPU_KERNEL_H_
+
+#include <vector>
+#include "kernel/gpu/gpu_kernel.h"
+#include "kernel/gpu/gpu_kernel_factory.h"
+#include "kernel/gpu/cuda_impl/layer_norm_grad_impl.cuh"
+
+namespace mindspore {
+namespace kernel {
+template <typename T>  // T: element type used for every input and output buffer
+class LayerNormGradGpuKernel : public GpuKernel {  // host-side wrapper forwarding to the CUDA LayerNormGrad() launcher
+ public:
+  LayerNormGradGpuKernel() : input_row_(1), input_col_(1), param_dim_(1) {}  // 1 = identity for products in Init()
+  ~LayerNormGradGpuKernel() override = default;
+
+  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
+  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
+  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+              const std::vector<AddressPtr> &outputs, uintptr_t stream_ptr) override {
+    auto dy = GetDeviceAddress<T>(inputs, 0);  // inputs, in order: dy, x, var, mean, gamma
+    auto x = GetDeviceAddress<T>(inputs, 1);
+    auto var = GetDeviceAddress<T>(inputs, 2);  // var precedes mean here; the LayerNormGrad call passes mean first
+    auto mean = GetDeviceAddress<T>(inputs, 3);
+    auto gamma = GetDeviceAddress<T>(inputs, 4);
+    auto dx = GetDeviceAddress<T>(outputs, 0);  // outputs, in order: dx, dg, db
+    auto dg = GetDeviceAddress<T>(outputs, 1);
+    auto db = GetDeviceAddress<T>(outputs, 2);
+
+    T epsilon = 10e-12;  // NOTE(review): 10e-12 equals 1e-11, not 1e-12; confirm the intended value
+    LayerNormGrad(input_row_, input_col_, param_dim_, epsilon, dy, x, mean, var, gamma, dx, dg, db,
+                  reinterpret_cast<cudaStream_t>(stream_ptr));  // stream_ptr is reinterpreted as the cudaStream_t
+    return true;  // no status from the launch is checked here
+  }
+  bool Init(const CNodePtr &kernel_node) override {
+    int begin_norm_axis = GetAttr<int>(kernel_node, "begin_norm_axis");
+    int begin_params_axis = GetAttr<int>(kernel_node, "begin_params_axis");
+
+    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);  // shape for input 0 (dy in Launch)
+    if (begin_norm_axis < 0) {  // a negative axis is offset by the rank
+      begin_norm_axis += input_shape.size();
+    }
+
+    if (begin_params_axis < 0) {  // same adjustment for the parameter axis
+      begin_params_axis += input_shape.size();
+    }
+
+    for (size_t i = 0; i < IntToSize(begin_norm_axis); i++) {  // rows: product of dims before begin_norm_axis
+      input_row_ *= input_shape[i];
+    }
+
+    for (size_t i = begin_norm_axis; i < input_shape.size(); i++) {  // cols: product of dims from begin_norm_axis on
+      input_col_ *= input_shape[i];
+    }
+
+    for (size_t i = begin_params_axis; i < input_shape.size(); i++) {  // product of dims from begin_params_axis on
+      param_dim_ *= input_shape[i];
+    }
+
+    InitSizeLists();
+    return true;  // NOTE(review): members are never reset, so a second Init() would compound the products
+  }
+
+ protected:
+  void InitSizeLists() override {  // byte sizes, pushed in the same index order Launch reads them
+    input_size_list_.push_back(input_row_ * input_col_ * sizeof(T));  // dy
+    input_size_list_.push_back(input_row_ * input_col_ * sizeof(T));  // x
+    input_size_list_.push_back(input_row_ * sizeof(T));  // var: one value per row
+    input_size_list_.push_back(input_row_ * sizeof(T));  // mean: one value per row
+    input_size_list_.push_back(param_dim_ * sizeof(T));  // gamma
+
+    output_size_list_.push_back(input_row_ * input_col_ * sizeof(T));  // dx
+    output_size_list_.push_back(param_dim_ * sizeof(T));  // dg
+    output_size_list_.push_back(param_dim_ * sizeof(T));  // db
+    return;
+  }
+
+ private:
+  std::vector<size_t> input_size_list_;
+  std::vector<size_t> output_size_list_;
+  std::vector<size_t> workspace_size_list_;  // never filled in this class
+
+  int input_row_;  // product of input dims before begin_norm_axis
+  int input_col_;  // product of input dims from begin_norm_axis onward
+  int param_dim_;  // product of input dims from begin_params_axis onward (gamma/dg/db element count)
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_KERNEL_GPU_NN_LAYER_NORM_GRAD_GPU_KERNEL_H_
diff --git a/tests/st/ops/gpu/test_layer_norm_grad_op.py b/tests/st/ops/gpu/test_layer_norm_grad_op.py
new file mode 100644
index 0000000000..0cef113d7c
--- /dev/null
+++ b/tests/st/ops/gpu/test_layer_norm_grad_op.py
@@ -0,0 +1,140 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import pytest
+import numpy as np
+from mindspore import Tensor
+from mindspore.ops import operations as P
+from mindspore.ops.operations import _grad_ops as G
+from mindspore.ops import composite as C
+import mindspore.nn as nn
+import mindspore.context as context
+
+
+context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+
+class LayerNormGradNet(nn.Cell):  # Cell wrapping the G.LayerNormGrad primitive so the tests can call it
+    def __init__(self, begin_norm_axis, begin_params_axis):
+        super(LayerNormGradNet, self).__init__()
+        self.norm = G.LayerNormGrad(begin_norm_axis, begin_params_axis)  # both axes are passed through unchanged
+
+    def construct(self, dy, x, var, mean, gamma):  # arguments are forwarded to the primitive in this exact order
+        return self.norm(dy, x, var, mean, gamma)
+
+def LayerNormGradReference(x, dy, gamma, epsilon, begin_norm_axis, begin_params_axis):
+    """NumPy reference for LayerNormGrad; returns (dx, dg, db, mean, var) with keepdims statistics."""
+    rank = len(x.shape)
+    if begin_norm_axis < 0:
+        begin_norm_axis += rank
+    if begin_params_axis < 0:
+        begin_params_axis += rank
+    reduce_axes = tuple(range(begin_norm_axis, rank))
+    leading_axes = tuple(range(begin_params_axis))
+    num = int(np.prod(x.shape[begin_norm_axis:]))  # element count of one normalized slice
+
+    mean = np.mean(x, axis=reduce_axes, keepdims=True)
+    var = np.var(x, axis=reduce_axes, keepdims=True)
+    centered = x - mean
+    rstd = np.power(var + epsilon, -0.5)
+    gamma = gamma.reshape((1,) * begin_params_axis + tuple(x.shape[begin_params_axis:]))
+
+    dg = np.sum(dy * rstd * centered, axis=leading_axes, keepdims=True)
+    db = np.sum(dy, axis=leading_axes, keepdims=True)
+    sum1 = np.sum((-0.5) * dy * gamma * centered * np.power(var + epsilon, -1.5), axis=reduce_axes, keepdims=True)
+    sum2 = np.sum(dy * gamma, axis=reduce_axes, keepdims=True)
+    sum3 = np.sum(-2.0 * centered, axis=reduce_axes, keepdims=True)
+    dx_direct = dy * gamma * rstd
+    dx_from_var = sum1 * 2.0 / num * centered
+    dx_from_mean = ((-1.0) * rstd * sum2 + (1.0 / num) * sum1 * sum3) * (1.0 / num)
+    return dx_direct + dx_from_var + dx_from_mean, dg, db, mean, var
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_layernormgrad0():
+    """LayerNormGrad on a (4096, 3072) input, normalized from axis 1, checked against NumPy."""
+    norm_axis, params_axis = 1, 1
+    shape = (4096, 3072)
+    epsilon = 10e-12
+    x_np = np.random.randn(*shape).astype(np.float32)
+    dy_np = np.random.randn(*shape).astype(np.float32)
+    gamma_np = np.random.randn(*shape[params_axis:]).astype(np.float32)
+    # The reference also returns the mean/var that are fed to the op below.
+    dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(
+        x_np, dy_np, gamma_np, epsilon, norm_axis, params_axis)
+
+    grad_net = LayerNormGradNet(norm_axis, params_axis)
+    # Tensors are passed in the order construct() takes them: dy, x, var, mean, gamma.
+    operands = [Tensor(arr) for arr in (dy_np, x_np, var_np, mean_np, gamma_np)]
+    dx_ms, dg_ms, db_ms = grad_net(*operands)
+
+    # dx uses a tighter absolute tolerance than dg/db.
+    assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3)
+    assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-6, atol=1e-3)
+
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_layernormgrad1():
+    """LayerNormGrad on a (640, 768) input, normalized from axis 1, checked against NumPy."""
+    norm_axis, params_axis = 1, 1
+    shape = (640, 768)
+    epsilon = 10e-12
+    x_np = np.random.randn(*shape).astype(np.float32)
+    dy_np = np.random.randn(*shape).astype(np.float32)
+    gamma_np = np.random.randn(*shape[params_axis:]).astype(np.float32)
+    # The reference also returns the mean/var that are fed to the op below.
+    dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(
+        x_np, dy_np, gamma_np, epsilon, norm_axis, params_axis)
+
+    grad_net = LayerNormGradNet(norm_axis, params_axis)
+    # Tensors are passed in the order construct() takes them: dy, x, var, mean, gamma.
+    operands = [Tensor(arr) for arr in (dy_np, x_np, var_np, mean_np, gamma_np)]
+    dx_ms, dg_ms, db_ms = grad_net(*operands)
+
+    # dx uses a tighter absolute tolerance than dg/db.
+    assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3)
+    assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-6, atol=1e-3)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_layernormgrad2():
+    """LayerNormGrad on a (32, 128, 768) input with both axes given as -1, checked against NumPy."""
+    norm_axis, params_axis = -1, -1
+    shape = (32, 128, 768)
+    epsilon = 10e-12
+    x_np = np.random.randn(*shape).astype(np.float32)
+    dy_np = np.random.randn(*shape).astype(np.float32)
+    gamma_np = np.random.randn(*shape[params_axis:]).astype(np.float32)
+    # The reference also returns the mean/var that are fed to the op below.
+    dx_np, dg_np, db_np, mean_np, var_np = LayerNormGradReference(
+        x_np, dy_np, gamma_np, epsilon, norm_axis, params_axis)
+
+    grad_net = LayerNormGradNet(norm_axis, params_axis)
+    # Tensors are passed in the order construct() takes them: dy, x, var, mean, gamma.
+    operands = [Tensor(arr) for arr in (dy_np, x_np, var_np, mean_np, gamma_np)]
+    dx_ms, dg_ms, db_ms = grad_net(*operands)
+
+    # dx uses a tighter absolute tolerance than dg/db.
+    assert np.allclose(dx_ms.asnumpy(), dx_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(dg_ms.asnumpy(), dg_np, rtol=1e-6, atol=1e-3)
+    assert np.allclose(db_ms.asnumpy(), db_np, rtol=1e-6, atol=1e-3)
\ No newline at end of file
diff --git a/tests/st/ops/gpu/test_layer_norm_op.py b/tests/st/ops/gpu/test_layer_norm_op.py
new file mode 100644
index 0000000000..a281cd0f5f
--- /dev/null
+++ b/tests/st/ops/gpu/test_layer_norm_op.py
@@ -0,0 +1,134 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import pytest
+import numpy as np
+from mindspore import Tensor
+from mindspore.ops import operations as P
+import mindspore.nn as nn
+import mindspore.context as context
+
+context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+
+
+class LayerNormNet(nn.Cell):  # Cell wrapping the P.LayerNorm primitive so the tests can call it
+    def __init__(self, begin_norm_axis, begin_params_axis):
+        super(LayerNormNet, self).__init__()
+        self.norm = P.LayerNorm(begin_norm_axis, begin_params_axis)  # both axes are passed through unchanged
+
+    def construct(self, x, gamma, beta):  # arguments are forwarded to the primitive in this exact order
+        return self.norm(x, gamma, beta)
+
+def LayerNormReference(begin_norm_axis, begin_params_axis, x, gamma, beta):
+    """NumPy reference for LayerNorm; returns (y, mean, var) with keepdims statistics."""
+    rank = len(x.shape)
+    begin_norm_axis = begin_norm_axis if begin_norm_axis >= 0 else begin_norm_axis + rank
+    begin_params_axis = begin_params_axis if begin_params_axis >= 0 else begin_params_axis + rank
+    axis = tuple(range(begin_norm_axis, rank))
+    mean = np.mean(x, axis=axis, keepdims=True)
+    var = np.var(x, axis=axis, keepdims=True)
+    param_shape = (1,) * begin_params_axis + tuple(x.shape[begin_params_axis:])  # broadcast over leading dims
+    # epsilon matches the 10e-12 hard-coded in LayerNormGpuKernel::Launch (was 1e-12 here).
+    y = (x - mean) / np.sqrt(var + 10e-12) * gamma.reshape(param_shape) + beta.reshape(param_shape)
+    return y, mean, var
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_layernorm0():
+    """LayerNorm on a (4096, 3072) input, normalized from axis 1, checked against NumPy."""
+    norm_axis, params_axis = 1, 1
+    x_np = np.random.randn(4096, 3072).astype(np.float32)
+    param_shape = x_np.shape[params_axis:]
+    gamma_np = np.random.randn(*param_shape).astype(np.float32)
+    beta_np = np.random.randn(*param_shape).astype(np.float32)
+    y_np, mean_np, var_np = LayerNormReference(norm_axis, params_axis, x_np, gamma_np, beta_np)
+
+    layer_norm = LayerNormNet(norm_axis, params_axis)
+    operands = [Tensor(arr) for arr in (x_np, gamma_np, beta_np)]
+    y_ms, mean_ms, var_ms = layer_norm(*operands)
+
+    # Only atol is given here, so rtol stays at np.allclose's default.
+    assert np.allclose(y_ms.asnumpy(), y_np, atol=1e-6)
+    assert np.allclose(mean_ms.asnumpy(), mean_np, atol=1e-6)
+    assert np.allclose(var_ms.asnumpy(), var_np, atol=1e-6)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_layernorm1():
+    """LayerNorm on a (640, 768) input, normalized from axis 1, checked against NumPy."""
+    norm_axis, params_axis = 1, 1
+    x_np = np.random.randn(640, 768).astype(np.float32)
+    param_shape = x_np.shape[params_axis:]
+    gamma_np = np.random.randn(*param_shape).astype(np.float32)
+    beta_np = np.random.randn(*param_shape).astype(np.float32)
+    y_np, mean_np, var_np = LayerNormReference(norm_axis, params_axis, x_np, gamma_np, beta_np)
+
+    layer_norm = LayerNormNet(norm_axis, params_axis)
+    # Tensors are passed in the order construct() takes them: x, gamma, beta.
+    operands = [Tensor(arr) for arr in (x_np, gamma_np, beta_np)]
+    y_ms, mean_ms, var_ms = layer_norm(*operands)
+
+    # All three outputs are compared with the same tolerances.
+    assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-6)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_layernorm3d_1():
+    """LayerNorm on a (32, 128, 768) input with both axes given as -1, checked against NumPy."""
+    norm_axis, params_axis = -1, -1
+    x_np = np.random.randn(32, 128, 768).astype(np.float32)
+    param_shape = x_np.shape[params_axis:]
+    gamma_np = np.random.randn(*param_shape).astype(np.float32)
+    beta_np = np.random.randn(*param_shape).astype(np.float32)
+    y_np, mean_np, var_np = LayerNormReference(norm_axis, params_axis, x_np, gamma_np, beta_np)
+
+    layer_norm = LayerNormNet(norm_axis, params_axis)
+    operands = [Tensor(arr) for arr in (x_np, gamma_np, beta_np)]
+    y_ms, mean_ms, var_ms = layer_norm(*operands)
+
+    # All three outputs are compared with the same tolerances.
+    assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-6)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_layernorm3d_2():
+    """LayerNorm on a (32, 128, 768) input: normalize from axis -1, parameters span axes 1 onward."""
+    norm_axis, params_axis = -1, 1
+    x_np = np.random.randn(32, 128, 768).astype(np.float32)
+    param_shape = x_np.shape[params_axis:]
+    gamma_np = np.random.randn(*param_shape).astype(np.float32)
+    beta_np = np.random.randn(*param_shape).astype(np.float32)
+    y_np, mean_np, var_np = LayerNormReference(norm_axis, params_axis, x_np, gamma_np, beta_np)
+
+    layer_norm = LayerNormNet(norm_axis, params_axis)
+    operands = [Tensor(arr) for arr in (x_np, gamma_np, beta_np)]
+    y_ms, mean_ms, var_ms = layer_norm(*operands)
+
+    # All three outputs are compared with the same tolerances.
+    assert np.allclose(y_ms.asnumpy(), y_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(mean_ms.asnumpy(), mean_np, rtol=1e-6, atol=1e-6)
+    assert np.allclose(var_ms.asnumpy(), var_np, rtol=1e-6, atol=1e-6)

From 60545c4d86fd97f3987007f0c737304b388d3a23 Mon Sep 17 00:00:00 2001
From: simson <526422051@qq.com>
Date: Mon, 27 Apr 2020 17:38:03 +0800
Subject: [PATCH 130/242] delete useless log

---
 mindspore/ccsrc/utils/context/ms_context.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mindspore/ccsrc/utils/context/ms_context.cc b/mindspore/ccsrc/utils/context/ms_context.cc
index 1e2a5d6f09..6da1de9cdb 100644
--- a/mindspore/ccsrc/utils/context/ms_context.cc
+++ b/mindspore/ccsrc/utils/context/ms_context.cc
@@ -78,7 +78,6 @@ MsContext::MsContext(const std::string &policy, const std::string &target) {
   graph_memory_max_size_ = "0";
   variable_memory_max_size_ = "0";
   enable_loop_sink_ = target == kAscendDevice || target == kDavinciDevice;
-  MS_LOG(DEBUG) << "Create context with backend policy:" << policy << ", device target:" << target << ".";
 }
 
 std::shared_ptr<MsContext> MsContext::GetInstance() {

From 2265669d32f2417df1e36aeef8889f050a8a81cc Mon Sep 17 00:00:00 2001
From: zhaozhenlong <zhaozhenlong1@huawei.com>
Date: Mon, 27 Apr 2020 15:33:47 +0800
Subject: [PATCH 131/242] add examples for ops Conv2D Conv2dBackpropInput
 SigmoidCrossEntropyWithLogits SoftmaxCrossEntropyWithLogits
 SparseSoftmaxCrossEntropyWithLogits SparseApplyAdagrad

---
 mindspore/ops/operations/nn_ops.py | 33 ++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index bec133f9ce..84f26c67fc 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -634,6 +634,12 @@ class Conv2D(PrimitiveWithInfer):
 
     Outputs:
         Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
+
+    Examples:
+        >>> input = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = P.Conv2D(out_channel=32, kernel_size=3)
+        >>> conv2d(input, weight)
     """
 
     @prim_attr_register
@@ -1090,6 +1096,13 @@ class Conv2DBackpropInput(PrimitiveWithInfer):
 
     Returns:
         Tensor, the gradients of convolution.
+
+    Examples:
+        >>> dout = Tensor(np.ones([10, 32, 30, 30]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> x = Tensor(np.ones([10, 32, 32, 32]))
+        >>> conv2d_backprop_input = P.Conv2DBackpropInput(out_channel=32, kernel_size=3)
+        >>> conv2d_backprop_input(dout, weight, F.shape(x))
     """
 
     @prim_attr_register
@@ -1262,6 +1275,9 @@ class SoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
 
     Outputs:
         Tuple of 2 Tensor, the loss shape is `(N,)`, and the dlogits with the same shape as `logits`.
+
+    Examples:
+        Please refer to the usage in nn.SoftmaxCrossEntropyWithLogits source code.
     """
 
     @prim_attr_register
@@ -1306,6 +1322,9 @@ class SparseSoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
     Outputs:
         Tensor, if `is_grad` is False, the output tensor is the value of loss which is a scalar tensor;
         if `is_grad` is True, the output tensor is the gradient of input with the same shape as `logits`.
+
+    Examples:
+        Please refer to the usage in nn.SoftmaxCrossEntropyWithLogits source code.
     """
 
     @prim_attr_register
@@ -2117,6 +2136,12 @@ class SigmoidCrossEntropyWithLogits(PrimitiveWithInfer):
 
     Outputs:
         Tensor, with the same shape and type as input `logits`.
+
+    Examples:
+        >>> logits = Tensor(np.random.randn(2, 3).astype(np.float16))
+        >>> labels = Tensor(np.random.randn(2, 3).astype(np.float16))
+        >>> sigmoid = P.SigmoidCrossEntropyWithLogits()
+        >>> sigmoid(logits, labels)
     """
 
     @prim_attr_register
@@ -2471,6 +2496,14 @@ class SparseApplyAdagrad(PrimitiveWithInfer):
 
     Outputs:
         Tensor, has the same shape and type as `var`.
+
+    Examples:
+        >>> var = Tensor(np.random.random((3, 3)), mindspore.float32)
+        >>> accum = Tensor(np.random.random((3, 3)), mindspore.float32)
+        >>> grad = Tensor(np.random.random((3, 3)), mindspore.float32)
+        >>> indices = Tensor(np.ones((3,), np.int32))
+        >>> sparse_apply_ada_grad = P.SparseApplyAdagrad(0.5)
+        >>> sparse_apply_ada_grad(var, accum, grad, indices)
     """
 
     @prim_attr_register

From e6f36e050f2840b415eb226af00d9c6ea0e1a14e Mon Sep 17 00:00:00 2001
From: kswang <wangkaisheng2@huawei.com>
Date: Mon, 27 Apr 2020 17:27:40 +0800
Subject: [PATCH 132/242] change cpu kernel dir

---
 mindspore/ccsrc/kernel/CMakeLists.txt         |  8 ++++++-
 .../cpu}/apply_momentum_cpu_kernel.cc         | 10 ++++----
 .../cpu}/apply_momentum_cpu_kernel.h          | 14 +++++------
 .../cpu}/argmax_cpu_kernel.cc                 |  8 +++----
 .../kernel => kernel/cpu}/argmax_cpu_kernel.h | 16 ++++++-------
 .../cpu}/bias_add_cpu_kernel.cc               | 12 ++++------
 .../cpu}/bias_add_cpu_kernel.h                | 24 +++++++++----------
 .../cpu}/bias_add_grad_cpu_kernel.cc          | 12 ++++------
 .../cpu}/bias_add_grad_cpu_kernel.h           | 24 +++++++++----------
 .../{device => kernel}/cpu/cpu_kernel.cc      |  8 +++----
 .../ccsrc/{device => kernel}/cpu/cpu_kernel.h | 12 ++++------
 .../cpu/cpu_kernel_factory.cc                 |  8 +++----
 .../cpu/cpu_kernel_factory.h                  | 14 +++++------
 .../cpu}/equal_count_cpu_kernel.cc            |  8 +++----
 .../cpu}/equal_count_cpu_kernel.h             | 16 ++++++-------
 .../cpu}/mkldnn/conv2d_cpu_kernel.cc          | 10 ++++----
 .../cpu}/mkldnn/conv2d_cpu_kernel.h           | 14 +++++------
 .../mkldnn/conv2d_grad_filter_cpu_kernel.cc   | 10 ++++----
 .../mkldnn/conv2d_grad_filter_cpu_kernel.h    | 14 +++++------
 .../mkldnn/conv2d_grad_input_cpu_kernel.cc    | 10 ++++----
 .../mkldnn/conv2d_grad_input_cpu_kernel.h     | 14 +++++------
 .../cpu}/mkldnn/matmul_cpu_kernel.cc          | 10 ++++----
 .../cpu}/mkldnn/matmul_cpu_kernel.h           | 14 +++++------
 .../cpu}/mkldnn/mkl_cpu_kernel.cc             | 10 ++++----
 .../cpu}/mkldnn/mkl_cpu_kernel.h              | 16 ++++++-------
 .../cpu}/mkldnn/mkl_kernel_engine.cc          |  8 +++----
 .../cpu}/mkldnn/mkl_kernel_engine.h           |  6 ++---
 .../cpu}/mkldnn/mul_cpu_kernel.cc             | 10 ++++----
 .../cpu}/mkldnn/mul_cpu_kernel.h              | 14 +++++------
 .../cpu}/mkldnn/pooling_cpu_kernel.cc         | 10 ++++----
 .../cpu}/mkldnn/pooling_cpu_kernel.h          | 14 +++++------
 .../cpu}/mkldnn/pooling_grad_cpu_kernel.cc    | 10 ++++----
 .../cpu}/mkldnn/pooling_grad_cpu_kernel.h     | 14 +++++------
 .../cpu}/mkldnn/relu_cpu_kernel.cc            | 10 ++++----
 .../cpu}/mkldnn/relu_cpu_kernel.h             | 14 +++++------
 .../cpu}/mkldnn/relu_grad_cpu_kernel.cc       | 10 ++++----
 .../cpu}/mkldnn/relu_grad_cpu_kernel.h        | 14 +++++------
 .../cpu}/mkldnn/softmax_cpu_kernel.cc         | 10 ++++----
 .../cpu}/mkldnn/softmax_cpu_kernel.h          | 14 +++++------
 ...ax_cross_entropy_with_logits_cpu_kernel.cc | 10 ++++----
 ...max_cross_entropy_with_logits_cpu_kernel.h | 14 +++++------
 .../cpu}/one_hot_cpu_kernel.cc                |  8 +++----
 .../cpu}/one_hot_cpu_kernel.h                 | 16 ++++++-------
 .../cpu}/reshape_cpu_kernel.cc                |  8 +++----
 .../cpu}/reshape_cpu_kernel.h                 | 16 ++++++-------
 mindspore/ccsrc/session/CMakeLists.txt        |  7 ++++++
 .../{device/cpu => session}/cpu_session.cc    |  6 ++---
 .../{device/cpu => session}/cpu_session.h     |  0
 mindspore/ccsrc/session/session_factory.h     |  1 -
 .../insert_memcpy_async_for_getnext.cc        |  1 +
 50 files changed, 243 insertions(+), 318 deletions(-)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/apply_momentum_cpu_kernel.cc (90%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/apply_momentum_cpu_kernel.h (77%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/argmax_cpu_kernel.cc (94%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/argmax_cpu_kernel.h (77%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/bias_add_cpu_kernel.cc (91%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/bias_add_cpu_kernel.h (65%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/bias_add_grad_cpu_kernel.cc (89%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/bias_add_grad_cpu_kernel.h (62%)
 rename mindspore/ccsrc/{device => kernel}/cpu/cpu_kernel.cc (91%)
 rename mindspore/ccsrc/{device => kernel}/cpu/cpu_kernel.h (89%)
 rename mindspore/ccsrc/{device => kernel}/cpu/cpu_kernel_factory.cc (92%)
 rename mindspore/ccsrc/{device => kernel}/cpu/cpu_kernel_factory.h (86%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/equal_count_cpu_kernel.cc (92%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/equal_count_cpu_kernel.h (76%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/conv2d_cpu_kernel.cc (95%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/conv2d_cpu_kernel.h (78%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/conv2d_grad_filter_cpu_kernel.cc (95%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/conv2d_grad_filter_cpu_kernel.h (78%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/conv2d_grad_input_cpu_kernel.cc (95%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/conv2d_grad_input_cpu_kernel.h (78%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/matmul_cpu_kernel.cc (93%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/matmul_cpu_kernel.h (80%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/mkl_cpu_kernel.cc (95%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/mkl_cpu_kernel.h (82%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/mkl_kernel_engine.cc (89%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/mkl_kernel_engine.h (95%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/mul_cpu_kernel.cc (93%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/mul_cpu_kernel.h (78%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/pooling_cpu_kernel.cc (94%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/pooling_cpu_kernel.h (78%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/pooling_grad_cpu_kernel.cc (96%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/pooling_grad_cpu_kernel.h (82%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/relu_cpu_kernel.cc (91%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/relu_cpu_kernel.h (78%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/relu_grad_cpu_kernel.cc (93%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/relu_grad_cpu_kernel.h (77%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/softmax_cpu_kernel.cc (91%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/softmax_cpu_kernel.h (78%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc (95%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h (83%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/one_hot_cpu_kernel.cc (95%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/one_hot_cpu_kernel.h (77%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/reshape_cpu_kernel.cc (92%)
 rename mindspore/ccsrc/{device/cpu/kernel => kernel/cpu}/reshape_cpu_kernel.h (78%)
 rename mindspore/ccsrc/{device/cpu => session}/cpu_session.cc (96%)
 rename mindspore/ccsrc/{device/cpu => session}/cpu_session.h (100%)

diff --git a/mindspore/ccsrc/kernel/CMakeLists.txt b/mindspore/ccsrc/kernel/CMakeLists.txt
index f3ac99283a..a3a5077a1a 100644
--- a/mindspore/ccsrc/kernel/CMakeLists.txt
+++ b/mindspore/ccsrc/kernel/CMakeLists.txt
@@ -17,6 +17,12 @@ if (ENABLE_D)
 	add_compile_definitions(ENABLE_D)
 endif ()
 
+if (ENABLE_CPU)
+    file(GLOB_RECURSE CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+        "cpu/*.cc"
+    )
+endif ()
+
 if (ENABLE_GPU)
     file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         "gpu/*.cu"
@@ -37,4 +43,4 @@ if (ENABLE_GPU)
 	# add_library(_mindspore_kernel_cuda_obj OBJECT ${CUDA_SRC_LIST})
 endif()
 
-add_library(_mindspore_kernel_obj OBJECT ${KERNEL_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})
+add_library(_mindspore_kernel_obj OBJECT ${KERNEL_SRC_LIST} ${CPU_SRC_LIST} ${GPU_SRC_LIST} ${D_SRC_LIST})
diff --git a/mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc
similarity index 90%
rename from mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc
index 94b6c306ef..3cd6c57413 100644
--- a/mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.cc
@@ -13,14 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/apply_momentum_cpu_kernel.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/apply_momentum_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 #include "common/utils.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void ApplyMomentumCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {}
 
 bool ApplyMomentumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
@@ -44,6 +43,5 @@ bool ApplyMomentumCPUKernel::Launch(const std::vector<kernel::AddressPtr> &input
   }
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h
similarity index 77%
rename from mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h
index 84ed340430..91e159cf74 100644
--- a/mindspore/ccsrc/device/cpu/kernel/apply_momentum_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/apply_momentum_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class ApplyMomentumCPUKernel : public MKLCPUKernel {
  public:
   ApplyMomentumCPUKernel() = default;
@@ -35,8 +34,7 @@ class ApplyMomentumCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(ApplyMomentum, ApplyMomentumCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_APPLY_MOMENTUM_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc
similarity index 94%
rename from mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc
index a492bf7969..ee328df721 100644
--- a/mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.cc
@@ -13,12 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/argmax_cpu_kernel.h"
+#include "kernel/cpu/argmax_cpu_kernel.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -64,6 +63,5 @@ bool ArgmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   }
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h
similarity index 77%
rename from mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h
index 993b012c4e..b50b5fc272 100644
--- a/mindspore/ccsrc/device/cpu/kernel/argmax_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/argmax_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_ARGMAX_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_ARGMAX_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
 #include <vector>
 #include <memory>
-#include "device/cpu/cpu_kernel.h"
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class ArgmaxCPUKernel : public CPUKernel {
  public:
   ArgmaxCPUKernel() = default;
@@ -39,8 +38,7 @@ class ArgmaxCPUKernel : public CPUKernel {
 };
 
 MS_REG_CPU_KERNEL(Argmax, ArgmaxCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_ARGMAX_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_ARGMAX_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc
similarity index 91%
rename from mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc
index 4661ee73cd..00f3017231 100644
--- a/mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.cc
@@ -14,12 +14,11 @@
  * limitations under the License.
  */
 
-#include "device/cpu/kernel/bias_add_cpu_kernel.h"
+#include "kernel/cpu/bias_add_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
-void BiasAddCpuKernel::InitKernel(const CNodePtr &kernel_node) {
+namespace kernel {
+void BiasAddCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   bias_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
@@ -41,7 +40,7 @@ void BiasAddCpuKernel::InitKernel(const CNodePtr &kernel_node) {
   }
 }
 
-bool BiasAddCpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
+bool BiasAddCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
                               const std::vector<AddressPtr> &outputs) {
   if (inputs.size() != 2 || outputs.size() != 1) {
     MS_LOG(EXCEPTION) << "inputs outputs size not supoort";
@@ -79,6 +78,5 @@ bool BiasAddCpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::
   }
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h
similarity index 65%
rename from mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h
index 45028523bd..9c1a23b4e1 100644
--- a/mindspore/ccsrc/device/cpu/kernel/bias_add_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/bias_add_cpu_kernel.h
@@ -13,21 +13,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIAS_ADD_CPU_KERNEL_H_
-#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIAS_ADD_CPU_KERNEL_H_
+#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_
+#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/cpu_kernel.h"
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
-class BiasAddCpuKernel : public CPUKernel {
+namespace kernel {
+class BiasAddCPUKernel : public CPUKernel {
  public:
-  BiasAddCpuKernel() = default;
-  ~BiasAddCpuKernel() override = default;
+  BiasAddCPUKernel() = default;
+  ~BiasAddCPUKernel() override = default;
 
   void InitKernel(const CNodePtr &kernel_node) override;
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
@@ -38,8 +37,7 @@ class BiasAddCpuKernel : public CPUKernel {
   std::vector<size_t> input_shape_;
   std::vector<size_t> bias_shape_;
 };
-MS_REG_CPU_KERNEL(BiasAdd, BiasAddCpuKernel);
-}  // namespace cpu
-}  // namespace device
+MS_REG_CPU_KERNEL(BiasAdd, BiasAddCPUKernel);
+}  // namespace kernel
 }  // namespace mindspore
-#endif  // MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIAS_ADD_CPU_KERNEL_H_
+#endif  // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIAS_ADD_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc
similarity index 89%
rename from mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc
index 6846ca2555..1d9c7d076e 100644
--- a/mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.cc
@@ -14,12 +14,11 @@
  * limitations under the License.
  */
 
-#include "device/cpu/kernel/bias_add_grad_cpu_kernel.h"
+#include "kernel/cpu/bias_add_grad_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
-void BiasAddGradCpuKernel::InitKernel(const CNodePtr &kernel_node) {
+namespace kernel {
+void BiasAddGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   input_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   if (input_shape_.size() != 4 && input_shape_.size() != 2) {
@@ -27,7 +26,7 @@ void BiasAddGradCpuKernel::InitKernel(const CNodePtr &kernel_node) {
   }
 }
 
-bool BiasAddGradCpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
+bool BiasAddGradCPUKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
                                   const std::vector<AddressPtr> &outputs) {
   if (inputs.size() != 1 || outputs.size() != 1) {
     MS_LOG(EXCEPTION) << "input output size not support";
@@ -65,6 +64,5 @@ bool BiasAddGradCpuKernel::Launch(const std::vector<AddressPtr> &inputs, const s
   }
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h
similarity index 62%
rename from mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h
index 736540b8a3..3c4d6e9a76 100644
--- a/mindspore/ccsrc/device/cpu/kernel/bias_add_grad_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/bias_add_grad_cpu_kernel.h
@@ -14,21 +14,20 @@
  * limitations under the License.
  */
 
-#ifndef MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIASADDGRADCPUKERNEL_H_
-#define MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIASADDGRADCPUKERNEL_H_
+#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_
+#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/cpu_kernel.h"
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
-class BiasAddGradCpuKernel : public CPUKernel {
+namespace kernel {
+class BiasAddGradCPUKernel : public CPUKernel {
  public:
-  BiasAddGradCpuKernel() = default;
-  ~BiasAddGradCpuKernel() override = default;
+  BiasAddGradCPUKernel() = default;
+  ~BiasAddGradCPUKernel() override = default;
 
   void InitKernel(const CNodePtr &kernel_node) override;
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
@@ -37,8 +36,7 @@ class BiasAddGradCpuKernel : public CPUKernel {
  private:
   std::vector<size_t> input_shape_;
 };
-MS_REG_CPU_KERNEL(BiasAddGrad, BiasAddGradCpuKernel);
-}  // namespace cpu
-}  // namespace device
+MS_REG_CPU_KERNEL(BiasAddGrad, BiasAddGradCPUKernel);
+}  // namespace kernel
 }  // namespace mindspore
-#endif  // MINDSPORE_MINDSPORE_CCSRC_DEVICE_CPU_KERNEL_BIASADDGRADCPUKERNEL_H_
+#endif  // MINDSPORE_MINDSPORE_CCSRC_KERNEL_CPU_BIASADDGRADCPUKERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc
similarity index 91%
rename from mindspore/ccsrc/device/cpu/cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/cpu_kernel.cc
index 5f810ff522..7150c06eb5 100644
--- a/mindspore/ccsrc/device/cpu/cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel.cc
@@ -13,11 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void CPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
@@ -41,6 +40,5 @@ void CPUKernel::Init(const CNodePtr &kernel_node) {
   InitInputOutputSize(kernel_node);
   InitKernel(kernel_node);
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/cpu_kernel.h
similarity index 89%
rename from mindspore/ccsrc/device/cpu/cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/cpu_kernel.h
index ebd182ee49..f9121cb175 100644
--- a/mindspore/ccsrc/device/cpu/cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel.h
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_
 
 #include <string>
 #include <vector>
@@ -28,8 +28,7 @@
 using mindspore::kernel::Address;
 using mindspore::kernel::AddressPtr;
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 const char KSIZE[] = "ksize";
 const char STRIDE[] = "stride";
 const char STRIDES[] = "strides";
@@ -70,8 +69,7 @@ class CPUKernel : public kernel::KernelMod {
   std::vector<size_t> output_size_list_;
   std::vector<size_t> workspace_size_list_;
 };
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_factory.cc b/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc
similarity index 92%
rename from mindspore/ccsrc/device/cpu/cpu_kernel_factory.cc
rename to mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc
index 77a3345344..66949cb4fa 100644
--- a/mindspore/ccsrc/device/cpu/cpu_kernel_factory.cc
+++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.cc
@@ -14,15 +14,14 @@
  * limitations under the License.
  */
 
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 #include <memory>
 #include <iostream>
 #include <string>
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 CPUKernelFactory &CPUKernelFactory::Get() {
   static CPUKernelFactory instance;
   return instance;
@@ -45,6 +44,5 @@ std::shared_ptr<CPUKernel> CPUKernelFactory::Create(const std::string &kernel_na
   }
   return nullptr;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/cpu_kernel_factory.h b/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h
similarity index 86%
rename from mindspore/ccsrc/device/cpu/cpu_kernel_factory.h
rename to mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h
index 6a86f94709..f546758632 100644
--- a/mindspore/ccsrc/device/cpu/cpu_kernel_factory.h
+++ b/mindspore/ccsrc/kernel/cpu/cpu_kernel_factory.h
@@ -13,8 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_FACTORY_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_FACTORY_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_
 
 #include <functional>
 #include <map>
@@ -22,10 +22,9 @@
 #include <string>
 #include <utility>
 #include "common/utils.h"
-#include "device/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel.h"
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 using CPUKernelCreator = std::function<std::shared_ptr<CPUKernel>()>;
 class CPUKernelFactory {
  public:
@@ -51,8 +50,7 @@ class CPUKernelRegistrar {
 #define MS_REG_CPU_KERNEL(KERNEL_NAME, KERNEL_CLASS)                             \
   static const CPUKernelRegistrar g_cpu_kernel_##KERNEL_NAME##_reg(#KERNEL_NAME, \
                                                                    []() { return std::make_shared<KERNEL_CLASS>(); });
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_CPU_KERNEL_FACTORY_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_CPU_KERNEL_FACTORY_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc
similarity index 92%
rename from mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc
index ee6bb9f144..60e7eafa78 100644
--- a/mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.cc
@@ -13,12 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/equal_count_cpu_kernel.h"
+#include "kernel/cpu/equal_count_cpu_kernel.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void EqualCountCPUKernel::InitKernel(const CNodePtr & /*kernel_node*/) {}
 
 bool EqualCountCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
@@ -43,6 +42,5 @@ bool EqualCountCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   output[0] = count;
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h
similarity index 76%
rename from mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h
index 26f343e855..ecfe24a90f 100644
--- a/mindspore/ccsrc/device/cpu/kernel/equal_count_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/equal_count_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_EQUAL_COUNT_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_EQUAL_COUNT_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_
 #include <vector>
 #include <memory>
-#include "device/cpu/cpu_kernel.h"
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class EqualCountCPUKernel : public CPUKernel {
  public:
   EqualCountCPUKernel() = default;
@@ -35,8 +34,7 @@ class EqualCountCPUKernel : public CPUKernel {
 };
 
 MS_REG_CPU_KERNEL(EqualCount, EqualCountCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_EQUAL_COUNT_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_EQUAL_COUNT_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc
similarity index 95%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc
index 5d63aee6cd..657c85dc48 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.cc
@@ -13,15 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/conv2d_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/conv2d_cpu_kernel.h"
 #include <string>
 #include "common/utils.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -88,6 +87,5 @@ bool Conv2dCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   ExecutePrimitive();
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h
similarity index 78%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h
index d975b537ca..b91059a0d1 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class Conv2dCPUKernel : public MKLCPUKernel {
  public:
   Conv2dCPUKernel() = default;
@@ -35,8 +34,7 @@ class Conv2dCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(Conv2D, Conv2dCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc
similarity index 95%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc
index 1a7c10a531..fbfebaf56e 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc
@@ -13,15 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h"
 #include <string>
 #include "common/utils.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
@@ -90,6 +89,5 @@ bool Conv2dGradFilterCPUKernel::Launch(const std::vector<kernel::AddressPtr> &in
   ExecutePrimitive();
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h
similarity index 78%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h
index d42c1166f2..b6cd78171a 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_filter_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class Conv2dGradFilterCPUKernel : public MKLCPUKernel {
  public:
   Conv2dGradFilterCPUKernel() = default;
@@ -35,8 +34,7 @@ class Conv2dGradFilterCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(Conv2DBackpropFilter, Conv2dGradFilterCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_FILTER_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc
similarity index 95%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc
index 04dda20acd..ff0b8633d4 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc
@@ -13,15 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h"
 #include <string>
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 #include "common/utils.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
@@ -89,6 +88,5 @@ bool Conv2dGradInputCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
   ExecutePrimitive();
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h
similarity index 78%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h
index fb6e14688d..c61d8133ee 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/conv2d_grad_input_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/conv2d_grad_input_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class Conv2dGradInputCPUKernel : public MKLCPUKernel {
  public:
   Conv2dGradInputCPUKernel() = default;
@@ -35,8 +34,7 @@ class Conv2dGradInputCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(Conv2DBackpropInput, Conv2dGradInputCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_CONV2D_GRAD_INPUT_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc
similarity index 93%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc
index 84d9508c71..28266f2aa0 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.cc
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/matmul_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/matmul_cpu_kernel.h"
 #include <algorithm>
 #include <utility>
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "common/utils.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void MatMulCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -68,6 +67,5 @@ bool MatMulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   (void)dnnl_sgemm(trans_a_, trans_b_, dim_m_, dim_n_, dim_k_, 1.f, input_a, lda, input_b, ldb, 0.f, output, dim_n_);
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h
similarity index 80%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h
index b096e76740..ecca5dec73 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/matmul_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/matmul_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MATMUL_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_MATMUL_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class MatMulCPUKernel : public MKLCPUKernel {
  public:
   MatMulCPUKernel() = default;
@@ -42,8 +41,7 @@ class MatMulCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(MatMul, MatMulCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_MATMUL_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_MATMUL_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc
similarity index 95%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc
index 677df1d0f8..1f7ccf9e41 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.cc
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 #include <vector>
 #include <string>
 #include <algorithm>
 #include "common/utils.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode,
                               const std::vector<size_t> &src_shape, int kernel_size, int stride,
                               std::vector<int> *padding_l, std::vector<int> *padding_r) {
@@ -99,6 +98,5 @@ void MKLCPUKernel::SetArgumentHandle(int arg_key, void *ptr) {
 }
 
 void MKLCPUKernel::ExecutePrimitive() { MKLKernelEngine::Get().Execute(primitive_, arguments_); }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h
similarity index 82%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h
index 0a38de7060..cd06032ff0 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_cpu_kernel.h
@@ -13,20 +13,19 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MKL_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_MKL_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_
 
 #include <string>
 #include <unordered_map>
 #include <memory>
 #include <vector>
 #include "dnnl.hpp"
-#include "device/cpu/cpu_kernel.h"
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class MKLCPUKernel : public CPUKernel {
  public:
   MKLCPUKernel() = default;
@@ -43,8 +42,7 @@ class MKLCPUKernel : public CPUKernel {
   std::unordered_map<int, dnnl::memory> arguments_;
   std::shared_ptr<dnnl::primitive> primitive_{nullptr};
 };
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_MKL_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_MKL_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc
similarity index 89%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc
index 7025148732..ae4dbb26d8 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.cc
@@ -13,13 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "utils/log_adapter.h"
 #include "dnnl.hpp"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void MKLKernelEngine::Execute(const std::shared_ptr<dnnl::primitive> &primitive,
                               const std::unordered_map<int, dnnl::memory> &arguments) {
   MS_EXCEPTION_IF_NULL(primitive);
@@ -34,6 +33,5 @@ dnnl::memory MKLKernelEngine::CreateMemory(const dnnl::memory::desc &mem_desc, b
     return dnnl::memory(mem_desc, engine_, nullptr);
   }
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h
similarity index 95%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h
index ea764359b6..36a3ceff6d 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mkl_kernel_engine.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mkl_kernel_engine.h
@@ -23,8 +23,7 @@
 #include "common/utils.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class MKLKernelEngine {
  public:
   static MKLKernelEngine &Get() {
@@ -46,8 +45,7 @@ class MKLKernelEngine {
   dnnl::engine engine_;
   dnnl::stream stream_;
 };
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
 #endif  // MINDSPORE_MKL_KERNEL_ENGINE_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc
similarity index 93%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc
index bdaa85559e..4f77508004 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.cc
@@ -13,14 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/mul_cpu_kernel.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mul_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 #include "common/utils.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void MulCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src0_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -58,6 +57,5 @@ bool MulCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   ExecutePrimitive();
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h
similarity index 78%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h
index e666197632..746c2925ec 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/mul_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/mul_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_MUL_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_MUL_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class MulCPUKernel : public MKLCPUKernel {
  public:
   MulCPUKernel() = default;
@@ -35,8 +34,7 @@ class MulCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(Mul, MulCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_MUL_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_MUL_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc
similarity index 94%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc
index 9417105e2f..5225050dc1 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.cc
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/pooling_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/pooling_cpu_kernel.h"
 #include <string>
 #include <algorithm>
 #include "common/utils.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -66,6 +65,5 @@ bool PoolingCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   ExecutePrimitive();
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h
similarity index 78%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h
index 50f4ff5da7..a082015137 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_POOLING_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_POOLING_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class PoolingCPUKernel : public MKLCPUKernel {
  public:
   PoolingCPUKernel() = default;
@@ -35,8 +34,7 @@ class PoolingCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(MaxPool, PoolingCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_POOLING_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc
similarity index 96%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc
index 724b78f19f..c0459de790 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.cc
@@ -13,17 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h"
 #include <string>
 #include <utility>
 #include <algorithm>
 #include "common/utils.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   src_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -121,6 +120,5 @@ bool PoolingGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   }
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h
similarity index 82%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h
index e557f7f95f..16ca6901d4 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/pooling_grad_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/pooling_grad_cpu_kernel.h
@@ -13,17 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_POOLING_GRAD_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_POOLING_GRAD_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
 #include <utility>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class PoolingGradCPUKernel : public MKLCPUKernel {
  public:
   PoolingGradCPUKernel() = default;
@@ -45,8 +44,7 @@ class PoolingGradCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(MaxPoolGrad, PoolingGradCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_POOLING_GRAD_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_POOLING_GRAD_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc
similarity index 91%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc
index c8bf63eaf0..d5ef20a25e 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.cc
@@ -13,14 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/relu_cpu_kernel.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/relu_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 #include "common/utils.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void ReluCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -49,6 +48,5 @@ bool ReluCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   ExecutePrimitive();
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h
similarity index 78%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h
index 8811539f40..b9ccb12f25 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_RELU_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_RELU_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class ReluCPUKernel : public MKLCPUKernel {
  public:
   ReluCPUKernel() = default;
@@ -35,8 +34,7 @@ class ReluCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(ReLU, ReluCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_RELU_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_RELU_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc
similarity index 93%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc
index b831562d10..4a6213ddf2 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.cc
@@ -13,14 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/relu_grad_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 #include "common/utils.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void ReluGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -66,6 +65,5 @@ bool ReluGradCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   }
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h
similarity index 77%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h
index 81b84916ba..1ff9184b2e 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/relu_grad_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/relu_grad_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_RELU_GRAD_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_RELU_GRAD_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class ReluGradCPUKernel : public MKLCPUKernel {
  public:
   ReluGradCPUKernel() = default;
@@ -35,8 +34,7 @@ class ReluGradCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(ReluGrad, ReluGradCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_RELU_GRAD_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_RELU_GRAD_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc
similarity index 91%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc
index 495f833c16..7fa740cfc0 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.cc
@@ -13,14 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/softmax_cpu_kernel.h"
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/softmax_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 #include "common/utils.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void SoftmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
@@ -51,6 +50,5 @@ bool SoftmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   ExecutePrimitive();
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h
similarity index 78%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h
index 8f4ccae1b2..de51247493 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/softmax_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/softmax_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_SOFTMAX_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_SOFTMAX_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class SoftmaxCPUKernel : public MKLCPUKernel {
  public:
   SoftmaxCPUKernel() = default;
@@ -35,8 +34,7 @@ class SoftmaxCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(Softmax, SoftmaxCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_SOFTMAX_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_SOFTMAX_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
similarity index 95%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
index ca06b4a617..c33fcd246f 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.cc
@@ -13,17 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h"
 #include <numeric>
 #include <functional>
 #include <cmath>
-#include "device/cpu/kernel/mkldnn/mkl_kernel_engine.h"
+#include "kernel/cpu/mkldnn/mkl_kernel_engine.h"
 #include "device/cpu/cpu_device_address.h"
 #include "common/utils.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void SparseSoftmaxCrossEntropyWithLogitsCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
   CPUKernel::InitInputOutputSize(kernel_node);
   MS_EXCEPTION_IF_NULL(kernel_node);
@@ -126,6 +125,5 @@ bool SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Launch(const std::vector<kern
   }
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h
similarity index 83%
rename from mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h
index 1d7169ea3d..5c00b8cb6d 100644
--- a/mindspore/ccsrc/device/cpu/kernel/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/sparse_softmax_cross_entropy_with_logits_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
 
 #include <vector>
 #include <memory>
-#include "device/cpu/kernel/mkldnn/mkl_cpu_kernel.h"
+#include "kernel/cpu/mkldnn/mkl_cpu_kernel.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public MKLCPUKernel {
  public:
   SparseSoftmaxCrossEntropyWithLogitsCPUKernel() = default;
@@ -45,8 +44,7 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public MKLCPUKernel {
 };
 
 MS_REG_CPU_KERNEL(SparseSoftmaxCrossEntropyWithLogits, SparseSoftmaxCrossEntropyWithLogitsCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc
similarity index 95%
rename from mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc
index e4b3f03f58..00dfe73f28 100644
--- a/mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.cc
@@ -13,12 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/one_hot_cpu_kernel.h"
+#include "kernel/cpu/one_hot_cpu_kernel.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void OneHotCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
   auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
@@ -69,6 +68,5 @@ bool OneHotCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
 
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h
similarity index 77%
rename from mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h
index f41ac63265..bb69236123 100644
--- a/mindspore/ccsrc/device/cpu/kernel/one_hot_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/one_hot_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_ONE_HOT_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_ONE_HOT_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_ONE_HOT_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_ONE_HOT_CPU_KERNEL_H_
 #include <vector>
 #include <memory>
-#include "device/cpu/cpu_kernel.h"
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class OneHotCPUKernel : public CPUKernel {
  public:
   OneHotCPUKernel() = default;
@@ -40,8 +39,7 @@ class OneHotCPUKernel : public CPUKernel {
 };
 
 MS_REG_CPU_KERNEL(OneHot, OneHotCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_ONE_HOT_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_ONE_HOT_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc
similarity index 92%
rename from mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.cc
rename to mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc
index a742e3a550..7342a19e99 100644
--- a/mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.cc
@@ -13,12 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "device/cpu/kernel/reshape_cpu_kernel.h"
+#include "kernel/cpu/reshape_cpu_kernel.h"
 #include "device/cpu/cpu_device_address.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 void ReshapeCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); }
 
 bool ReshapeCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
@@ -43,6 +42,5 @@ bool ReshapeCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   }
   return true;
 }
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.h b/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h
similarity index 78%
rename from mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.h
rename to mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h
index d371e3a7ac..837873d48c 100644
--- a/mindspore/ccsrc/device/cpu/kernel/reshape_cpu_kernel.h
+++ b/mindspore/ccsrc/kernel/cpu/reshape_cpu_kernel.h
@@ -13,16 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_CCSRC_DEVICE_CPU_RESHAPE_CPU_KERNEL_H_
-#define MINDSPORE_CCSRC_DEVICE_CPU_RESHAPE_CPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_KERNEL_CPU_RESHAPE_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_KERNEL_CPU_RESHAPE_CPU_KERNEL_H_
 #include <vector>
 #include <memory>
-#include "device/cpu/cpu_kernel.h"
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 namespace mindspore {
-namespace device {
-namespace cpu {
+namespace kernel {
 class ReshapeCPUKernel : public CPUKernel {
  public:
   ReshapeCPUKernel() = default;
@@ -37,8 +36,7 @@ class ReshapeCPUKernel : public CPUKernel {
 MS_REG_CPU_KERNEL(Reshape, ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(Flatten, ReshapeCPUKernel);
 MS_REG_CPU_KERNEL(ExpandDims, ReshapeCPUKernel);
-}  // namespace cpu
-}  // namespace device
+}  // namespace kernel
 }  // namespace mindspore
 
-#endif  // MINDSPORE_CCSRC_DEVICE_CPU_RESHAPE_CPU_KERNEL_H_
+#endif  // MINDSPORE_CCSRC_KERNEL_CPU_RESHAPE_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/session/CMakeLists.txt b/mindspore/ccsrc/session/CMakeLists.txt
index 66495626eb..56a7327e80 100644
--- a/mindspore/ccsrc/session/CMakeLists.txt
+++ b/mindspore/ccsrc/session/CMakeLists.txt
@@ -12,6 +12,13 @@ if (ENABLE_GPU)
     list(APPEND _SESSION_SRC_LIST ${_GPU_SRC_LIST})
 endif ()
 
+if (ENABLE_CPU)
+    file(GLOB_RECURSE _CPU_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+        "cpu_session.cc"
+        )
+    list(APPEND _SESSION_SRC_LIST ${_CPU_SRC_LIST})
+endif ()
+
 if (ENABLE_D)
     file(GLOB_RECURSE _D_SRC_LIST  RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         "ascend_session.cc"
diff --git a/mindspore/ccsrc/device/cpu/cpu_session.cc b/mindspore/ccsrc/session/cpu_session.cc
similarity index 96%
rename from mindspore/ccsrc/device/cpu/cpu_session.cc
rename to mindspore/ccsrc/session/cpu_session.cc
index 1613f9f91e..e8830d730c 100644
--- a/mindspore/ccsrc/device/cpu/cpu_session.cc
+++ b/mindspore/ccsrc/session/cpu_session.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "device/cpu/cpu_session.h"
+#include "session/cpu_session.h"
 #include <algorithm>
 #include "ir/meta_tensor.h"
 #include "ir/anf.h"
@@ -23,7 +23,7 @@
 #include "session/anf_runtime_algorithm.h"
 #include "device/kernel_runtime.h"
 #include "predict/predict.h"
-#include "device/cpu/cpu_kernel_factory.h"
+#include "kernel/cpu/cpu_kernel_factory.h"
 
 namespace mindspore {
 namespace session {
@@ -110,7 +110,7 @@ void CPUSession::BuildKernel(const KernelGraph *kernel_graph) {
     MS_EXCEPTION_IF_NULL(kernel_node);
     std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
     MS_LOG(INFO) << "Cpu building operator[" << kernel_name << "].";
-    std::shared_ptr<device::cpu::CPUKernel> cpu_kernel = device::cpu::CPUKernelFactory::Get().Create(kernel_name);
+    std::shared_ptr<kernel::CPUKernel> cpu_kernel = kernel::CPUKernelFactory::Get().Create(kernel_name);
     if (cpu_kernel == nullptr) {
       MS_LOG(EXCEPTION) << "Operator[" << kernel_name << "] is not support.";
     }
diff --git a/mindspore/ccsrc/device/cpu/cpu_session.h b/mindspore/ccsrc/session/cpu_session.h
similarity index 100%
rename from mindspore/ccsrc/device/cpu/cpu_session.h
rename to mindspore/ccsrc/session/cpu_session.h
diff --git a/mindspore/ccsrc/session/session_factory.h b/mindspore/ccsrc/session/session_factory.h
index 476d9ff4a1..99db0afeb7 100644
--- a/mindspore/ccsrc/session/session_factory.h
+++ b/mindspore/ccsrc/session/session_factory.h
@@ -22,7 +22,6 @@
 #include <string>
 #include <utility>
 #include "common/utils.h"
-#include "device/cpu/cpu_kernel.h"
 #include "session/session_basic.h"
 namespace mindspore {
 namespace session {
diff --git a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc
index 2616354e4c..56bf0ae4e0 100644
--- a/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc
+++ b/tests/ut/cpp/pre_activate/ascend/enhancer/insert_memcpy_async_for_getnext.cc
@@ -16,6 +16,7 @@
 #include "common/backend_common_test.h"
 #include "common/py_func_graph_fetcher.h"
 #include "session/ascend_session.h"
+#include "session/anf_runtime_algorithm.h"
 #include "pipeline/resource.h"
 #include "operator/ops.h"
 #include "ir/manager.h"

From bfba630aa227b3f54840802bed12e26834d39845 Mon Sep 17 00:00:00 2001
From: liyong <liyong126@huawei.com>
Date: Thu, 23 Apr 2020 12:02:47 +0800
Subject: [PATCH 133/242] update pK_sampler

---
 mindspore/ccsrc/dataset/api/python_bindings.cc    |  6 +++---
 mindspore/ccsrc/mindrecord/io/shard_reader.cc     |  9 +++++++++
 mindspore/ccsrc/mindrecord/meta/shard_category.cc |  2 +-
 mindspore/dataset/engine/samplers.py              |  9 +++++++--
 .../python/dataset/test_minddataset_exception.py  | 15 +++++++++++++++
 5 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc
index ea2e8352da..dedee8e9b3 100644
--- a/mindspore/ccsrc/dataset/api/python_bindings.cc
+++ b/mindspore/ccsrc/dataset/api/python_bindings.cc
@@ -435,12 +435,12 @@ void bindSamplerOps(py::module *m) {
     .def(py::init<std::vector<int64_t>, uint32_t>(), py::arg("indices"), py::arg("seed") = GetSeed());
   (void)py::class_<mindrecord::ShardPkSample, mindrecord::ShardOperator, std::shared_ptr<mindrecord::ShardPkSample>>(
     *m, "MindrecordPkSampler")
-    .def(py::init([](int64_t kVal, bool shuffle) {
+    .def(py::init([](int64_t kVal, std::string kColumn, bool shuffle) {
       if (shuffle == true) {
-        return std::make_shared<mindrecord::ShardPkSample>("label", kVal, std::numeric_limits<int64_t>::max(),
+        return std::make_shared<mindrecord::ShardPkSample>(kColumn, kVal, std::numeric_limits<int64_t>::max(),
                                                            GetSeed());
       } else {
-        return std::make_shared<mindrecord::ShardPkSample>("label", kVal);
+        return std::make_shared<mindrecord::ShardPkSample>(kColumn, kVal);
       }
     }));
 
diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc
index dd34615f7e..4cbb2b3767 100644
--- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc
+++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc
@@ -316,6 +316,10 @@ MSRStatus ShardReader::ReadAllRowsInShard(int shard_id, const std::string &sql,
 }
 
 MSRStatus ShardReader::GetAllClasses(const std::string &category_field, std::set<std::string> &categories) {
+  if (column_schema_id_.find(category_field) == column_schema_id_.end()) {
+    MS_LOG(ERROR) << "Field " << category_field << " does not exist.";
+    return FAILED;
+  }
   auto ret = ShardIndexGenerator::GenerateFieldName(std::make_pair(column_schema_id_[category_field], category_field));
   if (SUCCESS != ret.first) {
     return FAILED;
@@ -719,6 +723,11 @@ int64_t ShardReader::GetNumClasses(const std::string &file_path, const std::stri
   for (auto &field : index_fields) {
     map_schema_id_fields[field.second] = field.first;
   }
+
+  if (map_schema_id_fields.find(category_field) == map_schema_id_fields.end()) {
+    MS_LOG(ERROR) << "Field " << category_field << " does not exist.";
+    return -1;
+  }
   auto ret =
     ShardIndexGenerator::GenerateFieldName(std::make_pair(map_schema_id_fields[category_field], category_field));
   if (SUCCESS != ret.first) {
diff --git a/mindspore/ccsrc/mindrecord/meta/shard_category.cc b/mindspore/ccsrc/mindrecord/meta/shard_category.cc
index 80816e7a79..2a9c2c0966 100644
--- a/mindspore/ccsrc/mindrecord/meta/shard_category.cc
+++ b/mindspore/ccsrc/mindrecord/meta/shard_category.cc
@@ -38,7 +38,7 @@ MSRStatus ShardCategory::execute(ShardTask &tasks) { return SUCCESS; }
 
 int64_t ShardCategory::GetNumSamples(int64_t dataset_size, int64_t num_classes) {
   if (dataset_size == 0) return dataset_size;
-  if (dataset_size > 0 && num_categories_ > 0 && num_elements_ > 0) {
+  if (dataset_size > 0 && num_classes > 0 && num_categories_ > 0 && num_elements_ > 0) {
     return std::min(num_categories_, num_classes) * num_elements_;
   }
   return -1;
diff --git a/mindspore/dataset/engine/samplers.py b/mindspore/dataset/engine/samplers.py
index 82759989cb..ce732d28a7 100644
--- a/mindspore/dataset/engine/samplers.py
+++ b/mindspore/dataset/engine/samplers.py
@@ -152,6 +152,7 @@ class PKSampler(BuiltinSampler):
         num_val (int): Number of elements to sample for each class.
         num_class (int, optional): Number of classes to sample (default=None, all classes).
         shuffle (bool, optional): If true, the class IDs are shuffled (default=False).
+        class_column (str, optional): Name of column to classify dataset(default='label'), for MindDataset.
 
     Examples:
         >>> import mindspore.dataset as ds
@@ -168,7 +169,7 @@ class PKSampler(BuiltinSampler):
         ValueError: If shuffle is not boolean.
     """
 
-    def __init__(self, num_val, num_class=None, shuffle=False):
+    def __init__(self, num_val, num_class=None, shuffle=False, class_column='label'):
         if num_val <= 0:
             raise ValueError("num_val should be a positive integer value, but got num_val={}".format(num_val))
 
@@ -180,12 +181,16 @@ class PKSampler(BuiltinSampler):
 
         self.num_val = num_val
         self.shuffle = shuffle
+        self.class_column = class_column # work for minddataset
 
     def create(self):
         return cde.PKSampler(self.num_val, self.shuffle)
 
     def _create_for_minddataset(self):
-        return cde.MindrecordPkSampler(self.num_val, self.shuffle)
+        if not self.class_column or not isinstance(self.class_column, str):
+            raise ValueError("class_column should be a not empty string value, \
+                    but got class_column={}".format(class_column))
+        return cde.MindrecordPkSampler(self.num_val, self.class_column, self.shuffle)
 
 class RandomSampler(BuiltinSampler):
     """
diff --git a/tests/ut/python/dataset/test_minddataset_exception.py b/tests/ut/python/dataset/test_minddataset_exception.py
index 70add46b68..e1d54fa7c8 100644
--- a/tests/ut/python/dataset/test_minddataset_exception.py
+++ b/tests/ut/python/dataset/test_minddataset_exception.py
@@ -82,3 +82,18 @@ def test_minddataset_lack_db():
             num_iter += 1
         assert num_iter == 0
     os.remove(CV_FILE_NAME)
+
+
+def test_cv_minddataset_pk_sample_error_class_column():
+    create_cv_mindrecord(1)
+    columns_list = ["data", "file_name", "label"]
+    num_readers = 4
+    sampler = ds.PKSampler(5, None, True, 'no_exsit_column')
+    with pytest.raises(Exception, match="MindRecordOp launch failed"):
+        data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler)
+        num_iter = 0
+        for item in data_set.create_dict_iterator():
+            num_iter += 1
+    os.remove(CV_FILE_NAME)
+    os.remove("{}.db".format(CV_FILE_NAME))
+

From 78e9a10f37f3740201a5577de5cfac18f3e84245 Mon Sep 17 00:00:00 2001
From: dinghao <dinghao7@huawei.com>
Date: Mon, 27 Apr 2020 19:15:42 +0800
Subject: [PATCH 134/242] remove data sync

---
 mindspore/ccsrc/ir/meta_tensor.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mindspore/ccsrc/ir/meta_tensor.cc b/mindspore/ccsrc/ir/meta_tensor.cc
index af6b4f7ffc..de59cb66d9 100644
--- a/mindspore/ccsrc/ir/meta_tensor.cc
+++ b/mindspore/ccsrc/ir/meta_tensor.cc
@@ -166,9 +166,6 @@ Tensor::Tensor(const py::int_ &input, const TypePtr &data_type) { init(py::array
 Tensor::Tensor(const Tensor &tensor, const TypePtr &data_type)
     : MetaTensor(tensor), dirty_(tensor.dirty_), device_address_(tensor.device_address_) {
   init(tensor.data_, data_type);
-  if (device_address_ != nullptr) {
-    (void)data_sync();
-  }
 }
 
 Tensor &Tensor::operator=(const Tensor &tensor) {

From 2e2e7a28ae0160a350243c657b7adf90bcc8834b Mon Sep 17 00:00:00 2001
From: "Etone.Chan" <etone.chan@huawei.com>
Date: Mon, 27 Apr 2020 22:13:02 +0800
Subject: [PATCH 135/242] refactor buffer fusion

---
 .../ascend/buffer_fusion/buffer_fusion.cc     | 143 +++++++-----------
 .../ascend/buffer_fusion/buffer_fusion.h      |   1 -
 .../ccsrc/session/anf_runtime_algorithm.cc    |   2 +
 mindspore/ccsrc/utils/utils.h                 |   2 +
 4 files changed, 58 insertions(+), 90 deletions(-)

diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
index abacb9137d..8581f1165d 100644
--- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
@@ -261,23 +261,24 @@ CNodePtr CreateFusionOp(const std::vector<AnfNodePtr> &inputs_list, const std::v
   return buffer_fusion_kernel;
 }
 
-kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list_in,
-                                                    const std::vector<AnfNodePtr> &inputs_list,
+kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr> &inputs_list,
                                                     const std::vector<AnfNodePtr> &outputs_list) {
   MS_LOG(DEBUG) << "Start Create Kernel Info";
   kernel::KernelBuildInfo::KernelBuildInfoBuilder builder;
   // inputs format and data type
   std::vector<std::string> inputs_format;
   std::vector<TypeId> inputs_data_type;
-  for (auto node : inputs_list_in) {
-    auto cnode = node->cast<CNodePtr>();
-    MS_EXCEPTION_IF_NULL(cnode);
-    auto &inputs = cnode->inputs();
-    for (size_t input_index = 1; input_index < inputs.size(); ++input_index) {
-      if (std::find(inputs_list.begin(), inputs_list.end(), inputs[input_index]) != inputs_list.end()) {
-        inputs_format.push_back(AnfAlgo::GetInputFormat(node, input_index - 1));
-        inputs_data_type.push_back(AnfAlgo::GetInputDeviceDataType(node, input_index - 1));
-      }
+  for (const auto &input : inputs_list) {
+    if (input->isa<CNode>() && AnfAlgo::GetCNodeName(input) == prim::kPrimTupleGetItem->name()) {
+      auto tuple_getitem = input->cast<CNodePtr>();
+      MS_EXCEPTION_IF_NULL(tuple_getitem);
+      inputs_format.push_back(AnfAlgo::GetOutputFormat(
+        tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
+      inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(
+        tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
+    } else {
+      inputs_format.push_back(AnfAlgo::GetOutputFormat(input, 0));
+      inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(input, 0));
     }
   }
   // outputs format and data type
@@ -360,62 +361,6 @@ void ReplaceOldNode(std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusi
   }
 }
 
-void GetInputList(const CNodePtr &node, const int32_t cur_fusion_id, std::vector<AnfNodePtr> *inputs_list) {
-  MS_EXCEPTION_IF_NULL(node);
-  MS_EXCEPTION_IF_NULL(inputs_list);
-  auto &inputs = node->inputs();
-  for (size_t input_index = 1; input_index < inputs.size(); ++input_index) {
-    auto input = inputs[input_index];
-    if (AnfAlgo::IsRealCNodeKernel(input)) {
-      if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input)) {
-        auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input, kOpAttrFusionId);
-        if (fusion_id != cur_fusion_id) {
-          inputs_list->push_back(input);
-        }
-      } else {
-        inputs_list->push_back(input);
-      }
-    } else if (input->isa<CNode>()) {
-      for (auto &input_in : input->cast<CNodePtr>()->inputs()) {
-        if (AnfAlgo::IsRealCNodeKernel(input_in)) {
-          if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, input_in)) {
-            auto fusion_id = AnfAlgo::GetNodeAttr<int32_t>(input_in, kOpAttrFusionId);
-            if (fusion_id != cur_fusion_id) {
-              inputs_list->push_back(input);
-            }
-          } else {
-            inputs_list->push_back(input);
-          }
-        }
-      }
-    } else {
-      inputs_list->push_back(input);
-    }
-  }
-}
-
-void CheckCurrentNodeIsInput(const CNodePtr &node, const int32_t &cur_fusion_id,
-                             std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
-  MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
-  if ((*buffer_fusion_infos).find(cur_fusion_id) == (*buffer_fusion_infos).end()) {
-    BufferFusionInfo_t buffer_fusion_info;
-    (*buffer_fusion_infos)[cur_fusion_id] = buffer_fusion_info;
-  }
-  std::vector<AnfNodePtr> inputs_list;
-  GetInputList(node, cur_fusion_id, &inputs_list);
-  if (!inputs_list.empty()) {
-    if (!(*buffer_fusion_infos)[cur_fusion_id].inputs_list.empty()) {
-      (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list.insert(
-        (*buffer_fusion_infos)[cur_fusion_id].inputs_list.end(), inputs_list.begin(), inputs_list.end());
-      (void)(*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.insert(
-        (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.end(), node);
-    } else {
-      (*buffer_fusion_infos)[cur_fusion_id].inputs_list = inputs_list;
-      (*buffer_fusion_infos)[cur_fusion_id].inputs_list_in.push_back(node);
-    }
-  }
-}
-
 void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph,
                                    std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
   MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
@@ -429,6 +374,45 @@ void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph,
   }
 }
 
+void GetFusionScopeInputNodeList(session::KernelGraph *kernel_graph,
+                                 std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
+  auto manager = kernel_graph->manager();
+  MS_EXCEPTION_IF_NULL(manager);
+
+  for (auto &buffer_fusion_info : *buffer_fusion_infos) {
+    auto fusion_id = buffer_fusion_info.first;
+    auto fusion_info = buffer_fusion_info.second;
+    for (const auto &node : fusion_info.anf_nodes) {
+      auto cnode = node->cast<CNodePtr>();
+      for (size_t idx = 1; idx < cnode->inputs().size(); ++idx) {
+        auto real_input = AnfAlgo::VisitKernel(cnode->input(idx), 0);
+        if (std::find(fusion_info.anf_nodes.begin(), fusion_info.anf_nodes.end(), real_input.first) ==
+            fusion_info.anf_nodes.end()) {
+          if (std::find((*buffer_fusion_infos)[fusion_id].inputs_list.begin(),
+                        (*buffer_fusion_infos)[fusion_id].inputs_list.end(),
+                        cnode->input(idx)) == (*buffer_fusion_infos)[fusion_id].inputs_list.end()) {
+            (*buffer_fusion_infos)[fusion_id].inputs_list.push_back(cnode->input(idx));
+          }
+        }
+      }
+    }
+  }
+}
+
+bool TupleGetitemNodeCompare(const AnfNodePtr &node1, const AnfNodePtr &node2) {
+  MS_EXCEPTION_IF_NULL(node1);
+  MS_EXCEPTION_IF_NULL(node2);
+  auto getitem1 = node1->cast<CNodePtr>();
+  auto getitem2 = node2->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(getitem1);
+  MS_EXCEPTION_IF_NULL(getitem2);
+  auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2)));
+  auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2)));
+  return output_idx1 < output_idx2;
+}
+
 void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
                                   std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
   MS_EXCEPTION_IF_NULL(kernel_graph);
@@ -454,14 +438,7 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
         std::transform(manager->node_users()[node].begin(), manager->node_users()[node].end(),
                        std::back_inserter(tuple_getitem_nodes),
                        [](const std::pair<AnfNodePtr, int> &use_node) { return use_node.first; });
-        std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(),
-                  [](const AnfNodePtr &node1, const AnfNodePtr &node2) {
-                    auto getitem1 = node1->cast<CNodePtr>();
-                    auto getitem2 = node2->cast<CNodePtr>();
-                    auto output_idx1 = GetValue<int>(GetValueNode(getitem1->input(2)));
-                    auto output_idx2 = GetValue<int>(GetValueNode(getitem2->input(2)));
-                    return output_idx1 < output_idx2;
-                  });
+        std::sort(tuple_getitem_nodes.begin(), tuple_getitem_nodes.end(), TupleGetitemNodeCompare);
         for (auto getitem : tuple_getitem_nodes) {
           auto getitem_ptr = getitem->cast<CNodePtr>();
           auto input2 = getitem_ptr->input(2);
@@ -634,24 +611,12 @@ void MatchFusionTypePattern(const session::KernelGraph &kernel_graph, std::unord
 void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph,
                                        std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const {
   MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
-  std::vector<AnfNodePtr> node_list = TopoSort(kernel_graph->get_return());
-  for (auto &node : node_list) {
-    if (!AnfAlgo::IsRealCNodeKernel(node)) {
-      continue;
-    }
-    auto cnode = node->cast<CNodePtr>();
-    MS_EXCEPTION_IF_NULL(cnode);
-    if (AnfAlgo::HasNodeAttr(kOpAttrFusionId, cnode)) {
-      auto cur_fusion_id = AnfAlgo::GetNodeAttr<int32_t>(cnode, kOpAttrFusionId);
-      CheckCurrentNodeIsInput(cnode, cur_fusion_id, buffer_fusion_infos);
-    }
-  }
   GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos);
+  GetFusionScopeInputNodeList(kernel_graph, buffer_fusion_infos);
   GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos);
   for (auto &buffer_fusion_info : *buffer_fusion_infos) {
     buffer_fusion_info.second.kernel_build_info =
-      CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list_in, buffer_fusion_info.second.inputs_list,
-                               buffer_fusion_info.second.outputs_list);
+      CreateFusionOpKernelInfo(buffer_fusion_info.second.inputs_list, buffer_fusion_info.second.outputs_list);
   }
 }
 
diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
index 9bed7217dd..f2fa63601b 100644
--- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.h
@@ -30,7 +30,6 @@ namespace opt {
 struct BufferFusionInfo_t {
   std::vector<AnfNodePtr> anf_nodes;
   std::vector<AnfNodePtr> inputs_list;
-  std::vector<AnfNodePtr> inputs_list_in;
   std::vector<AnfNodePtr> outputs_list;
   kernel::KernelBuildInfoPtr kernel_build_info;
 };
diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
index e1a18d95da..dbf7097970 100644
--- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
@@ -816,6 +816,8 @@ size_t AnfRuntimeAlgorithm::GetRealInputIndex(const mindspore::AnfNodePtr &anf_n
   MS_EXCEPTION_IF_NULL(anf_node);
   static std::map<std::string, std::map<size_t, size_t>> spec_node_list = {
     {prim::kPrimConv2DBackpropInput->name(), {{0, 1}, {1, 0}}},
+    {kFusionOpConv2DBackpropInputReluGradV2Name, {{0, 1}, {1, 0}, {2, 2}}},
+    {kFusionOpConv2DBackpropInputAddNReluGradV2Name, {{0, 1}, {1, 0}, {2, 2}, {3, 3}}},
     {prim::kPrimConv2DBackpropFilter->name(), {{0, 1}, {1, 0}}},
     {prim::kPrimLogSoftmaxGrad->name(), {{0, 1}, {1, 0}}},
     {prim::kPrimLayerNormGrad->name(), {{0, 1}, {1, 0}, {2, 2}, {3, 3}, {4, 4}}},
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 4dfc4baa31..904acff975 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -122,6 +122,8 @@ constexpr auto kSendOpName = "Send";
 constexpr auto kRecvOpName = "Recv";
 constexpr auto kReluV2OpName = "ReLUV2";
 constexpr auto kReluGradV2OpName = "ReluGradV2";
+constexpr auto kFusionOpConv2DBackpropInputReluGradV2Name = "FusionOp_Conv2DBackpropInput_ReluGradV2";
+constexpr auto kFusionOpConv2DBackpropInputAddNReluGradV2Name = "FusionOp_Conv2DBackpropInput_AddN_ReluGradV2";
 
 // attr key name
 constexpr auto kAttrInputNames = "input_names";

From 78b4479ac62b8ef2621358e1cbd109e4193a0a55 Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Mon, 27 Apr 2020 22:41:27 +0800
Subject: [PATCH 136/242] Fix bug of ApplyRMSProp's check

---
 mindspore/ops/operations/nn_ops.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 84f26c67fc..66656b559e 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -1586,9 +1586,11 @@ class ApplyRMSProp(PrimitiveWithInfer):
         args = {"var": var_dtype, "mean_square": mean_square_dtype, "moment": moment_dtype, "grad": grad_dtype}
         validator.check_tensor_type_same(args, mstype.number_type, self.name)
 
-        args = {"learning_rate": learning_rate_dtype, "decay": decay_dtype,
-                'momentum': momentum_dtype, "epsilon": epsilon_dtype}
-        validator.check_scalar_or_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
+        valid_types = [mstype.float16, mstype.float32]
+        args_decay = {"decay": decay_dtype, 'momentum': momentum_dtype, "epsilon": epsilon_dtype}
+        validator.check_type_same(args_decay, valid_types, self.name)
+        args_lr = {"learning_rate": learning_rate_dtype, "decay": decay_dtype}
+        validator.check_scalar_or_tensor_type_same(args_lr, valid_types, self.name, allow_mix=True)
         return var_dtype
 
 

From 1e904ddceee25ab9fa9677ca44f45bdd909731c8 Mon Sep 17 00:00:00 2001
From: Junhan Hu <junhan.hu@huawei.com>
Date: Mon, 27 Apr 2020 13:10:23 -0400
Subject: [PATCH 137/242] Fix CI warning of samplers.py

---
 mindspore/dataset/engine/samplers.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mindspore/dataset/engine/samplers.py b/mindspore/dataset/engine/samplers.py
index ce732d28a7..972f0af191 100644
--- a/mindspore/dataset/engine/samplers.py
+++ b/mindspore/dataset/engine/samplers.py
@@ -19,8 +19,8 @@ SequentialSampler, SubsetRandomSampler, WeightedRandomSampler.
 User can also define custom sampler by extending from Sampler class.
 """
 
-import mindspore._c_dataengine as cde
 import numpy as np
+import mindspore._c_dataengine as cde
 
 
 class Sampler:
@@ -137,6 +137,7 @@ class DistributedSampler(BuiltinSampler):
         self.shard_id = shard_id
         self.shuffle = shuffle
         self.seed = 0
+        super().__init__()
 
     def create(self):
         # each time user calls create_dict_iterator() (to do repeat) sampler would get a different seed to shuffle
@@ -182,6 +183,7 @@ class PKSampler(BuiltinSampler):
         self.num_val = num_val
         self.shuffle = shuffle
         self.class_column = class_column # work for minddataset
+        super().__init__()
 
     def create(self):
         return cde.PKSampler(self.num_val, self.shuffle)
@@ -192,6 +194,7 @@ class PKSampler(BuiltinSampler):
                     but got class_column={}".format(class_column))
         return cde.MindrecordPkSampler(self.num_val, self.class_column, self.shuffle)
 
+
 class RandomSampler(BuiltinSampler):
     """
     Samples the elements randomly.
@@ -225,6 +228,7 @@ class RandomSampler(BuiltinSampler):
 
         self.replacement = replacement
         self.num_samples = num_samples
+        super().__init__()
 
     def create(self):
         # If num_samples is not specified, then call constructor #2
@@ -275,6 +279,7 @@ class SubsetRandomSampler(BuiltinSampler):
             indices = [indices]
 
         self.indices = indices
+        super().__init__()
 
     def create(self):
         return cde.SubsetRandomSampler(self.indices)
@@ -322,6 +327,7 @@ class WeightedRandomSampler(BuiltinSampler):
         self.weights = weights
         self.num_samples = num_samples
         self.replacement = replacement
+        super().__init__()
 
     def create(self):
         return cde.WeightedRandomSampler(self.weights, self.num_samples, self.replacement)

From 819b102ef89c5ef9e03c15bc92a1f375957ec475 Mon Sep 17 00:00:00 2001
From: jonyguo <guozhijian@huawei.com>
Date: Mon, 27 Apr 2020 15:53:52 +0800
Subject: [PATCH 138/242] add performance test for mindrecord

---
 .../convert_to_mindrecord/imagenet/mr_api.py  |   3 +
 .../imagenet/imagenet_to_mindrecord.py        |  32 +++++
 .../imagenet/imagenet_to_tfrecord.py          | 113 ++++++++++++++++++
 .../mindrecord/imagenet/perf_read_imagenet.py | 106 ++++++++++++++++
 .../perf_test/mindrecord/imagenet/schema.json |  18 +++
 5 files changed, 272 insertions(+)
 create mode 100644 tests/perf_test/mindrecord/imagenet/imagenet_to_mindrecord.py
 create mode 100644 tests/perf_test/mindrecord/imagenet/imagenet_to_tfrecord.py
 create mode 100644 tests/perf_test/mindrecord/imagenet/perf_read_imagenet.py
 create mode 100644 tests/perf_test/mindrecord/imagenet/schema.json

diff --git a/example/convert_to_mindrecord/imagenet/mr_api.py b/example/convert_to_mindrecord/imagenet/mr_api.py
index e569b489b5..c8129ec9ff 100644
--- a/example/convert_to_mindrecord/imagenet/mr_api.py
+++ b/example/convert_to_mindrecord/imagenet/mr_api.py
@@ -118,5 +118,8 @@ def mindrecord_dict_data(task_id):
         image_file = open(file_name, "rb")
         image_bytes = image_file.read()
         image_file.close()
+        if not image_bytes:
+            print("The image file: {} is invalid.".format(file_name))
+            continue
         data["data"] = image_bytes
         yield data
diff --git a/tests/perf_test/mindrecord/imagenet/imagenet_to_mindrecord.py b/tests/perf_test/mindrecord/imagenet/imagenet_to_mindrecord.py
new file mode 100644
index 0000000000..cc3b6d78b0
--- /dev/null
+++ b/tests/perf_test/mindrecord/imagenet/imagenet_to_mindrecord.py
@@ -0,0 +1,32 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""use ImageNetToMR tool generate mindrecord"""
+import os
+from mindspore.mindrecord import ImageNetToMR
+
+IMAGENET_MAP_FILE = "../../../ut/data/mindrecord/testImageNetDataWhole/labels_map.txt"
+IMAGENET_IMAGE_DIR = "../../../ut/data/mindrecord/testImageNetDataWhole/images"
+MINDRECORD_FILE = "./imagenet.mindrecord"
+PARTITION_NUMBER = 16
+
+def imagenet_to_mindrecord():
+    imagenet_transformer = ImageNetToMR(IMAGENET_MAP_FILE,
+                                        IMAGENET_IMAGE_DIR,
+                                        MINDRECORD_FILE,
+                                        PARTITION_NUMBER)
+    imagenet_transformer.transform()
+
+if __name__ == '__main__':
+    imagenet_to_mindrecord()
diff --git a/tests/perf_test/mindrecord/imagenet/imagenet_to_tfrecord.py b/tests/perf_test/mindrecord/imagenet/imagenet_to_tfrecord.py
new file mode 100644
index 0000000000..86d18a7d94
--- /dev/null
+++ b/tests/perf_test/mindrecord/imagenet/imagenet_to_tfrecord.py
@@ -0,0 +1,113 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""generate tfrecord"""
+import collections
+import os
+import tensorflow as tf
+
+IMAGENET_MAP_FILE = "../../../ut/data/mindrecord/testImageNetDataWhole/labels_map.txt"
+IMAGENET_IMAGE_DIR = "../../../ut/data/mindrecord/testImageNetDataWhole/images"
+TFRECORD_FILE = "./imagenet.tfrecord"
+PARTITION_NUMBER = 16
+
+def get_imagenet_filename_label_pic(map_file, image_dir):
+    """
+    Get data from imagenet.
+
+    Each map file line is "<image dir name> <label>", split on one space.
+
+    Raises:
+        IOError: map file missing, or none of the listed image dirs exist.
+
+    Yields:
+        filename, label, image_bytes
+    """
+    if not os.path.exists(map_file):
+        raise IOError("map file {} not exists".format(map_file))
+
+    label_dict = {}
+    with open(map_file) as fp:
+        for line in fp:
+            labels = line.split(" ")
+            label_dict[labels[1]] = labels[0]
+
+    # get all the dir which are n02087046, n02094114, n02109525
+    dir_paths = {}
+    for label, dir_name in label_dict.items():
+        real_path = os.path.join(image_dir, dir_name)
+        if os.path.isdir(real_path):
+            dir_paths[label] = real_path
+        else:
+            print("{} dir is not exist".format(real_path))
+
+    if not dir_paths:
+        raise IOError("not valid image dir in {}".format(image_dir))
+
+    # get the filename, label and image binary as a tuple
+    for label, dir_path in dir_paths.items():
+        for item in os.listdir(dir_path):
+            file_name = os.path.join(dir_path, item)
+            if not item.endswith(("JPEG", "jpg")):
+                print("{} file is not suffix with JPEG/jpg, skip it.".format(file_name))
+                continue
+            with open(file_name, "rb") as image_file:
+                image_bytes = image_file.read()
+            if not image_bytes:
+                print("The image file: {} is invalid.".format(file_name))
+                continue
+            yield str(file_name), int(label), image_bytes
+
+def create_int_feature(values):
+    """Wrap one int in a tf.train.Feature holding a single-element Int64List."""
+    return tf.train.Feature(int64_list=tf.train.Int64List(value=[values]))
+
+def create_string_feature(values):
+    """Wrap one str, encoded as UTF-8, in a tf.train.Feature holding a single-element BytesList."""
+    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[bytes(values, encoding='utf-8')]))
+
+def create_bytes_feature(values):
+    """Wrap one bytes object in a tf.train.Feature holding a single-element BytesList."""
+    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))
+
+def imagenet_to_tfrecord():
+    """Spread the ImageNet samples round-robin over PARTITION_NUMBER tfrecord files."""
+    # file names are TFRECORD_FILE followed by a zero-padded, two-digit partition index
+    writers = [tf.io.TFRecordWriter(TFRECORD_FILE + str(i).rjust(2, '0'))
+               for i in range(PARTITION_NUMBER)]
+
+    writer_index = 0
+    total_written = 0
+    samples = get_imagenet_filename_label_pic(IMAGENET_MAP_FILE, IMAGENET_IMAGE_DIR)
+
+    try:
+        for file_name, label, image_bytes in samples:
+            features = collections.OrderedDict()
+            features["file_name"] = create_string_feature(file_name)
+            features["label"] = create_int_feature(label)
+            features["data"] = create_bytes_feature(image_bytes)
+            tf_example = tf.train.Example(features=tf.train.Features(feature=features))
+
+            writers[writer_index].write(tf_example.SerializeToString())
+            writer_index = (writer_index + 1) % len(writers)
+            total_written += 1
+    finally:
+        # close every writer even when reading or serializing a sample fails
+        for writer in writers:
+            writer.close()
+
+    print("Write {} total examples".format(total_written))
+
+if __name__ == '__main__':
+    imagenet_to_tfrecord()
diff --git a/tests/perf_test/mindrecord/imagenet/perf_read_imagenet.py b/tests/perf_test/mindrecord/imagenet/perf_read_imagenet.py
new file mode 100644
index 0000000000..fa5baef266
--- /dev/null
+++ b/tests/perf_test/mindrecord/imagenet/perf_read_imagenet.py
@@ -0,0 +1,106 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""test dataset performance about mindspore.MindDataset, mindspore.TFRecordDataset, tf.data.TFRecordDataset"""
+import time
+import mindspore.dataset as ds
+from mindspore.mindrecord import FileReader
+
+import tensorflow as tf
+
+print_step = 5000
+
+def print_log(count):
+    if count % print_step == 0:  # report progress once every print_step rows
+        print("Read {} rows ...".format(count))
+
+def use_filereader(mindrecord):
+    """Time one full pass over `mindrecord` read through FileReader."""
+    start = time.time()
+    reader = FileReader(file_name=mindrecord, num_consumer=4, columns=["data", "label"])
+    num_iter = 0
+    try:
+        for num_iter, _ in enumerate(reader.get_next(), start=1):
+            print_log(num_iter)
+        end = time.time()  # taken before close() so teardown is not part of the measurement
+    finally:
+        reader.close()  # the reader used to be left open
+    print("Read by FileReader - total rows: {}, cost time: {}s".format(num_iter, end - start))
+
+def use_minddataset(mindrecord):
+    """Time one full pass over `mindrecord` read through mindspore.dataset.MindDataset."""
+    start = time.time()
+    data_set = ds.MindDataset(dataset_file=mindrecord,
+                              columns_list=["data", "label"],
+                              num_parallel_workers=4)
+    num_iter = 0
+    for num_iter, _ in enumerate(data_set.create_dict_iterator(), start=1):
+        # print_log only prints on multiples of print_step
+        print_log(num_iter)
+    end = time.time()
+    print("Read by MindDataset - total rows: {}, cost time: {}s".format(num_iter, end - start))
+
+def use_tfrecorddataset(tfrecord):
+    """Time one full pass over the `tfrecord` files read through mindspore.dataset.TFRecordDataset."""
+    start = time.time()
+    data_set = ds.TFRecordDataset(dataset_files=tfrecord,
+                                  columns_list=["data", "label"],
+                                  num_parallel_workers=4,
+                                  shuffle=ds.Shuffle.GLOBAL)
+    # the rows additionally go through shuffle(10000) before being iterated
+    data_set = data_set.shuffle(10000)
+    num_iter = 0
+    for num_iter, _ in enumerate(data_set.create_dict_iterator(), start=1):
+        print_log(num_iter)
+    end = time.time()
+    print("Read by TFRecordDataset - total rows: {}, cost time: {}s".format(num_iter, end - start))
+
+def use_tensorflow_tfrecorddataset(tfrecord):
+    """Time one full pass over the `tfrecord` files read through tf.data.TFRecordDataset."""
+    start = time.time()
+
+    def _parse_record(example_photo):
+        features = {
+            'file_name': tf.io.FixedLenFeature([], tf.string),
+            'label': tf.io.FixedLenFeature([1], tf.int64),
+            'data': tf.io.FixedLenFeature([], tf.string)}
+        return tf.io.parse_single_example(example_photo, features=features)
+
+    data_set = tf.data.TFRecordDataset(filenames=tfrecord,
+                                       buffer_size=100000,
+                                       num_parallel_reads=4)
+    data_set = data_set.map(_parse_record, num_parallel_calls=4)
+    num_iter = 0
+    for num_iter, _ in enumerate(data_set, start=1):
+        print_log(num_iter)
+    end = time.time()
+    print("Read by TensorFlow TFRecordDataset - total rows: {}, cost time: {}s".format(num_iter, end - start))
+
+if __name__ == '__main__':
+    # use MindDataset; only the file with the 00 suffix is passed to it
+    mindrecord = './imagenet.mindrecord00'
+    use_minddataset(mindrecord)
+
+    # use TFRecordDataset on all 16 tfrecord files (suffixes 00-15)
+    tfrecord = ['imagenet.tfrecord00', 'imagenet.tfrecord01', 'imagenet.tfrecord02', 'imagenet.tfrecord03',
+                'imagenet.tfrecord04', 'imagenet.tfrecord05', 'imagenet.tfrecord06', 'imagenet.tfrecord07',
+                'imagenet.tfrecord08', 'imagenet.tfrecord09', 'imagenet.tfrecord10', 'imagenet.tfrecord11',
+                'imagenet.tfrecord12', 'imagenet.tfrecord13', 'imagenet.tfrecord14', 'imagenet.tfrecord15']
+    use_tfrecorddataset(tfrecord)
+
+    # use TensorFlow TFRecordDataset on the same file list
+    use_tensorflow_tfrecorddataset(tfrecord)
+
+    # use FileReader (left disabled)
+    # use_filereader(mindrecord)
diff --git a/tests/perf_test/mindrecord/imagenet/schema.json b/tests/perf_test/mindrecord/imagenet/schema.json
new file mode 100644
index 0000000000..b76f9113d7
--- /dev/null
+++ b/tests/perf_test/mindrecord/imagenet/schema.json
@@ -0,0 +1,18 @@
+{
+  "datasetType": "TF",
+  "numRows": 930059,
+  "columns": {
+    "file_name": {
+      "type": "uint8",
+      "rank": 0
+    },
+    "label": {
+      "type": "int64",
+      "rank": 0
+    },
+    "data": {
+      "type": "uint8",
+      "rank": 0
+    }
+  }
+}

From 6ae8345cad3b8c74e9aa082c94906210a1ba9a47 Mon Sep 17 00:00:00 2001
From: rick_sanchez <hw.huangyong@huawei.com>
Date: Sun, 26 Apr 2020 16:27:09 +0800
Subject: [PATCH 139/242] refactor vm module for multigraph sink

---
 mindspore/ccsrc/session/ascend_session.cc     |  80 +++++---
 mindspore/ccsrc/session/ascend_session.h      |   4 +
 mindspore/ccsrc/session/kernel_graph.h        |   2 +-
 mindspore/ccsrc/utils/base_ref.h              |   9 +
 mindspore/ccsrc/vm/backend.cc                 | 106 +++++++---
 mindspore/ccsrc/vm/backend.h                  |  16 +-
 mindspore/ccsrc/vm/transform.cc               |  23 ++-
 mindspore/ccsrc/vm/vm.cc                      | 116 +++++++----
 mindspore/ccsrc/vm/vm.h                       |  10 +-
 tests/st/control/test_multigraph_sink.py      | 184 ++++++++++++++++++
 .../pynative_mode/test_multigraph_sink.py     | 119 +++++++++++
 11 files changed, 561 insertions(+), 108 deletions(-)
 create mode 100644 tests/st/control/test_multigraph_sink.py
 create mode 100644 tests/ut/python/pynative_mode/test_multigraph_sink.py

diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc
index 253d2d08ae..b15637e7be 100755
--- a/mindspore/ccsrc/session/ascend_session.cc
+++ b/mindspore/ccsrc/session/ascend_session.cc
@@ -800,45 +800,77 @@ void AscendSession::UpdateGraphOrder(GraphId to_graph_id) {
   }
 }
 
+size_t AscendSession::SetChildGraphInput(const KernelGraphPtr &graph, const AnfNodePtr &node, size_t input_index) {
+  // A multi-output node that is not a TupleGetItem is not bound here: skip one graph input per output tensor.
+  const auto output_num = AnfAlgo::GetOutputTensorNum(node);
+  if (output_num > 1 && !AnfAlgo::CheckPrimitiveType(node, prim::kPrimTupleGetItem)) {
+    return input_index + output_num;
+  }
+  // An input flagged invalid in ValidInputs() is only logged, but it still consumes its slot.
+  if (!graph->ValidInputs()[input_index]) {
+    MS_LOG(DEBUG) << "Invalid input arg: " << node->DebugString();
+  } else {
+    SetChildGraphParameter(node, graph->inputs()[input_index]);
+  }
+  return input_index + 1;
+}
+
+size_t AscendSession::SetChildGraphInput(const KernelGraphPtr &graph, const ValuePtr &value, size_t input_index) {
+  MS_EXCEPTION_IF_NULL(value);
+  // only a value holding a tensor can be bound to the graph input at input_index
+  if (!value->isa<Tensor>()) {
+    MS_LOG(EXCEPTION) << "Value Node should be a tensor, unexpected value: " << value->ToString();
+  }
+  SetChildGraphParameter(value->cast<TensorPtr>(), graph->inputs()[input_index]);
+  return input_index + 1;
+}
+
+size_t AscendSession::SetChildGraphInput(const KernelGraphPtr &graph, const VectorRef &vec_args, size_t input_index) {
+  auto index = input_index;
+  for (auto &arg : vec_args) {
+    if (utils::isa<AnfNodePtr>(arg)) {
+      // arg is an anf node; bind it at the running index so successive elements fill successive inputs
+      auto node = utils::cast<AnfNodePtr>(arg);
+      index = SetChildGraphInput(graph, node, index);
+    } else if (utils::isa<ValuePtr>(arg)) {
+      // arg is a value, which the callee requires to hold a tensor
+      auto value = utils::cast<ValuePtr>(arg);
+      index = SetChildGraphInput(graph, value, index);
+    } else {
+      MS_LOG(EXCEPTION) << "Unexpected arg type " << arg.ToString();
+    }
+  }
+  return index;
+}
+
 void AscendSession::SetChildGraphInput(GraphId g, const VectorRef &args) {
   MS_LOG(INFO) << "Set input of graph " << g;
   auto to_graph = GetGraph(g);
   MS_EXCEPTION_IF_NULL(to_graph);
   DumpGraphInputArgs(args);
   UpdateGraphOrder(g);
-  std::vector<AnfNodePtr> graph_inputs = to_graph->inputs();
-  auto valid_inputs = to_graph->ValidInputs();
+  auto &graph_inputs = to_graph->inputs();
   auto real_args = GetRealArgs(to_graph, args);
   size_t input_index = 0;
   for (size_t i = 0; i < real_args.size(); i++) {
     if (input_index >= graph_inputs.size()) {
       MS_LOG(EXCEPTION) << "input_index " << input_index << " out of range size " << graph_inputs.size();
     }
-    if (utils::isa<AnfNodePtr>(real_args[i])) {
+    auto &real_arg = real_args[i];
+    if (utils::isa<AnfNodePtr>(real_arg)) {
       // arg is a anf node
-      auto real_arg = utils::cast<AnfNodePtr>(real_args[i]);
-      auto real_arg_output_num = AnfAlgo::GetOutputTensorNum(real_arg);
-      if (!AnfAlgo::CheckPrimitiveType(real_arg, prim::kPrimTupleGetItem) && real_arg_output_num > 1) {
-        input_index += real_arg_output_num;
-        continue;
-      }
-      if (valid_inputs[input_index]) {
-        SetChildGraphParameter(real_arg, graph_inputs[input_index]);
-      } else {
-        MS_LOG(DEBUG) << "Invalid input arg" << real_arg->DebugString();
-      }
-      input_index++;
-    } else if (utils::isa<ValuePtr>(args[i])) {
-      auto value = utils::cast<ValuePtr>(args[i]);
-      MS_EXCEPTION_IF_NULL(value);
+      auto node = utils::cast<AnfNodePtr>(real_arg);
+      input_index = SetChildGraphInput(to_graph, node, input_index);
+    } else if (utils::isa<ValuePtr>(real_arg)) {
       // arg is a tensor
-      if (!value->isa<Tensor>()) {
-        MS_LOG(EXCEPTION) << "Value Node should be a tensor, unexpected value: " << value->ToString();
-      }
-      SetChildGraphParameter(value->cast<TensorPtr>(), graph_inputs[input_index]);
-      input_index++;
+      auto value = utils::cast<ValuePtr>(real_arg);
+      input_index = SetChildGraphInput(to_graph, value, input_index);
+    } else if (utils::isa<VectorRef>(real_arg)) {
+      // arg is a VectorRef
+      auto vec_args = utils::cast<VectorRef>(real_arg);
+      input_index = SetChildGraphInput(to_graph, vec_args, input_index);
     } else {
-      MS_LOG(EXCEPTION) << "Unexpected arg type " << args[i].ToString();
+      MS_LOG(EXCEPTION) << "Unexpected arg type " << real_arg.ToString();
     }
   }
   MS_LOG(INFO) << "Finish!";
diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/session/ascend_session.h
index 0b006256a1..eec4e4ea41 100755
--- a/mindspore/ccsrc/session/ascend_session.h
+++ b/mindspore/ccsrc/session/ascend_session.h
@@ -79,6 +79,10 @@ class AscendSession : public SessionBasic {
   void RunOpHardwareOptimize(const std::shared_ptr<session::KernelGraph> &kernel_graph) const;
   void RunOpExecTask(const std::shared_ptr<KernelGraph> &kernel_graph) const;
 
+  size_t SetChildGraphInput(const KernelGraphPtr &graph, const AnfNodePtr &node, size_t input_index);
+  size_t SetChildGraphInput(const KernelGraphPtr &graph, const ValuePtr &value, size_t input_index);
+  size_t SetChildGraphInput(const KernelGraphPtr &graph, const VectorRef &vec_args, size_t input_index);
+
   // merge execution order list of child graphs
   void MergeGraphExecOrder();
   // insert assion op to sync data bettween different graphs
diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/session/kernel_graph.h
index a33e8f7bd6..8cafcc2ebc 100755
--- a/mindspore/ccsrc/session/kernel_graph.h
+++ b/mindspore/ccsrc/session/kernel_graph.h
@@ -88,7 +88,7 @@ class KernelGraph : public FuncGraph {
   void set_executable(bool executable) { executable_ = executable; }
   // set invalid inputs for control sink
   std::vector<bool> *MutableValidInputs() { return &valid_inputs_; }
-  std::vector<bool> ValidInputs() { return valid_inputs_; }
+  const std::vector<bool> &ValidInputs() const { return valid_inputs_; }
 
  private:
   // remove value node form graph
diff --git a/mindspore/ccsrc/utils/base_ref.h b/mindspore/ccsrc/utils/base_ref.h
index 6e7911d0d9..74ccff8f80 100644
--- a/mindspore/ccsrc/utils/base_ref.h
+++ b/mindspore/ccsrc/utils/base_ref.h
@@ -228,6 +228,8 @@ T cast(const BaseRef &handle) {
 
 class VectorRef : public BaseRef {
  public:
+  using value_type = BaseRef;
+
   VectorRef() {}
   explicit VectorRef(const std::vector<BaseRef> &elements) : elements_(elements) {}
   VectorRef(const const_iterator &begin, const const_iterator &end) : elements_(begin, end) {}
@@ -251,6 +253,13 @@ class VectorRef : public BaseRef {
     return elements_[dim];
   }
 
+  BaseRef &operator[](const std::size_t &dim) {  // mutable access; out-of-range dim goes to MS_LOG(EXCEPTION)
+    if (dim >= size()) {
+      MS_LOG(EXCEPTION) << "Out of the size of the tuple.";
+    }
+    return elements_[dim];
+  }
+
   uint32_t type() const override { return tid(); }
   std::string ToString() const override;
   std::vector<BaseRef> &elements() { return elements_; }
diff --git a/mindspore/ccsrc/vm/backend.cc b/mindspore/ccsrc/vm/backend.cc
index d754667cce..caf4eb3ee3 100644
--- a/mindspore/ccsrc/vm/backend.cc
+++ b/mindspore/ccsrc/vm/backend.cc
@@ -143,6 +143,66 @@ void MsBackend::SetSwitchGraph() {
   }
 }
 
+// Convert node from a formal parameter to the actual parameter supplied by the graph's recorded user,
+// and repeat while the graph reached is not func_graph (the top while graph when recalling a while).
+// Stops early at a graph without a recorded user, or when node is not a parameter with a matching user input.
+AnfNodePtr MsBackend::ConvertGraphInput(const FuncGraphPtr &func_graph, const AnfNodePtr &node) {
+  auto result = node;
+  auto graph = result->func_graph();
+  while (func_graph != graph) {
+    auto iter = graph_user_inputs_.find(graph);
+    if (iter == graph_user_inputs_.end()) {
+      break;
+    }
+    auto &params = graph->parameters();
+    auto &inputs = iter->second.second;
+    size_t pos = 0;
+    while (pos < params.size() && params[pos] != result) {
+      ++pos;
+    }
+    if (pos >= params.size() || pos >= inputs.size()) {
+      break;
+    }
+    graph = iter->second.first;
+    result = inputs[pos];
+  }
+  return result;
+}
+
+void MsBackend::SetGraphUserInputs(const FuncGraphPtr &func_graph, const FuncGraphPtr &user,
+                                   const AnfNodePtrList &inputs) {
+  // only the first recorded user of a graph is kept; later calls for the same graph are ignored
+  if (graph_user_inputs_.count(func_graph) == 0) {
+    graph_user_inputs_[func_graph] = {user, inputs};
+  }
+}
+
+void MsBackend::RecallGraphInput(const FuncGraphPtr &func_graph, const VectorRef &args, const BaseRef &c) {
+  // position of every formal parameter of func_graph, used to pick the matching entry of args
+  std::unordered_map<AnfNodePtr, size_t> param_pos;
+  const auto &formal_params = func_graph->parameters();
+  for (size_t pos = 0; pos < formal_params.size(); ++pos) {
+    param_pos[formal_params[pos]] = pos;
+  }
+
+  // recall all child graphs in this while
+  for (auto &recorded : graph_inputs_[c]) {
+    auto &graph_id = recorded.first;
+    auto &recorded_args = recorded.second;
+    auto &formal_inputs = graph_id_map_[graph_id].inputs;
+    for (size_t i = 0; i < formal_inputs.size(); ++i) {
+      // overwrite a recorded argument only when its input resolves to a parameter of func_graph
+      auto input = ConvertGraphInput(func_graph, formal_inputs[i]);
+      auto found = param_pos.find(input);
+      if (found != param_pos.end()) {
+        recorded_args[i] = args[found->second];
+      }
+    }
+    sess_->SetChildGraphInput(graph_id, recorded_args);
+  }
+  graph_inputs_.erase(c);
+}
+
 // compile set input output
 VectorRef MsBackend::MsSimuRunGraph(const GraphId &g, const VectorRef &args) {
   MS_LOG(DEBUG) << "set graph input:" << g;
@@ -150,13 +210,20 @@ VectorRef MsBackend::MsSimuRunGraph(const GraphId &g, const VectorRef &args) {
   sess_->SetChildGraphInput(g, args);
 
   if (is_switch_call_) {
-    bool curr_cond = simu_cond_map_[curr_switch_].curr_cond;
-    MS_LOG(DEBUG) << "switch call MsSimuRunGraph:" << curr_cond;
-    if (0 == simu_cond_map_[curr_switch_].cond_graph_map.count(curr_cond)) {
-      MS_LOG(DEBUG) << "switch call MsSimuRunGraph:" << curr_cond << ", " << g;
-      simu_cond_map_[curr_switch_].cond_graph_map[curr_cond] = g;
-      SetSwitchGraph();
+    if (!curr_switch_.is_null()) {
+      // push this {g, args} to all user while graph_inputs for nest while,
+      // when current condition recall over delete this cond in graph_inputs.
+      for (auto &iter : graph_inputs_) {
+        iter.second.push_back({g, args});
+      }
+      if (graph_inputs_.find(curr_switch_) == graph_inputs_.end()) {
+        graph_inputs_[curr_switch_].push_back({g, args});
+      }
     }
+    bool curr_cond = simu_cond_map_[curr_switch_].curr_cond;
+    MS_LOG(DEBUG) << "switch call MsSimuRunGraph:" << curr_cond << ", " << g;
+    simu_cond_map_[curr_switch_].cond_graph_map[curr_cond] = g;
+    SetSwitchGraph();
   }
 
   std::vector<BaseRef> outputs;
@@ -205,42 +272,17 @@ VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args) {
   return outputs;
 }
 
-void MsBackend::SetSimuCondFlag(const BaseRef &c, int flag) {
-  MS_LOG(DEBUG) << "while set cond :" << c.ToString() << ", " << simu_cond_map_.size();
-
-  if (simu_cond_map_.find(c) == simu_cond_map_.end()) {
-    MS_LOG(EXCEPTION) << "error c not find";
-  }
-  simu_cond_map_[c].flag = flag;
-}
-
-int MsBackend::GetSimuCondFlag(const BaseRef &c) {
-  BaseRef cond = c;
-  if (cond.is_null()) {
-    MS_LOG(DEBUG) << "get curr_switch";
-    cond = curr_switch_;
-  }
-  if (simu_cond_map_.find(cond) == simu_cond_map_.end()) {
-    MS_LOG(ERROR) << "error c not find";
-    return -1;
-  }
-  return simu_cond_map_[cond].flag;
-}
-
 SwitchCondStatus MsBackend::SetSimuCond(const BaseRef &c, bool value) {
   MS_LOG(DEBUG) << "set cond :" << c.ToString() << ", " << simu_cond_map_.size();
 
   CondGraph cond_graph;
   cond_graph.curr_cond = value;
   if (simu_cond_map_.find(c) == simu_cond_map_.end()) {
-    cond_graph.flag = 0;
     simu_cond_map_[c] = cond_graph;
   }
 
   if (simu_cond_map_[c].cond_graph_map.count(value)) {
-    if (value == true) {
-      return kCondAlreadyRun;
-    }
+    return kCondAlreadyRun;
   }
   simu_cond_map_[c].curr_cond = value;
   MS_LOG(DEBUG) << "end set cond ";
diff --git a/mindspore/ccsrc/vm/backend.h b/mindspore/ccsrc/vm/backend.h
index b950e7adcb..769dab473e 100644
--- a/mindspore/ccsrc/vm/backend.h
+++ b/mindspore/ccsrc/vm/backend.h
@@ -16,9 +16,11 @@
 #ifndef MINDSPORE_CCSRC_VM_BACKEND_H_
 #define MINDSPORE_CCSRC_VM_BACKEND_H_
 
-#include <string>
+#include <list>
 #include <memory>
+#include <string>
 #include <unordered_map>
+#include <utility>
 
 #include "ir/anf.h"
 #include "vm/segment_runner.h"
@@ -45,6 +47,8 @@ class Backend {
   virtual bool GetCond(const BaseRef &c, bool *value);
   virtual void SetSwitchGraph() {}
   virtual void SetSwitchActive(const BaseRef &, bool) {}
+  virtual void RecallGraphInput(const FuncGraphPtr &, const VectorRef &, const BaseRef &) {}
+  virtual void SetGraphUserInputs(const FuncGraphPtr &, const FuncGraphPtr &, const AnfNodePtrList &) {}
 
   void set_curr_switch(const BaseRef &value) {
     curr_switch_ = value;
@@ -54,8 +58,6 @@ class Backend {
   BaseRef curr_switch() { return curr_switch_; }
   virtual void Link(GraphId) {}
   virtual LinConvertResult GetMultiGraphRun(const FuncGraphPtr &) { return LinConvertResult(); }
-  virtual void SetSimuCondFlag(const BaseRef &, int) {}
-  virtual int GetSimuCondFlag(const BaseRef &) { return 0; }
 
   LinConvertResult multi_result() { return multi_result_; }
   void set_multi_result(const LinConvertResult &value) { multi_result_ = value; }
@@ -75,11 +77,11 @@ class Backend {
   bool simu_flag_;
   LinConvertResult multi_result_;
   AnfNodePtr final_output_;
+  std::unordered_map<FuncGraphPtr, std::pair<FuncGraphPtr, AnfNodePtrList>> graph_user_inputs_;
 };
 
 struct CondGraph {
   bool curr_cond;
-  int flag;
   std::unordered_map<bool, GraphId> cond_graph_map;
 };
 
@@ -97,15 +99,17 @@ class MsBackend : public Backend {
 
   void SetSwitchGraph() override;
   void SetSwitchActive(const BaseRef &c, bool cond) override;
+  void RecallGraphInput(const FuncGraphPtr &, const VectorRef &, const BaseRef &) override;
+  void SetGraphUserInputs(const FuncGraphPtr &, const FuncGraphPtr &, const AnfNodePtrList &) override;
   void Link(GraphId) override;
+  AnfNodePtr ConvertGraphInput(const FuncGraphPtr &, const AnfNodePtr &);
   LinConvertResult GetMultiGraphRun(const FuncGraphPtr &g) override;
-  void SetSimuCondFlag(const BaseRef &c, int flag) override;
-  int GetSimuCondFlag(const BaseRef &c) override;
 
  private:
   session::SessionPtr sess_;
   std::unordered_map<BaseRef, CondGraph, BaseRefHash> simu_cond_map_;
   std::unordered_map<GraphId, LinConvertResult> graph_id_map_;
+  std::unordered_map<BaseRef, std::list<std::pair<GraphId, VectorRef>>, BaseRefHash> graph_inputs_;
 };
 }  // namespace compile
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc
index 1c3c917dae..b14bf54869 100644
--- a/mindspore/ccsrc/vm/transform.cc
+++ b/mindspore/ccsrc/vm/transform.cc
@@ -390,6 +390,16 @@ void CompileGraph::AddTailCall(const AnfNodePtr &fn, size_t size) {
 void CompileGraph::AddPartial(const CNodePtr &node) {
   auto inputs = node->inputs();
   VectorRef args;
+  auto fn = inputs[1];
+  if (!IsValueNode<FuncGraph>(fn)) {
+    MS_LOG(EXCEPTION) << "The type of 1st input of node must be FuncGraph";
+  }
+  if (backend_->is_multi_graph_sink()) {
+    auto func_graph = GetValueNode<FuncGraphPtr>(fn);
+    args.emplace_back(func_graph);
+    AnfNodePtrList outs(inputs.begin() + 2, inputs.end());
+    backend_->SetGraphUserInputs(func_graph, node->func_graph(), outs);
+  }
   for (size_t i = 1; i < inputs.size(); i++) {
     args.emplace_back(Ref(inputs[i]));
   }
@@ -442,12 +452,17 @@ void CompileGraph::AddPrimitive(const CNodePtr &node, const PrimitivePtr &prim)
 }
 
 int CompileGraph::AddCall(const FuncGraphPtr &graph, const CNodePtr &node) {
-  auto node_inputs = node->inputs();
-  AnfNodePtr fn = node_inputs[0];
+  auto inputs = node->inputs();
+  AnfNodePtr fn = inputs[0];
+  if (backend_->is_multi_graph_sink() && IsValueNode<FuncGraph>(fn)) {
+    auto func_graph = GetValueNode<FuncGraphPtr>(fn);
+    AnfNodePtrList outs(inputs.begin() + 1, inputs.end());
+    backend_->SetGraphUserInputs(func_graph, node->func_graph(), outs);
+  }
   (void)Ref(fn);
-  size_t size = node_inputs.size();
+  size_t size = inputs.size();
   for (size_t i = size - 1; i > 0; i--) {
-    AddInput(node_inputs[i]);
+    AddInput(inputs[i]);
   }
   if (node == graph->output()) {
     AddTailCall(fn, size);
diff --git a/mindspore/ccsrc/vm/vm.cc b/mindspore/ccsrc/vm/vm.cc
index a897c72f8f..cf52aafdfe 100644
--- a/mindspore/ccsrc/vm/vm.cc
+++ b/mindspore/ccsrc/vm/vm.cc
@@ -32,7 +32,8 @@ namespace compile {
 // Arguments:
 //   fn_: Callable function.
 //   args_: Sequence of function args.
-StructPartial::StructPartial(int fn, const VectorRef &args) : fn_(fn), args_(args) {}
+//   fg_: Graph of function.
+StructPartial::StructPartial(int fn, const VectorRef &args, const FuncGraphPtr &fg) : fn_(fn), args_(args), fg_(fg) {}
 
 std::ostream &operator<<(std::ostream &os, const StructPartial &other) {
   os << "partial(" << other.fn_ << ", " << other.args_.ToString() << ")";
@@ -40,7 +41,7 @@ std::ostream &operator<<(std::ostream &os, const StructPartial &other) {
 }
 
 bool operator==(const StructPartial &lhs, const StructPartial &rhs) {
-  return (lhs.fn_ == rhs.fn_ && lhs.args_ == rhs.args_);
+  return (lhs.fn_ == rhs.fn_ && lhs.args_ == rhs.args_ && lhs.fg_ == rhs.fg_);
 }
 
 StructSimuSwitch::StructSimuSwitch(const BaseRef &fn, const BaseRef &value) : fn_(fn), value_(value) {}
@@ -242,16 +243,6 @@ void FinalVM::InstTailCall(const VectorRef &args) {
   int nargs = utils::cast<int>(args[2]);
 
   auto new_jmp = Ref(jmp);
-
-  if (backend_->simu_flag()) {
-    if (backend_->GetSimuCondFlag(BaseRef()) == 2) {
-      MS_LOG(DEBUG) << "invoke while call tail first";
-      Pop(height);
-      Push(1);
-      Popp();
-      return;
-    }
-  }
   MoveStack(nargs, height);
   MS_LOG(DEBUG) << "TailCall pushp:" << pc_ << ", jmp:" << jmp;
   DoJmp(new_jmp);
@@ -291,8 +282,30 @@ void FinalVM::InstReturn(const VectorRef &args) {
   MS_LOG(DEBUG) << "End";
 }
 
-void FinalVM::InstPartial(const VectorRef &args) {
-  MS_LOG(DEBUG) << "Start";
+void FinalVM::InstSimuPartial(const VectorRef &args) {
+  // args[0] must hold a FuncGraphPtr; args[1] (callee) and args[2..] (bound args) are ints resolved through Ref()
+  const size_t args_size = 2;
+  if (args.size() < args_size) {
+    MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is "
+                  << args.size() << ".";
+    return;
+  }
+  auto &node = args[0];
+  if (!utils::isa<FuncGraphPtr>(node)) {
+    MS_LOG(ERROR) << "The type of 1st input of node must be FuncGraph";
+    return;
+  }
+  auto fg = utils::cast<FuncGraphPtr>(node);
+  auto fn = utils::cast<int>(Ref(utils::cast<int>(args[1])));
+  MS_LOG(DEBUG) << "Partial argssize:" << args.size();
+  std::vector<BaseRef> outs;
+  for (auto it = args.begin() + 2; it != args.end(); ++it) {
+    outs.emplace_back(Ref(utils::cast<int>(*it)));
+  }
+  Push(std::make_shared<StructPartial>(fn, VectorRef(outs), fg));
+}
+
+void FinalVM::InstRealPartial(const VectorRef &args) {
   const size_t args_size = 1;
   if (args.size() < args_size) {
     MS_LOG(ERROR) << __FUNCTION__ << " requires " << args_size << " or more parameters, while the input size is "
@@ -304,10 +317,18 @@ void FinalVM::InstPartial(const VectorRef &args) {
   auto fn = utils::cast<int>(Ref(fn_));
   MS_LOG(DEBUG) << "Partial argssize:" << args.size();
   std::vector<BaseRef> outs(args.size() - 1);
-
   (void)std::transform(args.begin() + 1, args.end(), outs.begin(),
                        [&, this](const BaseRef &a) { return Ref(utils::cast<int>(a)); });
   Push(std::make_shared<StructPartial>(fn, VectorRef(outs)));
+}
+
+void FinalVM::InstPartial(const VectorRef &args) {
+  MS_LOG(DEBUG) << "Start";
+  if (backend_->is_multi_graph_sink()) {  // sink mode: InstSimuPartial expects a FuncGraph as args[0]
+    InstSimuPartial(args);
+  } else {
+    InstRealPartial(args);
+  }
   MS_LOG(DEBUG) << "End";
 }
 
@@ -328,43 +349,57 @@ void FinalVM::InstSimuSwitch(const VectorRef &args) {
   bool bool_value = cond;
   SwitchCondStatus cond_stat = backend_->SetSimuCond(c, bool_value);
 
-  int cond_flag = backend_->GetSimuCondFlag(c);
-  MS_LOG(DEBUG) << "Simu switch cond:" << cond << ", " << cond_flag << ", " << c.cast<AnfNodePtr>()->DebugString();
-  if (cond_flag == 2) {
-    Popp();
-    Popp();
-    backend_->SetSimuCondFlag(c, 0);
-    return;
-  }
-
   if (cond_stat == kCondAlreadyRun) {
     MS_LOG(DEBUG) << "switch alreay run bool while true jmp";
-    if (cond_flag == 0) {
-      MS_LOG(DEBUG) << "switch second run bool while true jmp";
-      backend_->SetSwitchActive(c, true);
-      Push(std::make_shared<StructSimuSwitch>(Ref(vtrue), c));
-      Pushsp();
-      backend_->SetSimuCondFlag(c, 1);
-      return;
-    } else if (cond_flag == 1) {
-      MS_LOG(DEBUG) << "switch first run bool while if jmp";
-      Push(std::make_shared<StructSimuSwitch>(Ref(vfalse), c));
-      (void)backend_->SetSimuCond(c, false);
-      backend_->SetSimuCondFlag(c, 2);
-      return;
-    } else {
-      MS_LOG(EXCEPTION) << "error cond not find";
-      return;
+    BaseRef jmp = Ref(vtrue);
+    if (utils::isa<StructPartial>(jmp)) {
+      auto new_jmp = utils::cast<std::shared_ptr<StructPartial>>(jmp);
+      backend_->RecallGraphInput(new_jmp->fg_, new_jmp->args_, c);
     }
+    cond_jmp_[c] = Ref(vfalse);
+    Push(static_cast<int>(cond_stat));
+    Popp();
+    backend_->SetSwitchActive(c, bool_value);
+    return;
   }
   if (bool_value) {
     Push(std::make_shared<StructSimuSwitch>(Ref(vtrue), c));
     Pushsp();
   } else {
+    MergeJmpArgs(Ref(vfalse), c);
     Push(std::make_shared<StructSimuSwitch>(Ref(vfalse), c));
   }
 }
 
+void FinalVM::MergeJmpArgs(const BaseRef &jmp, const BaseRef &c) {
+  auto iter = cond_jmp_.find(c);
+  if (iter == cond_jmp_.end()) {
+    return;
+  }
+  auto old_jmp = utils::cast<std::shared_ptr<StructPartial>>(iter->second);
+  auto new_jmp = utils::cast<std::shared_ptr<StructPartial>>(jmp);
+  auto &old_args = old_jmp->args_;
+  auto &new_args = new_jmp->args_;
+  for (size_t i = 0; i < new_args.size(); ++i) {
+    auto &old_arg = old_args[i];
+    auto &new_arg = new_args[i];
+    if (utils::isa<VectorRef>(old_arg)) {
+      auto old_vec_ref = utils::cast<VectorRef>(old_arg);
+      if (utils::isa<VectorRef>(new_arg)) {
+        auto new_vec_ref = utils::cast<VectorRef>(new_arg);
+        std::copy(new_vec_ref.begin(), new_vec_ref.end(), std::back_inserter(old_vec_ref));
+      }
+      new_arg = old_vec_ref;
+    } else if (utils::isa<VectorRef>(new_arg)) {
+      auto new_vec_ref = utils::cast<VectorRef>(new_arg);
+      new_vec_ref.push_back(old_arg);
+      new_arg = new_vec_ref;
+    } else {
+      new_arg = VectorRef({new_arg, old_arg});
+    }
+  }
+}
+
 void FinalVM::InstRealSwitch(const VectorRef &args) {
   const size_t args_size = 3;
   if (args.size() != args_size) {
@@ -399,6 +434,7 @@ void FinalVM::InstSwitch(const VectorRef &args) {
   } else {
     InstRealSwitch(args);
   }
+  MS_LOG(DEBUG) << "End";
 }
 
 void FinalVM::InstTuple(const VectorRef &args) {
diff --git a/mindspore/ccsrc/vm/vm.h b/mindspore/ccsrc/vm/vm.h
index eab726a9b7..a02eced44c 100644
--- a/mindspore/ccsrc/vm/vm.h
+++ b/mindspore/ccsrc/vm/vm.h
@@ -27,6 +27,9 @@
 #include <utility>
 #include <vector>
 #include <deque>
+#include <unordered_map>
+
+#include "ir/anf.h"
 #include "utils/base_ref.h"
 
 namespace mindspore {
@@ -60,13 +63,14 @@ const std::vector<std::string> inst_str{"call",  "tail_call", "return", "partial
 class StructPartial : public Base {
  public:
   // Initialize StructPartial.
-  StructPartial(int fn, const VectorRef &args);
+  StructPartial(int fn, const VectorRef &args, const FuncGraphPtr &fg = nullptr);
 
   virtual ~StructPartial() = default;
   MS_DECLARE_PARENT(StructPartial, Base)
 
   int fn_;
   VectorRef args_;
+  FuncGraphPtr fg_;
 };
 
 std::ostream &operator<<(std::ostream &os, const StructPartial &other);
@@ -98,6 +102,8 @@ class FinalVM {
   void InstTailCall(const VectorRef &args);
   void InstReturn(const VectorRef &args);
   void InstPartial(const VectorRef &args);
+  void InstSimuPartial(const VectorRef &args);
+  void InstRealPartial(const VectorRef &args);
   void InstSwitch(const VectorRef &args);
   void InstSimuSwitch(const VectorRef &args);
   void InstRealSwitch(const VectorRef &args);
@@ -120,6 +126,7 @@ class FinalVM {
   void Pushsp();
   void Popsp();
   void DoJmp(const BaseRef &jmp);
+  void MergeJmpArgs(const BaseRef &jmp, const BaseRef &c);
 
  private:
   InstSet insts_;
@@ -128,6 +135,7 @@ class FinalVM {
   std::stack<int> retsp_;
   int pc_;
   int sp_;
+  std::unordered_map<BaseRef, BaseRef, BaseRefHash> cond_jmp_;
   BackendPtr backend_;
   const InstFunctionMap inst_function_map = {
     {Instruction::kCall, [this](const VectorRef &args) { InstCall(args); }},
diff --git a/tests/st/control/test_multigraph_sink.py b/tests/st/control/test_multigraph_sink.py
new file mode 100644
index 0000000000..b2732a63d4
--- /dev/null
+++ b/tests/st/control/test_multigraph_sink.py
@@ -0,0 +1,184 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+""" test_multigraph_sink """
+import pytest
+import numpy as np
+import mindspore.nn as nn
+import mindspore.context as context
+from mindspore.common.tensor import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.common import ms_function
+from mindspore.ops import operations as P
+
+
+def setup_module(module):
+    context.set_context(mode = context.PYNATIVE_MODE, save_graphs = True, device_target = "Ascend")
+    context.set_context(enable_task_sink = True, device_id = 0)
+
+
+c1 = Tensor([2], mstype.int32)
+c2 = Tensor([14], mstype.int32)
+c3 = Tensor([1], mstype.int32)
+c4 = Tensor([0], mstype.int32)
+c5 = Tensor([14], mstype.int32)
+
+
+@ms_function
+def simple_if(x, y, z):
+    if x < y:
+        x = x + 1
+    else:
+        x = x + 2
+    x = x + 3
+    return x
+
+
+@ms_function
+def if_by_if(x, y, z):
+    if x < y:
+        x = x + 1
+    if y > x:
+        x = x + 2
+    x = x + 3
+    return x
+
+
+@ms_function
+def if_in_if(x, y, z):
+    out = c4
+    if x < y:
+        z = c4 + c4
+        if z < y:
+            z = z + 2
+            out = out + z
+        x = x + 3
+    out = out + x
+    return out
+
+
+@ms_function
+def simple_while(x, y, z):
+    y = y + 4
+    while x < y:
+        x = x + 1
+    x = x + 3
+    return x
+
+
+@ms_function
+def while_by_while(x, y, z):
+    while x < y:
+        x = x + 1
+    while z < c5:
+        z = z + 1
+        x = x + 1
+    x = x + 1
+    return x
+
+
+@ms_function
+def while_in_while(x, y, z):
+    out = c4
+    while x < y:
+        z = c4 + c4
+        while z < y:
+            z = z + 1
+            out = out + z
+        x = x + 1
+    out = out + x
+    return out
+
+
+@ms_function
+def while_by_while_in_while(x, y, z):
+    out = c4
+    while x < c2:
+        y = c4 + c4
+        while y < c2:
+            y = y + 1
+        out = out + y
+        z = c4 + c4
+        while z < c2:
+            z = z + 1
+        out = out + z
+        x = x + 1
+    out = out + x
+    return out
+
+
+@ms_function
+def while_in_while_in_while(x, y, z):
+    out = c4
+    while x < c2:
+        y = c4 + c4
+        while y < c2:
+            y = y + 1
+            z = c4 + c4
+            while z < c2:
+                z = z + 1
+            out = out + z
+        out = out + y
+        x = x + 1
+    out = out + x
+    return out
+
+
+def test_simple_if():
+    output = simple_if(c1, c2, c3)
+    expect = Tensor([6], mstype.int32)
+    assert output == expect
+
+
+def test_if_by_if():
+    output = if_by_if(c1, c2, c3)
+    expect = Tensor([8], mstype.int32)
+    assert output == expect
+
+
+def test_if_in_if():
+    output = if_in_if(c1, c2, c3)
+    expect = Tensor([7], mstype.int32)
+    assert output == expect
+
+
+def test_simple_while():
+    output = simple_while(c1, c2, c3)
+    expect = Tensor([21], mstype.int32)
+    assert output == expect
+
+
+def test_while_by_while():
+    output = while_by_while(c1, c2, c3)
+    expect = Tensor([28], mstype.int32)
+    assert output == expect
+
+
+def test_while_in_while():
+    output = while_in_while(c1, c2, c3)
+    expect = Tensor([1274], mstype.int32)
+    assert output == expect
+
+
+def test_while_by_while_in_while():
+    output = while_by_while_in_while(c1, c2, c3)
+    expect = Tensor([350], mstype.int32)
+    assert output == expect
+
+
+def test_while_in_while_in_while():
+    output = while_in_while_in_while(c1, c2, c3)
+    expect = Tensor([2534], mstype.int32)
+    assert output == expect
+
diff --git a/tests/ut/python/pynative_mode/test_multigraph_sink.py b/tests/ut/python/pynative_mode/test_multigraph_sink.py
new file mode 100644
index 0000000000..0c69c7c2c1
--- /dev/null
+++ b/tests/ut/python/pynative_mode/test_multigraph_sink.py
@@ -0,0 +1,119 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+""" test_multigraph_sink """
+import pytest
+import numpy as np
+import mindspore.nn as nn
+import mindspore.context as context
+from mindspore.common.tensor import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.common import ms_function
+from mindspore.ops import operations as P
+
+
+def setup_module(module):
+    context.set_context(mode = context.PYNATIVE_MODE, save_graphs = True, device_target = "Ascend")
+    context.set_context(enable_task_sink = True, device_id = 0)
+
+
+c1 = Tensor([2], mstype.int32)
+c2 = Tensor([14], mstype.int32)
+c3 = Tensor([1], mstype.int32)
+c4 = Tensor([0], mstype.int32)
+c5 = Tensor([14], mstype.int32)
+
+
+@ms_function
+def simple_if(x, y, z):
+    if x < y:
+        x = x + 1
+    else:
+        x = x + 2
+    x = x + 3
+    return x
+
+
+@ms_function
+def if_by_if(x, y, z):
+    if x < y:
+        x = x + 1
+    if y > x:
+        x = x + 2
+    x = x + 3
+    return x
+
+
+@ms_function
+def if_in_if(x, y, z):
+    out = c4
+    if x < y:
+        z = c4 + c4
+        if z < y:
+            z = z + 2
+            out = out + z
+        x = x + 3
+    out = out + x
+    return out
+
+
+@ms_function
+def simple_while(x, y, z):
+    y = y + 4
+    while x < y:
+        x = x + 1
+    x = x + 3
+    return x
+
+
+@ms_function
+def while_by_while(x, y, z):
+    while x < y:
+        x = x + 1
+    while z < c5:
+        z = z + 1
+        x = x + 1
+    x = x + 1
+    return x
+
+
+def test_simple_if():
+    output = simple_if(c1, c2, c3)
+    expect = Tensor([6], mstype.int32)
+    assert output == expect
+
+
+def test_if_by_if():
+    output = if_by_if(c1, c2, c3)
+    expect = Tensor([8], mstype.int32)
+    assert output == expect
+
+
+def test_if_in_if():
+    output = if_in_if(c1, c2, c3)
+    expect = Tensor([7], mstype.int32)
+    assert output == expect
+
+
+def test_simple_while():
+    output = simple_while(c1, c2, c3)
+    expect = Tensor([21], mstype.int32)
+    assert output == expect
+
+
+def test_while_by_while():
+    output = while_by_while(c1, c2, c3)
+    expect = Tensor([28], mstype.int32)
+    assert output == expect
+

From 0b4648881b10481beef2908a0e3f6d73feca8b19 Mon Sep 17 00:00:00 2001
From: lirongzhen1 <lirongzhen1@huawei.com>
Date: Sun, 26 Apr 2020 22:27:59 +0800
Subject: [PATCH 140/242] add reducescatter bprop

---
 .../auto_parallel/rec_core/rec_partition.cc   |  1 -
 mindspore/ops/_grad/grad_comm_ops.py          | 20 ++++++++++-
 tests/ut/python/communication/test_comm.py    | 34 ++++++++++++++++++-
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
index 81e0eaa2dd..eafe4784a4 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
@@ -29,7 +29,6 @@
 
 namespace mindspore {
 namespace parallel {
-
 // Get the target node's weight for sorting.
 double GetWeights(const Graph::NodeType &node) {
   const OperatorRec &op = node.apply;
diff --git a/mindspore/ops/_grad/grad_comm_ops.py b/mindspore/ops/_grad/grad_comm_ops.py
index 3a31c8aeec..97b8b3fdf3 100644
--- a/mindspore/ops/_grad/grad_comm_ops.py
+++ b/mindspore/ops/_grad/grad_comm_ops.py
@@ -67,11 +67,29 @@ def get_bprop_broad_cast(self):
 @bprop_getters.register(AllGather)
 def get_bprop_all_gather(self):
     """Generate bprop for AllGather"""
-    reduce_scatter_grad = ReduceScatter(ReduceOp.SUM, self.group)
+    all_gather_grad = ReduceScatter(ReduceOp.SUM, self.group)
+    if self.instance_name:
+        instance_name = "grad" + self.instance_name
+        all_gather_grad.set_prim_instance_name(instance_name)
+
+    def bprop(x, out, dout):
+        dx = all_gather_grad(dout)
+        return (dx,)
+
+    return bprop
+
+
+@bprop_getters.register(ReduceScatter)
+def get_bprop_reduce_scatter(self):
+    """Generate bprop for ReduceScatter"""
+    reduce_scatter_grad = AllGather(self.group)
     if self.instance_name:
         instance_name = "grad" + self.instance_name
         reduce_scatter_grad.set_prim_instance_name(instance_name)
 
+    if self.op != ReduceOp.SUM:
+        raise RuntimeError("The reducescatter bprop only support ReduceOp.SUM until now.")
+
     def bprop(x, out, dout):
         dx = reduce_scatter_grad(dout)
         return (dx,)
diff --git a/tests/ut/python/communication/test_comm.py b/tests/ut/python/communication/test_comm.py
index 38fd7199fd..981603b687 100644
--- a/tests/ut/python/communication/test_comm.py
+++ b/tests/ut/python/communication/test_comm.py
@@ -14,7 +14,7 @@
 
 """ test Communicate """
 import numpy as np
-from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp
+from mindspore.ops.operations.comm_ops import AllReduce, AllGather, _AlltoAll, ReduceOp, ReduceScatter
 from mindspore.ops.operations.comm_ops import Broadcast
 from mindspore.communication.management import HCCL_WORLD_COMM_GROUP, NCCL_WORLD_COMM_GROUP, GlobalComm, init
 from mindspore.communication._comm_helper import Backend
@@ -78,6 +78,19 @@ class AllGatherNet(nn.Cell):
         x = self.allgather(x)
         return self.relu(x)
 
+class ReduceScatterNet(nn.Cell):
+    """ReduceScatterNet definition"""
+    def __init__(self, input_channel, out_channel, op):
+        super(ReduceScatterNet, self).__init__()
+        self.dense = Dense(input_channel, out_channel)
+        self.reducescatter = ReduceScatter(op)
+        self.relu = ReLU()
+
+    def construct(self, x):
+        x = self.dense(x)
+        x = self.reducescatter(x)
+        return self.relu(x)
+
 class AlltoAllNet(nn.Cell):
     """AlltoAllNet definition"""
     def __init__(self, input_channel, out_channel):
@@ -126,6 +139,25 @@ def test_allgather():
     network = TrainOneStepCell(network, optimizer)
     _executor.compile(network, input_tensor, label_tensor)
 
+def run_reducescatter(op):
+    """run_reducescatter"""
+    context.set_context(mode=context.GRAPH_MODE)
+    input_tensor = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]], dtype=np.float32))
+    label_tensor = Tensor(np.array([[1.2], [2.2]], dtype=np.float32))
+    network = ReduceScatterNet(2, 1, op)
+    loss_fn = nn.SoftmaxCrossEntropyWithLogits()
+    optimizer = Momentum(filter(lambda x: x.requires_grad, network.get_parameters()),
+                         learning_rate=0.1,
+                         momentum=0.9)
+    network = WithLossCell(network, loss_fn)
+    network = TrainOneStepCell(network, optimizer)
+    _executor.compile(network, input_tensor, label_tensor)
+
+def test_reducescatter():
+    """test_reducescatter"""
+    context.set_context(mode=context.GRAPH_MODE)
+    run_reducescatter(ReduceOp.SUM)
+
 def test_broadcast():
     """test_broadcast"""
     context.set_context(mode=context.GRAPH_MODE)

From c538b83712304c6f1c9614bb05b9cb51b3d07425 Mon Sep 17 00:00:00 2001
From: zjun <zhangjun0@huawei.com>
Date: Mon, 27 Apr 2020 18:35:10 +0800
Subject: [PATCH 141/242] remove enable hccl

---
 example/resnet101_imagenet/eval.py               | 16 +++++-----------
 example/resnet101_imagenet/train.py              | 16 +++++-----------
 example/resnet50_cifar10/eval.py                 | 16 +++++-----------
 example/resnet50_cifar10/train.py                | 16 +++++-----------
 example/vgg16_cifar10/eval.py                    |  2 +-
 example/vgg16_cifar10/train.py                   |  2 +-
 example/yolov3_coco2017/train.py                 |  2 --
 mindspore/ccsrc/pipeline/init.cc                 |  2 --
 mindspore/ccsrc/pipeline/pipeline.cc             |  2 +-
 mindspore/context.py                             | 11 +----------
 mindspore/nn/wrap/grad_reducer.py                |  2 +-
 tests/st/auto_parallel/onehot_model_parallel.py  |  1 -
 .../soft_entropy_loss_expand_parallel.py         |  1 -
 .../test_resnet50_expand_loss_2p.py              |  1 -
 tests/st/mem_reuse/resnet_cifar_memreuse.py      | 14 ++++----------
 tests/st/mem_reuse/resnet_cifar_normal.py        | 14 ++++----------
 tests/st/tbe_networks/resnet_cifar.py            | 14 ++++----------
 tests/st/tbe_networks/test_resnet_cifar_8p.py    |  1 -
 .../python/parallel/test_auto_parallel_resnet.py |  3 ++-
 19 files changed, 39 insertions(+), 97 deletions(-)

diff --git a/example/resnet101_imagenet/eval.py b/example/resnet101_imagenet/eval.py
index 00fe825e91..979c6ca949 100755
--- a/example/resnet101_imagenet/eval.py
+++ b/example/resnet101_imagenet/eval.py
@@ -51,17 +51,11 @@ context.set_context(enable_loop_sink=True)
 context.set_context(enable_mem_reuse=True)
 
 if __name__ == '__main__':
-    if args_opt.do_eval:
-        context.set_context(enable_hccl=False)
-    else:
-        if args_opt.run_distribute:
-            context.set_context(enable_hccl=True)
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True, parameter_broadcast=True)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
-            init()
-        else:
-            context.set_context(enable_hccl=False)
+    if not args_opt.do_eval and args_opt.run_distribute:
+        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          mirror_mean=True, parameter_broadcast=True)
+        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        init()
 
     epoch_size = config.epoch_size
     net = resnet101(class_num=config.class_num)
diff --git a/example/resnet101_imagenet/train.py b/example/resnet101_imagenet/train.py
index 0f20637595..c2de3e8d98 100755
--- a/example/resnet101_imagenet/train.py
+++ b/example/resnet101_imagenet/train.py
@@ -56,17 +56,11 @@ context.set_context(enable_loop_sink=True)
 context.set_context(enable_mem_reuse=True)
 
 if __name__ == '__main__':
-    if args_opt.do_eval:
-        context.set_context(enable_hccl=False)
-    else:
-        if args_opt.run_distribute:
-            context.set_context(enable_hccl=True)
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True, parameter_broadcast=True)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
-            init()
-        else:
-            context.set_context(enable_hccl=False)
+    if not args_opt.do_eval and args_opt.run_distribute:
+        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          mirror_mean=True, parameter_broadcast=True)
+        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        init()
 
     epoch_size = config.epoch_size
     net = resnet101(class_num=config.class_num)
diff --git a/example/resnet50_cifar10/eval.py b/example/resnet50_cifar10/eval.py
index 243dc2a332..1134d0bd2e 100755
--- a/example/resnet50_cifar10/eval.py
+++ b/example/resnet50_cifar10/eval.py
@@ -51,17 +51,11 @@ context.set_context(enable_loop_sink=True)
 context.set_context(enable_mem_reuse=True)
 
 if __name__ == '__main__':
-    if args_opt.do_eval:
-        context.set_context(enable_hccl=False)
-    else:
-        if args_opt.run_distribute:
-            context.set_context(enable_hccl=True)
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
-            init()
-        else:
-            context.set_context(enable_hccl=False)
+    if not args_opt.do_eval and args_opt.run_distribute:
+        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          mirror_mean=True)
+        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        init()
 
     epoch_size = config.epoch_size
     net = resnet50(class_num=config.class_num)
diff --git a/example/resnet50_cifar10/train.py b/example/resnet50_cifar10/train.py
index b18c3778de..0a3ad9dc5a 100755
--- a/example/resnet50_cifar10/train.py
+++ b/example/resnet50_cifar10/train.py
@@ -54,17 +54,11 @@ context.set_context(enable_loop_sink=True)
 context.set_context(enable_mem_reuse=True)
 
 if __name__ == '__main__':
-    if args_opt.do_eval:
-        context.set_context(enable_hccl=False)
-    else:
-        if args_opt.run_distribute:
-            context.set_context(enable_hccl=True)
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                              mirror_mean=True)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
-            init()
-        else:
-            context.set_context(enable_hccl=False)
+    if not args_opt.do_eval and args_opt.run_distribute:
+        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
+                                          mirror_mean=True)
+        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        init()
 
     epoch_size = config.epoch_size
     net = resnet50(class_num=config.class_num)
diff --git a/example/vgg16_cifar10/eval.py b/example/vgg16_cifar10/eval.py
index ca2bbd12eb..68c23d250f 100644
--- a/example/vgg16_cifar10/eval.py
+++ b/example/vgg16_cifar10/eval.py
@@ -37,7 +37,7 @@ if __name__ == '__main__':
 
     context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
     context.set_context(device_id=args_opt.device_id)
-    context.set_context(enable_mem_reuse=True, enable_hccl=False)
+    context.set_context(enable_mem_reuse=True)
 
     net = vgg16(num_classes=cfg.num_classes)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum,
diff --git a/example/vgg16_cifar10/train.py b/example/vgg16_cifar10/train.py
index 234e3f7c7e..52ba0ecdf4 100644
--- a/example/vgg16_cifar10/train.py
+++ b/example/vgg16_cifar10/train.py
@@ -66,7 +66,7 @@ if __name__ == '__main__':
     context.set_context(device_id=args_opt.device_id)
     context.set_context(enable_task_sink=True)
     context.set_context(enable_loop_sink=True)
-    context.set_context(enable_mem_reuse=True, enable_hccl=False)
+    context.set_context(enable_mem_reuse=True)
 
     device_num = int(os.environ.get("DEVICE_NUM", 1))
     if device_num > 1:
diff --git a/example/yolov3_coco2017/train.py b/example/yolov3_coco2017/train.py
index c7d28a8350..bccc66d996 100644
--- a/example/yolov3_coco2017/train.py
+++ b/example/yolov3_coco2017/train.py
@@ -90,13 +90,11 @@ if __name__ == '__main__':
     if args_opt.distribute:
         device_num = args_opt.device_num
         context.reset_auto_parallel_context()
-        context.set_context(enable_hccl=True)
         context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
                                           device_num=device_num)
         init()
         rank = args_opt.device_id % device_num
     else:
-        context.set_context(enable_hccl=False)
         rank = 0
         device_num = 1
 
diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc
index f5cacc7ed5..9bf2aedde4 100644
--- a/mindspore/ccsrc/pipeline/init.cc
+++ b/mindspore/ccsrc/pipeline/init.cc
@@ -115,8 +115,6 @@ PYBIND11_MODULE(_c_expression, m) {
     .def("set_device_id", &mindspore::MsContext::set_device_id, "Set device id.")
     .def("open_tsd", &mindspore::MsContext::OpenTsd, "Open tdt dataset client.")
     .def("close_tsd", &mindspore::MsContext::CloseTsd, "Close tdt dataset client.")
-    .def("set_hccl_flag", &mindspore::MsContext::set_enable_hccl, "Set enable hccl.")
-    .def("get_hccl_flag", &mindspore::MsContext::enable_hccl, "Get whether to enable hccl.")
     .def("set_task_sink_flag", &mindspore::MsContext::set_enable_task_sink, "Set enable task sink.")
     .def("get_task_sink_flag", &mindspore::MsContext::enable_task_sink, "Get whether to enable task sink.")
     .def("get_save_graphs_flag", &mindspore::MsContext::save_graphs_flag, "Get whether to save graphs.")
diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index 7a1830e89a..63920cac13 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -773,7 +773,7 @@ void InitHccl() {
   (void)ms_context->OpenTsd();
   uint32_t device_id = ms_context->device_id();
   std::string device_name = ms_context->device_target();
-
+  ms_context->set_enable_hccl(true);
   if (ms_context->backend_policy() == "ms" && ms_context->device_target() == kAscendDevice) {
     auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(device_name, device_id);
     MS_EXCEPTION_IF_NULL(runtime_instance);
diff --git a/mindspore/context.py b/mindspore/context.py
index 237b2143ed..9b6842e4b6 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -225,14 +225,6 @@ class _Context:
         if not success:
             raise RuntimeError("Device id set failed!!!")
 
-    @property
-    def enable_hccl(self):
-        return self._context_handle.get_hccl_flag()
-
-    @enable_hccl.setter
-    def enable_hccl(self, hccl):
-        self._context_handle.set_hccl_flag(hccl)
-
     @property
     def enable_ir_fusion(self):
         return self._context_handle.get_ir_fusion_flag()
@@ -482,7 +474,7 @@ def reset_auto_parallel_context():
 
 
 @args_type_check(mode=int, precompile_only=bool, device_target=str,
-                 device_id=int, enable_ir_fusion=bool, save_graphs=bool, enable_hccl=bool,
+                 device_id=int, enable_ir_fusion=bool, save_graphs=bool,
                  enable_task_sink=bool, save_graphs_path=str, enable_loop_sink=bool,
                  enable_mem_reuse=bool, save_ms_model=bool, save_ms_model_path=str, enable_gpu_summary=bool,
                  enable_auto_mixed_precision=bool, enable_dump=bool, save_dump_path=str,
@@ -515,7 +507,6 @@ def set_context(**kwargs):
                     while device_num_per_host should no more than 4096. Default: 0.
         enable_ir_fusion (bool): Whether to enable ir fusion. Default: True.
         save_graphs (bool): Whether to save graphs. Default: False.
-        enable_hccl (bool): Whether to enable hccl. Default: False.
         enable_loop_sink (bool): Whether to enable loop sink. Default: True.
         enable_task_sink (bool): Whether to enable task sink. Default: True.
         enable_mem_reuse (bool): Whether to enable memory reuse. Default: True.
diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py
index 01346698ee..ee57297fe0 100644
--- a/mindspore/nn/wrap/grad_reducer.py
+++ b/mindspore/nn/wrap/grad_reducer.py
@@ -130,7 +130,7 @@ class DistributedGradReducer(Cell):
         >>>
         >>> device_id = int(os.environ["DEVICE_ID"])
         >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True,
-        >>>                     device_id=int(device_id), enable_hccl=True)
+        >>>                     device_id=int(device_id))
         >>> init()
         >>> context.reset_auto_parallel_context()
         >>> context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL)
diff --git a/tests/st/auto_parallel/onehot_model_parallel.py b/tests/st/auto_parallel/onehot_model_parallel.py
index 1f35ac1f80..3c41e2975e 100644
--- a/tests/st/auto_parallel/onehot_model_parallel.py
+++ b/tests/st/auto_parallel/onehot_model_parallel.py
@@ -33,7 +33,6 @@ def setup_module():
     global rank_id
     np.random.seed(0)
     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
-    context.set_context(enable_hccl=True)
     context.set_context(enable_task_sink=True,
                         device_id=device_id)
     context.set_context(enable_ir_fusion=True)
diff --git a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py
index 86a8b89521..767094c044 100644
--- a/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py
+++ b/tests/st/auto_parallel/soft_entropy_loss_expand_parallel.py
@@ -46,7 +46,6 @@ def setup_module():
     global rank_id
     np.random.seed(0)
     context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
-    context.set_context(enable_hccl=True)
     context.set_context(enable_task_sink=True,
                         device_id=device_id)
     context.set_context(enable_ir_fusion=True)
diff --git a/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py b/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py
index b28ad510e3..41f08f54ee 100644
--- a/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py
+++ b/tests/st/auto_parallel/test_resnet50_expand_loss_2p.py
@@ -31,7 +31,6 @@ from mindspore.train.callback import Callback
 from mindspore.parallel import set_algo_parameters
 
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
-context.set_context(enable_hccl=True)
 context.set_context(enable_task_sink=True, device_id=int(os.getenv('DEVICE_ID')))
 context.set_context(enable_ir_fusion=True)
 context.set_context(enable_loop_sink=False)
diff --git a/tests/st/mem_reuse/resnet_cifar_memreuse.py b/tests/st/mem_reuse/resnet_cifar_memreuse.py
index d6310612b6..2604fe5850 100644
--- a/tests/st/mem_reuse/resnet_cifar_memreuse.py
+++ b/tests/st/mem_reuse/resnet_cifar_memreuse.py
@@ -122,16 +122,10 @@ class CrossEntropyLoss(nn.Cell):
 
 
 if __name__ == '__main__':
-    if args_opt.do_eval:
-        context.set_context(enable_hccl=False)
-    else:
-        if args_opt.run_distribute:
-            context.set_context(enable_hccl=True)
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL)
-            context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140])
-            init()
-        else:
-            context.set_context(enable_hccl=False)
+    if not args_opt.do_eval and args_opt.run_distribute:
+        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL)
+        context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140])
+        init()
 
     context.set_context(mode=context.GRAPH_MODE)
     epoch_size = args_opt.epoch_size
diff --git a/tests/st/mem_reuse/resnet_cifar_normal.py b/tests/st/mem_reuse/resnet_cifar_normal.py
index 2b6741e57a..8e037212d0 100644
--- a/tests/st/mem_reuse/resnet_cifar_normal.py
+++ b/tests/st/mem_reuse/resnet_cifar_normal.py
@@ -123,16 +123,10 @@ class CrossEntropyLoss(nn.Cell):
 
 
 if __name__ == '__main__':
-    if args_opt.do_eval:
-        context.set_context(enable_hccl=False)
-    else:
-        if args_opt.run_distribute:
-            context.set_context(enable_hccl=True)
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL)
-            context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140])
-            init()
-        else:
-            context.set_context(enable_hccl=False)
+    if not args_opt.do_eval and args_opt.run_distribute:
+        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL)
+        context.set_auto_parallel_context(all_reduce_fusion_split_indices=[140])
+        init()
 
     context.set_context(mode=context.GRAPH_MODE)
     epoch_size = args_opt.epoch_size
diff --git a/tests/st/tbe_networks/resnet_cifar.py b/tests/st/tbe_networks/resnet_cifar.py
index 7bd03f5d81..4709b3ac70 100644
--- a/tests/st/tbe_networks/resnet_cifar.py
+++ b/tests/st/tbe_networks/resnet_cifar.py
@@ -122,16 +122,10 @@ class CrossEntropyLoss(nn.Cell):
 
 
 if __name__ == '__main__':
-    if args_opt.do_eval:
-        context.set_context(enable_hccl=False)
-    else:
-        if args_opt.run_distribute:
-            context.set_context(enable_hccl=True)
-            context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL)
-            auto_parallel_context().set_all_reduce_fusion_split_indices([140])
-            init()
-        else:
-            context.set_context(enable_hccl=False)
+    if not args_opt.do_eval and args_opt.run_distribute:
+        context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL)
+        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        init()
 
     context.set_context(mode=context.GRAPH_MODE)
     epoch_size = args_opt.epoch_size
diff --git a/tests/st/tbe_networks/test_resnet_cifar_8p.py b/tests/st/tbe_networks/test_resnet_cifar_8p.py
index 69f0a80d12..1e43729252 100644
--- a/tests/st/tbe_networks/test_resnet_cifar_8p.py
+++ b/tests/st/tbe_networks/test_resnet_cifar_8p.py
@@ -153,7 +153,6 @@ def train_process(q, device_id, epoch_size, num_classes, device_num, batch_size,
     context.set_context(enable_task_sink=True, device_id=device_id)
     context.set_context(enable_loop_sink=True)
     context.set_context(enable_mem_reuse=True)
-    context.set_context(enable_hccl=enable_hccl)
     os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH
     os.environ['RANK_ID'] = str(device_id)
     os.environ['RANK_SIZE'] = str(device_num)
diff --git a/tests/ut/python/parallel/test_auto_parallel_resnet.py b/tests/ut/python/parallel/test_auto_parallel_resnet.py
index a563efb06d..1088ad736d 100644
--- a/tests/ut/python/parallel/test_auto_parallel_resnet.py
+++ b/tests/ut/python/parallel/test_auto_parallel_resnet.py
@@ -19,6 +19,7 @@ from mindspore import Tensor
 from mindspore.ops import operations as P
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.common.initializer import TruncatedNormal
+from mindspore.communication.management import init
 from mindspore.train.model import Model, ParallelMode
 from mindspore import context
 import os
@@ -31,10 +32,10 @@ from mindspore.parallel import set_algo_parameters
 from mindspore.parallel import _cost_model_context as cost_model_context
 
 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
-context.set_context(enable_hccl=True)
 context.set_context(enable_task_sink=True, device_id= 0)
 context.set_context(enable_ir_fusion=True)
 context.set_context(enable_loop_sink=False)
+init()
 
 def weight_variable(shape, factor=0.1):
     return TruncatedNormal(0.02)

From 485b42d2c2f2117835e29c9592b549430046ff70 Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Tue, 28 Apr 2020 11:03:52 +0800
Subject: [PATCH 142/242] modify resnet101 dir name to resnet101_imagenet2012

---
 example/{resnet101_imagenet => resnet101_imagenet2012}/README.md  | 0
 example/{resnet101_imagenet => resnet101_imagenet2012}/config.py  | 0
 .../crossentropy.py                                               | 0
 example/{resnet101_imagenet => resnet101_imagenet2012}/dataset.py | 0
 example/{resnet101_imagenet => resnet101_imagenet2012}/eval.py    | 0
 .../lr_generator.py                                               | 0
 .../run_distribute_train.sh                                       | 0
 .../{resnet101_imagenet => resnet101_imagenet2012}/run_infer.sh   | 0
 .../run_standalone_train.sh                                       | 0
 example/{resnet101_imagenet => resnet101_imagenet2012}/train.py   | 0
 .../{resnet101_imagenet => resnet101_imagenet2012}/var_init.py    | 0
 11 files changed, 0 insertions(+), 0 deletions(-)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/README.md (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/config.py (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/crossentropy.py (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/dataset.py (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/eval.py (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/lr_generator.py (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/run_distribute_train.sh (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/run_infer.sh (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/run_standalone_train.sh (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/train.py (100%)
 rename example/{resnet101_imagenet => resnet101_imagenet2012}/var_init.py (100%)

diff --git a/example/resnet101_imagenet/README.md b/example/resnet101_imagenet2012/README.md
similarity index 100%
rename from example/resnet101_imagenet/README.md
rename to example/resnet101_imagenet2012/README.md
diff --git a/example/resnet101_imagenet/config.py b/example/resnet101_imagenet2012/config.py
similarity index 100%
rename from example/resnet101_imagenet/config.py
rename to example/resnet101_imagenet2012/config.py
diff --git a/example/resnet101_imagenet/crossentropy.py b/example/resnet101_imagenet2012/crossentropy.py
similarity index 100%
rename from example/resnet101_imagenet/crossentropy.py
rename to example/resnet101_imagenet2012/crossentropy.py
diff --git a/example/resnet101_imagenet/dataset.py b/example/resnet101_imagenet2012/dataset.py
similarity index 100%
rename from example/resnet101_imagenet/dataset.py
rename to example/resnet101_imagenet2012/dataset.py
diff --git a/example/resnet101_imagenet/eval.py b/example/resnet101_imagenet2012/eval.py
similarity index 100%
rename from example/resnet101_imagenet/eval.py
rename to example/resnet101_imagenet2012/eval.py
diff --git a/example/resnet101_imagenet/lr_generator.py b/example/resnet101_imagenet2012/lr_generator.py
similarity index 100%
rename from example/resnet101_imagenet/lr_generator.py
rename to example/resnet101_imagenet2012/lr_generator.py
diff --git a/example/resnet101_imagenet/run_distribute_train.sh b/example/resnet101_imagenet2012/run_distribute_train.sh
similarity index 100%
rename from example/resnet101_imagenet/run_distribute_train.sh
rename to example/resnet101_imagenet2012/run_distribute_train.sh
diff --git a/example/resnet101_imagenet/run_infer.sh b/example/resnet101_imagenet2012/run_infer.sh
similarity index 100%
rename from example/resnet101_imagenet/run_infer.sh
rename to example/resnet101_imagenet2012/run_infer.sh
diff --git a/example/resnet101_imagenet/run_standalone_train.sh b/example/resnet101_imagenet2012/run_standalone_train.sh
similarity index 100%
rename from example/resnet101_imagenet/run_standalone_train.sh
rename to example/resnet101_imagenet2012/run_standalone_train.sh
diff --git a/example/resnet101_imagenet/train.py b/example/resnet101_imagenet2012/train.py
similarity index 100%
rename from example/resnet101_imagenet/train.py
rename to example/resnet101_imagenet2012/train.py
diff --git a/example/resnet101_imagenet/var_init.py b/example/resnet101_imagenet2012/var_init.py
similarity index 100%
rename from example/resnet101_imagenet/var_init.py
rename to example/resnet101_imagenet2012/var_init.py

From 73917bf2d176b250c3b7ca8f9a0014953d23c21e Mon Sep 17 00:00:00 2001
From: wukesong <wukesong1@huawei.com>
Date: Tue, 28 Apr 2020 11:21:06 +0800
Subject: [PATCH 143/242] maxpool-alexnet

---
 mindspore/model_zoo/alexnet.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mindspore/model_zoo/alexnet.py b/mindspore/model_zoo/alexnet.py
index 8cd316229c..7ad1c8e37b 100644
--- a/mindspore/model_zoo/alexnet.py
+++ b/mindspore/model_zoo/alexnet.py
@@ -15,6 +15,7 @@
 """Alexnet."""
 import mindspore.nn as nn
 from mindspore.common.initializer import TruncatedNormal
+from mindspore.ops import operations as P
 
 def conv(in_channels, out_channels, kernel_size, stride=1, padding=0, pad_mode="valid"):
     weight = weight_variable()
@@ -44,7 +45,7 @@ class AlexNet(nn.Cell):
         self.conv4 = conv(384, 384, 3, pad_mode="same")
         self.conv5 = conv(384, 256, 3, pad_mode="same")
         self.relu = nn.ReLU()
-        self.max_pool2d = nn.MaxPool2d(kernel_size=3, stride=2)
+        self.max_pool2d = P.MaxPool(ksize=3, strides=2)
         self.flatten = nn.Flatten()
         self.fc1 = fc_with_initialize(6*6*256, 4096)
         self.fc2 = fc_with_initialize(4096, 4096)

From c3807c17c9c04ec7799fde081f66ab1f259d437b Mon Sep 17 00:00:00 2001
From: caojian05 <caojian5@huawei.com>
Date: Tue, 28 Apr 2020 11:41:20 +0800
Subject: [PATCH 144/242] add distribute train README for vgg16

---
 example/vgg16_cifar10/README.md | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/example/vgg16_cifar10/README.md b/example/vgg16_cifar10/README.md
index c324673dcc..d41f373a8b 100644
--- a/example/vgg16_cifar10/README.md
+++ b/example/vgg16_cifar10/README.md
@@ -49,6 +49,24 @@ You will get the accuracy as following:
 result: {'acc': 0.92}
 ```
 
+### Distribute Training
+```
+sh run_distribute_train.sh rank_table.json your_data_path
+```
+The above shell script runs distributed training in the background. You can view the results in the file `train_parallel[X]/log`.
+
+You will get the loss value as following:
+```
+# grep "loss is " train_parallel*/log
+train_parallel0/log:epoch: 1 step: 97, loss is 1.9060308
+train_parallel0/log:epoch: 2 step: 97, loss is 1.6003821
+...
+train_parallel1/log:epoch: 1 step: 97, loss is 1.7095519
+train_parallel1/log:epoch: 2 step: 97, loss is 1.7133579
+...
+...
+```
+> About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).
 
 ## Usage:
 
@@ -75,4 +93,14 @@ parameters/options:
   --data_path           the storage path of datasetd 
   --device_id           the device which used to evaluate model.
   --checkpoint_path     the checkpoint file path used to evaluate model.
-```
\ No newline at end of file
+```
+
+### Distribute Training
+
+```
+Usage: sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [DATA_PATH]
+
+parameters/options:
+  MINDSPORE_HCCL_CONFIG_PATH   HCCL configuration file path.
+  DATA_PATH                    the storage path of dataset.
+```

From 5a085da8dd688cef17baa9df7c712838b8c21d61 Mon Sep 17 00:00:00 2001
From: zhousiyi <zhousiyi@huawei.com>
Date: Tue, 28 Apr 2020 03:51:06 +0000
Subject: [PATCH 145/242] compare context with pointer

---
 .../static_analysis/abstract_value.cc         |  3 +++
 .../static_analysis/analysis_context.cc       | 25 +++++++++++++++++++
 .../static_analysis/analysis_context.h        | 16 ++++++------
 .../static_analysis/static_analysis.cc        |  5 +++-
 .../static_analysis/static_analysis.h         |  7 ++++--
 5 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc
index 210257ea53..f46532ed43 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.cc
@@ -997,6 +997,9 @@ bool AbstractBasePtrListDeepEqual(const AbstractBasePtrList &lhs, const Abstract
   for (std::size_t i = 0; i < size; i++) {
     MS_EXCEPTION_IF_NULL(lhs[i]);
     MS_EXCEPTION_IF_NULL(rhs[i]);
+    if (lhs[i] == rhs[i]) {
+      continue;
+    }
     if (!(*lhs[i] == *rhs[i])) {
       return false;
     }
diff --git a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc
index cd68268118..39a1da5e0f 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.cc
@@ -23,6 +23,24 @@
 
 namespace mindspore {
 namespace abstract {
+AnalysisContextPtr AnalysisContext::NewContext(AnalysisContextPtr parent, FuncGraphPtr fg,
+                                               const AbstractBasePtrList &args_spec_list) {
+  // Contexts are compared by pointer, so hand back the child already cached on parent for this
+  // (fg, args_spec_list) pair while it is alive. The cache holds weak_ptr only: an expired or
+  // freshly inserted (empty) slot locks to nullptr and a new context is built to fill it.
+  auto &cached_slot = parent->children_cache_[fg][args_spec_list];
+  AnalysisContextPtr cached_context = cached_slot.lock();
+  if (cached_context != nullptr) {
+    return cached_context;
+  }
+  AnalysisContextPtr context_new = std::make_shared<AnalysisContext>(parent, fg, args_spec_list);
+  // Reference to myself, so use weak_ptr to break reference cycle.
+  auto weak_context = std::weak_ptr<AnalysisContext>(context_new);
+  context_new->parent_cache_[fg] = weak_context;
+  cached_slot = weak_context;
+  return context_new;
+}
+
 AnalysisContextPtr AnalysisContext::NewFuncGraphContext(const FuncGraphPtr &func_graph,
                                                         const AbstractBasePtrList &args_spec_list) {
   FuncGraphPtr graph_parent = func_graph->parent();
@@ -89,6 +107,13 @@ AnalysisContextPtr AnalysisContext::DummyContext() {
   return dummy_context;
 }
 
+// True when this context has no parent, no func graph and no argument specs.
+bool AnalysisContext::IsDummyContext() {
+  const bool is_root = (parent_ == nullptr) && (func_graph_ == nullptr);
+  const bool has_no_args = args_spec_list_.empty();
+  return is_root && has_no_args;
+}
+
 const AnalysisContextPtr kDummyAnalysisContext =
   std::make_shared<AnalysisContext>(nullptr, nullptr, AbstractBasePtrList());
 
diff --git a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h
index 0fb043674c..c0b3403702 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h
+++ b/mindspore/ccsrc/pipeline/static_analysis/analysis_context.h
@@ -28,6 +28,11 @@
 
 namespace mindspore {
 namespace abstract {
+class AnalysisContext;
+using AnalysisContextWeakPtr = std::weak_ptr<AnalysisContext>;
+using ArgsSpecToAnalysisContextMap =
+  std::unordered_map<AbstractBasePtrList, AnalysisContextWeakPtr, AbstractBasePtrListHasher, AbstractBasePtrListEqual>;
+
 // AnalysisContext will be stored in Config in AnalysisCache.
 class AnalysisContext {
  public:
@@ -41,12 +46,7 @@ class AnalysisContext {
   ~AnalysisContext() = default;
 
   // Helper function to wrapper constructor to save shared_ptr in parent_cache.
-  AnalysisContextPtr NewContext(AnalysisContextPtr parent, FuncGraphPtr fg, const AbstractBasePtrList &args_spec_list) {
-    AnalysisContextPtr context_new = std::make_shared<AnalysisContext>(parent, fg, args_spec_list);
-    // Reference to myself, so use weak_ptr to break reference cycle.
-    context_new->parent_cache_[fg] = std::weak_ptr<AnalysisContext>(context_new);
-    return context_new;
-  }
+  AnalysisContextPtr NewContext(AnalysisContextPtr parent, FuncGraphPtr fg, const AbstractBasePtrList &args_spec_list);
 
   // Extend this context with values for another graph.
   AnalysisContextPtr NewFuncGraphContext(const FuncGraphPtr &func_graph, const AbstractBasePtrList &args_spec_list);
@@ -56,6 +56,7 @@ class AnalysisContext {
   bool operator==(const AnalysisContext &other) const;
   std::size_t hash();
   static AnalysisContextPtr DummyContext();
+  bool IsDummyContext();
   FuncGraphPtr func_graph() const { return func_graph_; }
   AnalysisContextPtr parent() const { return parent_; }
   std::string ToString() const;
@@ -66,7 +67,8 @@ class AnalysisContext {
   AnalysisContextPtr parent_;
   FuncGraphPtr func_graph_;
   AbstractBasePtrList args_spec_list_;
-  std::unordered_map<FuncGraphPtr, std::weak_ptr<AnalysisContext>> parent_cache_;
+  std::unordered_map<FuncGraphPtr, AnalysisContextWeakPtr> parent_cache_;
+  std::unordered_map<FuncGraphPtr, ArgsSpecToAnalysisContextMap> children_cache_;
 };
 
 struct ContextHasher {
diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
index 69deaa1ec1..c5ee7447f1 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.cc
@@ -87,7 +87,10 @@ AbstractBasePtr AnalysisCache::GetValue(const AnfNodeConfigPtr &conf) {
 std::size_t AnfNodeConfigHasher::operator()(const AnfNodeConfigPtr conf) const {
   MS_EXCEPTION_IF_NULL(conf);
   MS_EXCEPTION_IF_NULL(conf->node());
-  std::size_t hash_value = hash_combine(conf->node()->hash(), conf->context()->hash());
+  std::size_t hash_value = conf->node()->hash();
+  if (!conf->context()->IsDummyContext()) {
+    hash_value = hash_combine(hash_value, std::hash<AnalysisContext *>{}(conf->context().get()));
+  }
   if (conf->context() != nullptr && conf->context()->func_graph() != nullptr) {
     MS_LOG(DEBUG) << "NodeConfigHasher Node: " << conf->node()->DebugString()
                   << ", Graph: " << conf->context()->func_graph()->ToString() << " ### , hash value: " << hash_value;
diff --git a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h
index 80c6320493..beffb9ee70 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h
+++ b/mindspore/ccsrc/pipeline/static_analysis/static_analysis.h
@@ -83,9 +83,12 @@ class AnfNodeConfig : public Config {
 
   // used by unordered_map;
   bool operator==(const AnfNodeConfig &other) const {
-    // compare node with pointer, context with content;
+    // compare node by pointer; contexts by pointer too, but any two dummy contexts count as the same context;
     // context should not be nullptr;
-    return (node_ == other.node_) && (*context_ == *other.context_);
+    if (context_->IsDummyContext() && other.context_->IsDummyContext()) {
+      return node_ == other.node_;  // contexts match, so the node alone decides (keeps == in step with the hash)
+    }
+    return (node_ == other.node_) && (context_ == other.context_);
   }
 
   std::string ToString() const override {

From eb60ef00d1d2d3d8596cf44f4a530d60cee20a9a Mon Sep 17 00:00:00 2001
From: yanghaitao <yanghaitao1@huawei.com>
Date: Tue, 28 Apr 2020 14:37:25 +0800
Subject: [PATCH 146/242] More understandable err msg when num rows is zero

---
 .../engine/datasetops/source/celeba_op.cc      | 18 ++++++++++++------
 .../engine/datasetops/source/cifar_op.cc       | 15 ++++++++++++---
 .../datasetops/source/image_folder_op.cc       |  8 ++++++--
 .../engine/datasetops/source/manifest_op.cc    | 14 ++++++++++----
 .../engine/datasetops/source/mnist_op.cc       |  8 ++++++--
 .../engine/datasetops/source/text_file_op.cc   |  4 +++-
 .../engine/datasetops/source/tf_reader_op.cc   |  4 +++-
 .../dataset/engine/datasetops/source/voc_op.cc |  8 ++++++--
 8 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
index 87a7b3c687..2394380ea4 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
@@ -33,8 +33,8 @@ CelebAOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr)
 }
 
 Status CelebAOp::Builder::Build(std::shared_ptr<CelebAOp> *op) {
-  MS_LOG(INFO) << "Celeba dataset directory is " << builder_dir_.c_str() << ".";
-  MS_LOG(INFO) << "Celeba dataset type is " << builder_dataset_type_.c_str() << ".";
+  MS_LOG(DEBUG) << "Celeba dataset directory is " << builder_dir_.c_str() << ".";
+  MS_LOG(DEBUG) << "Celeba dataset type is " << builder_dataset_type_.c_str() << ".";
   RETURN_IF_NOT_OK(SanityCheck());
   if (builder_sampler_ == nullptr) {
     builder_sampler_ = std::make_shared<SequentialSampler>();
@@ -240,9 +240,11 @@ Status CelebAOp::ParseImageAttrInfo() {
   num_rows_exact_ = image_labels_vec_.size();
   num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_exact_) ? num_rows_exact_ : num_samples_;
   if (num_rows_exact_ == 0) {
-    RETURN_STATUS_UNEXPECTED("Number of rows in celeba dataset is zero");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API "
+      "validation first.");
   }
-  MS_LOG(INFO) << "Celeba dataset rows number is " << num_rows_exact_ << ".";
+  MS_LOG(DEBUG) << "Celeba dataset rows number is " << num_rows_exact_ << ".";
   return Status::OK();
 }
 
@@ -267,7 +269,9 @@ std::vector<std::string> CelebAOp::Split(const std::string &line) {
 // Derived from RandomAccessOp
 Status CelebAOp::GetNumSamples(int64_t *num) const {
   if (num == nullptr || num_samples_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumSample not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_samples_;
   return Status::OK();
@@ -275,7 +279,9 @@ Status CelebAOp::GetNumSamples(int64_t *num) const {
 
 Status CelebAOp::GetNumRowsInDataset(int64_t *num) const {
   if (num == nullptr || num_rows_exact_ == 0) {
-    return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API CelebADataset.Please check file path or dataset API "
+      "validation first.");
   }
 
   *num = num_rows_exact_;
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
index 60de5a6bdf..0c2d57ff42 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
@@ -247,7 +247,10 @@ Status CifarOp::InitSampler() {
 // Derived from RandomAccessOp
 Status CifarOp::GetNumSamples(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    std::string api = cifar_type_ == kCifar10 ? "Cifar10Dataset" : "Cifar100Dataset";
+    std::string err_msg = "There is no valid data matching the dataset API " + api +
+                          ".Please check file path or dataset API validation first.";
+    RETURN_STATUS_UNEXPECTED(err_msg);
   }
   (*num) = num_samples_;
   return Status::OK();
@@ -256,7 +259,10 @@ Status CifarOp::GetNumSamples(int64_t *num) const {
 // Derived from RandomAccessOp
 Status CifarOp::GetNumRowsInDataset(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    std::string api = cifar_type_ == kCifar10 ? "Cifar10Dataset" : "Cifar100Dataset";
+    std::string err_msg = "There is no valid data matching the dataset API " + api +
+                          ".Please check file path or dataset API validation first.";
+    RETURN_STATUS_UNEXPECTED(err_msg);
   }
   (*num) = num_rows_;
   return Status::OK();
@@ -389,7 +395,10 @@ Status CifarOp::ParseCifarData() {
   num_rows_ = cifar_image_label_pairs_.size();
   num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? num_rows_ : num_samples_;
   if (num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("Init Cifar failed, not a single row read from dataset!");
+    std::string api = cifar_type_ == kCifar10 ? "Cifar10Dataset" : "Cifar100Dataset";
+    std::string err_msg = "There is no valid data matching the dataset API " + api +
+                          ".Please check file path or dataset API validation first.";
+    RETURN_STATUS_UNEXPECTED(err_msg);
   }
   cifar_raw_data_block_->Reset();
   return Status::OK();
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
index 0ac579a865..32d7171c8f 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
@@ -265,7 +265,9 @@ Status ImageFolderOp::InitSampler() {
 // Derived from RandomAccessOp
 Status ImageFolderOp::GetNumSamples(int64_t *num) const {
   if (num == nullptr || num_samples_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API ImageFolderDatasetV2.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_samples_;
   return Status::OK();
@@ -274,7 +276,9 @@ Status ImageFolderOp::GetNumSamples(int64_t *num) const {
 // Derived from RandomAccessOp
 Status ImageFolderOp::GetNumRowsInDataset(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API ImageFolderDatasetV2.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_rows_;
   return Status::OK();
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
index 0139af4d9d..ab0c012416 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
@@ -261,7 +261,9 @@ Status ManifestOp::InitSampler() {
 // Derived from RandomAccessOp
 Status ManifestOp::GetNumSamples(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_samples_;
   return Status::OK();
@@ -270,7 +272,9 @@ Status ManifestOp::GetNumSamples(int64_t *num) const {
 // Derived from RandomAccessOp
 Status ManifestOp::GetNumRowsInDataset(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_rows_;
   return Status::OK();
@@ -279,7 +283,7 @@ Status ManifestOp::GetNumRowsInDataset(int64_t *num) const {
 // Derived from RandomAccessOp
 Status ManifestOp::GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const {
   if (cls_ids == nullptr || !cls_ids->empty() || image_labelname_.empty()) {
-    RETURN_STATUS_UNEXPECTED("Number rows is 0");
+    RETURN_STATUS_UNEXPECTED("Class indexing is invalid.");
   }
 
   for (size_t i = 0; i < image_labelname_.size(); i++) {
@@ -395,7 +399,9 @@ Status ManifestOp::CountDatasetInfo() {
   num_rows_ = static_cast<int64_t>(image_labelname_.size());
   num_samples_ = (num_samples_ == 0 || num_samples_ > num_rows_) ? num_rows_ : num_samples_;
   if (num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("Number of rows is 0");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API ManifestDataset.Please check file path or dataset API "
+      "validation first.");
   }
   return Status::OK();
 }
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
index 71900f8a91..fbf041e985 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
@@ -212,7 +212,9 @@ Status MnistOp::InitSampler() {
 // Derived from RandomAccessOp
 Status MnistOp::GetNumSamples(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API MnistDataset.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_samples_;
   return Status::OK();
@@ -436,7 +438,9 @@ Status MnistOp::CountTotalRows(const std::string &dir, int64_t numSamples, int64
 // Derived from RandomAccessOp
 Status MnistOp::GetNumRowsInDataset(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API MnistDataset.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_rows_;
   return Status::OK();
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
index 2b62616366..c5fb2b5648 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
@@ -438,7 +438,9 @@ Status TextFileOp::CalculateNumRowsPerShard() {
     all_num_rows_ += count;
   }
   if (all_num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("Number of rows can not be zero");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API TextFileDataset.Please check file path or dataset API "
+      "validation first.");
   }
 
   num_rows_per_shard_ = static_cast<int64_t>(std::ceil(all_num_rows_ * 1.0 / num_devices_));
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
index a2f23aac75..50c60caa86 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
@@ -198,7 +198,9 @@ Status TFReaderOp::CalculateNumRowsPerShard() {
   }
   num_rows_per_shard_ = static_cast<int64_t>(std::ceil(num_rows_ * 1.0 / num_devices_));
   if (num_rows_per_shard_ == 0) {
-    RETURN_STATUS_UNEXPECTED("Number of rows can not be zero");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API TFRecordDataset.Please check file path or dataset API "
+      "validation first.");
   }
   return Status::OK();
 }
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
index 1731ed14ba..e523aa84d6 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
@@ -147,7 +147,9 @@ Status VOCOp::Reset() {
 
 Status VOCOp::GetNumSamples(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API VOCDataset.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_samples_;
   return Status::OK();
@@ -261,7 +263,9 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co
 // Derived from RandomAccessOp
 Status VOCOp::GetNumRowsInDataset(int64_t *num) const {
   if (num == nullptr || num_rows_ == 0) {
-    RETURN_STATUS_UNEXPECTED("NumRow not set");
+    RETURN_STATUS_UNEXPECTED(
+      "There is no valid data matching the dataset API VOCDataset.Please check file path or dataset API "
+      "validation first.");
   }
   (*num) = num_rows_;
   return Status::OK();

From 73642ef3d3c2e496e6fa2d61da91c33b0a8a3e58 Mon Sep 17 00:00:00 2001
From: jinyaohui <jinyaohui@huawei.com>
Date: Tue, 28 Apr 2020 15:12:08 +0800
Subject: [PATCH 147/242] clean pylint

---
 mindspore/model_zoo/vgg.py                           |  1 +
 mindspore/nn/optim/ftrl.py                           |  1 -
 mindspore/nn/optim/lamb.py                           |  2 +-
 .../mindspore_test_framework/apps/test_bert_parts.py | 12 ++++++------
 tests/mindspore_test_framework/utils/block_util.py   |  1 +
 5 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/mindspore/model_zoo/vgg.py b/mindspore/model_zoo/vgg.py
index f3532fab13..66a73a2e50 100644
--- a/mindspore/model_zoo/vgg.py
+++ b/mindspore/model_zoo/vgg.py
@@ -61,6 +61,7 @@ class Vgg(nn.Cell):
 
     def __init__(self, base, num_classes=1000, batch_norm=False, batch_size=1):
         super(Vgg, self).__init__()
+        _ = batch_size
         self.layers = _make_layer(base, batch_norm=batch_norm)
         self.flatten = nn.Flatten()
         self.classifier = nn.SequentialCell([
diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py
index e6f658acae..ccc1b3f10b 100644
--- a/mindspore/nn/optim/ftrl.py
+++ b/mindspore/nn/optim/ftrl.py
@@ -14,7 +14,6 @@
 # ============================================================================
 """FTRL"""
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore.common.parameter import Parameter
 from mindspore.common import Tensor
 import mindspore.common.dtype as mstype
 from mindspore._checkparam import Validator as validator
diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py
index 01ec984453..cbeb6fa674 100755
--- a/mindspore/nn/optim/lamb.py
+++ b/mindspore/nn/optim/lamb.py
@@ -110,8 +110,8 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para
 
 def _check_param_value(decay_steps, warmup_steps, start_learning_rate,
                        end_learning_rate, power, beta1, beta2, eps, weight_decay, prim_name):
-
     """Check the type of inputs."""
+    _ = warmup_steps
     validator.check_float_positive('start_learning_rate', start_learning_rate, prim_name)
     validator.check_float_legal_value('start_learning_rate', start_learning_rate, prim_name)
     validator.check_float_positive('end_learning_rate', end_learning_rate, prim_name)
diff --git a/tests/mindspore_test_framework/apps/test_bert_parts.py b/tests/mindspore_test_framework/apps/test_bert_parts.py
index 944ea07842..dcc679b528 100644
--- a/tests/mindspore_test_framework/apps/test_bert_parts.py
+++ b/tests/mindspore_test_framework/apps/test_bert_parts.py
@@ -173,8 +173,8 @@ test_sets = [
                                                   embedding_size=768,
                                                   embedding_shape=[1, 128, 768],
                                                   use_one_hot_embeddings=True,
-                                                  initializer_range=0.02), 1, 1), {
-                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
+                                                  initializer_range=0.02), 1, 1),
+                  {'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
         'desc_inputs': [input_ids],
         'desc_bprop': [[128]]}),
     ('EmbeddingLookup_multi_outputs_init_param', {
@@ -182,8 +182,8 @@ test_sets = [
                                   embedding_size=768,
                                   embedding_shape=[1, 128, 768],
                                   use_one_hot_embeddings=False,
-                                  initializer_range=0.02), {
-                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
+                                  initializer_range=0.02),
+                  {'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
         'desc_inputs': [input_ids],
         'desc_bprop': [[1, 128, 768], [128]]}),
     ('EmbeddingLookup_multi_outputs_grad_with_no_sens', {
@@ -191,8 +191,8 @@ test_sets = [
                                   embedding_size=768,
                                   embedding_shape=[1, 128, 768],
                                   use_one_hot_embeddings=False,
-                                  initializer_range=0.02), {
-                      'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
+                                  initializer_range=0.02),
+                  {'init_param_with': lambda shp: np.ones(shp).astype(np.float32)}),
         'desc_inputs': [input_ids]}),
     ('GetMaskedLMOutput_grad_with_no_sens', {
         'block': GetMaskedLMOutput(BertConfig(batch_size=1)),
diff --git a/tests/mindspore_test_framework/utils/block_util.py b/tests/mindspore_test_framework/utils/block_util.py
index 75946c3559..0d59728584 100644
--- a/tests/mindspore_test_framework/utils/block_util.py
+++ b/tests/mindspore_test_framework/utils/block_util.py
@@ -69,6 +69,7 @@ class IthOutputCell(nn.Cell):
         return predict
 
 def get_output_cell(network, num_input, output_index, training=True):
+    _ = num_input
     net = IthOutputCell(network, output_index)
     set_block_training(net, training)
     return net

From 6cde5f6d91f44ce5b0171a4643f5a8b3ba2646d9 Mon Sep 17 00:00:00 2001
From: yao_yf <yaoyifan1@huawei.com>
Date: Mon, 27 Apr 2020 14:26:27 +0800
Subject: [PATCH 148/242] auto parallel strategy checkpoint

---
 mindspore/ccsrc/ir/primitive.h                |   6 +-
 mindspore/ccsrc/parallel/context.cc           |  10 +
 mindspore/ccsrc/parallel/context.h            |   7 +
 .../ccsrc/parallel/step_auto_parallel.cc      |  40 ++-
 mindspore/ccsrc/parallel/step_parallel.cc     |  85 +++---
 mindspore/ccsrc/parallel/step_parallel.h      |   2 +-
 .../parallel_strategy_checkpoint.cc           |  28 +-
 .../parallel_strategy_checkpoint.h            |  38 ++-
 mindspore/ccsrc/pipeline/init.cc              |   6 +
 mindspore/ccsrc/utils/node_strategy.proto     |   2 +-
 mindspore/context.py                          |   8 +-
 mindspore/ops/primitive.py                    |   2 +
 mindspore/parallel/_auto_parallel_context.py  |  45 ++-
 .../parallel_strategy_checkpoint_stub.cc      |   4 +-
 .../parallel/test_strategy_checkpoint.py      | 268 ++++++++----------
 15 files changed, 303 insertions(+), 248 deletions(-)

diff --git a/mindspore/ccsrc/ir/primitive.h b/mindspore/ccsrc/ir/primitive.h
index 73941c1058..08c6b7dc9b 100644
--- a/mindspore/ccsrc/ir/primitive.h
+++ b/mindspore/ccsrc/ir/primitive.h
@@ -52,7 +52,11 @@ class Primitive : public Named {
       : Named(name), signatures_(), prim_type_(prim_type) {}
 
   Primitive(const Primitive &prim)
-      : Named(prim), attrs_(prim.attrs_), signatures_(prim.signatures_), prim_type_(prim.prim_type_) {}
+      : Named(prim),
+        attrs_(prim.attrs_),
+        signatures_(prim.signatures_),
+        instance_name_(prim.instance_name_),
+        prim_type_(prim.prim_type_) {}
 
   MS_DECLARE_PARENT(Primitive, Named);
 
diff --git a/mindspore/ccsrc/parallel/context.cc b/mindspore/ccsrc/parallel/context.cc
index 4eb79772dd..9ba7efd60f 100644
--- a/mindspore/ccsrc/parallel/context.cc
+++ b/mindspore/ccsrc/parallel/context.cc
@@ -56,6 +56,8 @@ void ParallelContext::Reset() {
   parameter_broadcast_ = false;
   parameter_broadcast_is_set_ = false;
   enable_all_reduce_fusion_ = false;
+  strategy_ckpt_load_file_ = "";
+  strategy_ckpt_save_file_ = "";
 }
 
 void ParallelContext::set_device_num(int32_t device_num) {
@@ -103,6 +105,14 @@ void ParallelContext::set_parameter_broadcast(bool parameter_broadcast) {
   parameter_broadcast_is_set_ = true;
 }
 
+void ParallelContext::set_strategy_ckpt_load_file(const std::string &strategy_ckpt_load_file) {
+  strategy_ckpt_load_file_ = strategy_ckpt_load_file;
+}
+
+void ParallelContext::set_strategy_ckpt_save_file(const std::string &strategy_ckpt_save_file) {
+  strategy_ckpt_save_file_ = strategy_ckpt_save_file;
+}
+
 void ParallelContext::set_all_reduce_fusion_split_indices(const std::vector<uint32_t> indices) {
   all_reduce_fusion_split_indices_ = indices;
 }
diff --git a/mindspore/ccsrc/parallel/context.h b/mindspore/ccsrc/parallel/context.h
index 095a50f7b3..0e007c92c6 100644
--- a/mindspore/ccsrc/parallel/context.h
+++ b/mindspore/ccsrc/parallel/context.h
@@ -85,6 +85,11 @@ class ParallelContext {
   }
   bool enable_all_reduce_fusion() const { return enable_all_reduce_fusion_; }
 
+  void set_strategy_ckpt_load_file(const std::string &strategy_ckpt_load_file);
+  std::string strategy_ckpt_load_file() const { return strategy_ckpt_load_file_; }
+  void set_strategy_ckpt_save_file(const std::string &strategy_ckpt_save_file);
+  std::string strategy_ckpt_save_file() const { return strategy_ckpt_save_file_; }
+
   void Reset();
 
  private:
@@ -105,6 +110,8 @@ class ParallelContext {
   bool enable_all_reduce_fusion_;
   std::vector<uint32_t> all_reduce_fusion_split_indices_;
   std::vector<uint32_t> all_reduce_fusion_split_sizes_;
+  std::string strategy_ckpt_load_file_;
+  std::string strategy_ckpt_save_file_;
 };
 }  // namespace parallel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc
index 269e624efa..f0be47642e 100644
--- a/mindspore/ccsrc/parallel/step_auto_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc
@@ -40,6 +40,7 @@
 #include "parallel/context.h"
 #include "parallel/ops_info/tmp_identity_info.h"
 #include "parallel/step_parallel.h"
+#include "parallel/strategy_checkpoint/parallel_strategy_checkpoint.h"
 #include "pipeline/parse/python_adapter.h"
 #include "pipeline/pipeline.h"
 
@@ -339,7 +340,7 @@ bool IsAutoParallelCareNode(const CNodePtr &cnode) {
   return IsParallelCareNode(cnode) && IsSplittableOperator(prim->name());
 }
 
-OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &cnode) {
+OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &cnode, StrategyMap *stra_map) {
   MS_EXCEPTION_IF_NULL(prim);
   MS_EXCEPTION_IF_NULL(cnode);
   auto attrs = prim->attrs();
@@ -385,9 +386,15 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &
   operator_info->set_input_value(input_value);
   operator_info->set_outputs_dtype(cnode->Type());
   operator_info->set_cnode(cnode);
+  // key of strategy map
+  std::string instance_name = prim->instance_name();
+  std::string strategy_key_name = cnode->scope()->name() + std::string(CONNSYMBOL) + instance_name;
+  bool load_strategy_from_ckpt =
+    StrategyCheckpoint::GetInstance().LoadCheckPointOn() && stra_map->find(strategy_key_name) != stra_map->end();
   // If no strategy has been configured for this operator, then candidate strategies are generated for
-  // auto-strategy searching; if this primitive is CAST, we ignore the user-specified strategy
-  if (!StrategyFound(attrs) || prim->name() == CAST) {
+  // auto-strategy searching; if this primitive is CAST, we ignore the user-specified strategy.
+  // If the strategy is set to be loaded from checkpoint, the checkpoint strategy is preferred.
+  if ((!StrategyFound(attrs) || prim->name() == CAST) && !load_strategy_from_ckpt) {
     // Compute split_flag_list_, indicating which input has batch dimension. This is ONLY used for preparation for
     // BatchParallelInfo operator
     operator_info->ComputeBatchSplitFlagList();
@@ -397,7 +404,12 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &
     }
   } else {
     // In this case, the configured strategy should be extracted to help setting cost
-    StrategyPtr strategyPtr = parallel::ExtractStrategy(attrs);
+    StrategyPtr strategyPtr;
+    if (load_strategy_from_ckpt) {
+      strategyPtr = (*stra_map)[strategy_key_name];
+    } else {
+      strategyPtr = parallel::ExtractStrategy(attrs);
+    }
     if (strategyPtr != nullptr) {
       if (prim->name() == RESHAPE) {
         MS_LOG(EXCEPTION) << "Setting strategy for Reshape goes for nothing!";
@@ -433,7 +445,13 @@ Status ConstructCostGraphNodesByUniqueId(const std::vector<AnfNodePtr> &all_node
   entire_costgraph->SetDeviceMemoryAndCostParameter();
   // The map from CNode's UniqueId to its operatorInfo
   std::map<std::string, OperatorInfoPtr> from_cnode_to_info;
-
+  // extract strategy from checkpoint for multi-train
+  StrategyMap stra_map;
+  if (StrategyCheckpoint::GetInstance().LoadCheckPointOn()) {
+    if (StrategyCheckpoint::GetInstance().Load(&stra_map) != SUCCESS) {
+      MS_LOG(EXCEPTION) << "Load strategy checkpoint failed";
+    }
+  }
   // Step 1
   for (auto &node : all_nodes) {
     // NOTE: we only care about splittable Primitive operators
@@ -451,7 +469,7 @@ Status ConstructCostGraphNodesByUniqueId(const std::vector<AnfNodePtr> &all_node
 
     auto search_cnode = from_cnode_to_info.find(cnode->UniqueId());
     if (search_cnode == from_cnode_to_info.end()) {
-      auto operator_info = CreateTheOperatorInfo(prim, cnode);
+      auto operator_info = CreateTheOperatorInfo(prim, cnode, &stra_map);
       if (operator_info == nullptr) {
         return FAILED;
       }
@@ -486,7 +504,13 @@ Status ConstructCostGraphNodesByUniqueIdTC(const std::vector<AnfNodePtr> &all_no
   entire_costgraph->SetDeviceMemoryAndCostParameter();
   // The map from CNode's UniqueIdThroughCopy to its operatorInfo
   std::map<std::string, OperatorInfoPtr> from_cnode_to_info;
-
+  // extract strategy from checkpoint for multi-train
+  StrategyMap stra_map;
+  if (StrategyCheckpoint::GetInstance().LoadCheckPointOn()) {
+    if (StrategyCheckpoint::GetInstance().Load(&stra_map) != SUCCESS) {
+      MS_LOG(EXCEPTION) << "Load strategy checkpoint failed";
+    }
+  }
   for (auto &node : all_nodes) {
     // NOTE: we only care about splittable Primitive operators
     auto cnode = node->cast<CNodePtr>();
@@ -504,7 +528,7 @@ Status ConstructCostGraphNodesByUniqueIdTC(const std::vector<AnfNodePtr> &all_no
     auto search_cnode = from_cnode_to_info.find(cnode->UniqueIdThroughCopy());
     if (search_cnode == from_cnode_to_info.end()) {
       // In this case, the corresponding OperatorInfo is not created, create the new one.
-      auto operator_info = CreateTheOperatorInfo(prim, cnode);
+      auto operator_info = CreateTheOperatorInfo(prim, cnode, &stra_map);
       if (operator_info == nullptr) {
         return FAILED;
       }
diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index 17a6228552..62fb96c297 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -1378,6 +1378,13 @@ void SetVirtualDatasetStrategy(const CNodePtr &node) {
 }
 
 void ExtractInformation(const std::vector<AnfNodePtr> &all_nodes) {
+  // load strategy map from checkpoint
+  StrategyMap stra_map;
+  if (StrategyCheckpoint::GetInstance().LoadCheckPointOn()) {
+    if (StrategyCheckpoint::GetInstance().Load(&stra_map) != SUCCESS) {
+      MS_LOG(EXCEPTION) << "Load strategy checkpoint failed";
+    }
+  }
   for (auto &node : all_nodes) {
     auto cnode = node->cast<CNodePtr>();
     if ((cnode == nullptr) || !IsValueNode<Primitive>(cnode->input(0))) {
@@ -1414,7 +1421,14 @@ void ExtractInformation(const std::vector<AnfNodePtr> &all_nodes) {
         (void)cnode->set_operator_info(operator_);
         continue;
       }
-      if (!StrategyFound(attrs)) {
+      // load strategy checkpoint
+      // key of strategy map
+      std::string instance_name = prim->instance_name();
+      std::string strategy_key_name = cnode->scope()->name() + std::string(CONNSYMBOL) + instance_name;
+      bool load_strategy_from_ckpt =
+        StrategyCheckpoint::GetInstance().LoadCheckPointOn() && stra_map.find(strategy_key_name) != stra_map.end();
+
+      if (!StrategyFound(attrs) && !load_strategy_from_ckpt) {
         MS_LOG(INFO) << "ExtractInformation: the strategy of node " << node->ToString() << " prim " << prim->name()
                      << " is empty, using batch parallel";
         std::shared_ptr<std::vector<Dimensions>> strategy_v_ptr = operator_->GenerateBatchStrategies();
@@ -1432,6 +1446,8 @@ void ExtractInformation(const std::vector<AnfNodePtr> &all_nodes) {
         MS_LOG(INFO) << "node " << node->ToString() << " prim " << prim->name() << " batch parallel strategy is "
                      << attrs[GEN_STRATEGY]->ToString();
         strategyPtr = NewStrategy(0, *strategy_v_ptr);
+      } else if (load_strategy_from_ckpt) {
+        strategyPtr = stra_map[strategy_key_name];
       } else {
         strategyPtr = ExtractStrategy(attrs);
       }
@@ -2022,53 +2038,29 @@ void HandleSymbolicKeyInstance(const FuncGraphPtr &root, const std::vector<AnfNo
   }
 }
 
-void CheckpointStrategy(const FuncGraphPtr &func_graph) {
-  MS_EXCEPTION_IF_NULL(func_graph);
-  MS_LOG(INFO) << "Save strategy to checkpoint begin";
-  StrategyMap straMap;
-  auto ret = func_graph->get_return();
-  auto all_nodes = DeepScopedGraphSearch(ret);
-  for (auto &node : all_nodes) {
-    MS_EXCEPTION_IF_NULL(node);
-    auto cnode = node->cast<CNodePtr>();
-    if ((cnode == nullptr) || !IsValueNode<Primitive>(cnode->input(0))) {
-      continue;
-    }
-    PrimitivePtr prim = GetValueNode<PrimitivePtr>(cnode->input(0));
-    MS_EXCEPTION_IF_NULL(prim);
-    OperatorInfoPtr operator_info = cnode->operator_info();
-    if (operator_info) {
-      if (prim->instance_name().empty()) {
-        continue;
+bool NodeWithParameter(const CNodePtr &node) {
+  std::vector<AnfNodePtr> node_inputs{node->inputs()};
+  for (auto input : node_inputs) {
+    if (input->isa<Parameter>()) {
+      auto input_parameter = input->cast<ParameterPtr>();
+      if (input_parameter->has_default()) {
+        return py::cast<bool>(parse::python_adapter::GetPyObjAttr(input_parameter->default_param(), "requires_grad"));
       }
-      std::string instance_name = prim->instance_name();
-      StrategyPtr strategyPtr = operator_info->strategy();
-      MS_EXCEPTION_IF_NULL(node->scope());
-      std::string node_name = node->scope()->name() + std::string(CONNSYMBOL) + instance_name;
-      straMap[node_name] = strategyPtr;
     }
   }
-  if (StrategyCheckpoint::GetInstance().Save(straMap) != SUCCESS) {
-    MS_LOG(EXCEPTION) << "Save strategy checkpoint failed";
-  }
+  return false;
 }
 
-void RestoreStrategy(const FuncGraphPtr &func_graph) {
+void CheckpointStrategy(const FuncGraphPtr &func_graph) {
   MS_EXCEPTION_IF_NULL(func_graph);
-  MS_LOG(INFO) << "Extract strategy from checkpoint begin";
-  StrategyMap straMap;
-  if (StrategyCheckpoint::GetInstance().Load(&straMap) != SUCCESS) {
-    MS_LOG(EXCEPTION) << "Load strategy checkpoint failed";
-  }
-  if (StrategyCheckpoint::GetInstance().RemoveCheckPoint() != SUCCESS) {
-    MS_LOG(EXCEPTION) << "Remove strategy checkpoint failed";
-  }
+  MS_LOG(DEBUG) << "Save strategy to checkpoint begin";
+  StrategyMap stra_map;
   auto ret = func_graph->get_return();
   auto all_nodes = DeepScopedGraphSearch(ret);
   for (auto &node : all_nodes) {
     MS_EXCEPTION_IF_NULL(node);
     auto cnode = node->cast<CNodePtr>();
-    if ((cnode == nullptr) || !IsValueNode<Primitive>(cnode->input(0))) {
+    if ((cnode == nullptr) || !IsValueNode<Primitive>(cnode->input(0)) || !NodeWithParameter(cnode)) {
       continue;
     }
     PrimitivePtr prim = GetValueNode<PrimitivePtr>(cnode->input(0));
@@ -2076,18 +2068,18 @@ void RestoreStrategy(const FuncGraphPtr &func_graph) {
     OperatorInfoPtr operator_info = cnode->operator_info();
     if (operator_info) {
       if (prim->instance_name().empty()) {
-        continue;
+        MS_LOG(EXCEPTION) << "Node with parameter to checkpoint strategy needs instance name";
       }
       std::string instance_name = prim->instance_name();
+      StrategyPtr strategyPtr = operator_info->strategy();
       MS_EXCEPTION_IF_NULL(node->scope());
       std::string node_name = node->scope()->name() + std::string(CONNSYMBOL) + instance_name;
-      MS_LOG(INFO) << "Node name is " << node_name;
-      if (straMap.find(node_name) != straMap.end()) {
-        StrategyPtr strategyPtr = straMap[node_name];
-        operator_info->set_strategy(strategyPtr);
-      }
+      stra_map[node_name] = strategyPtr;
     }
   }
+  if (StrategyCheckpoint::GetInstance().Save(stra_map) != SUCCESS) {
+    MS_LOG(EXCEPTION) << "Save strategy checkpoint failed";
+  }
 }
 
 void SetForwardFlag(const std::vector<AnfNodePtr> &all_nodes) {
@@ -2264,14 +2256,9 @@ bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer)
     // extract shape and strategy, set operator_info
     ExtractInformation(all_nodes);
     ReshapeInit(all_nodes);
-    // extract strategy from checkpoint for multi-train
-    if (StrategyCheckpoint::GetInstance().CheckPointOn() && StrategyCheckpoint::GetInstance().CheckPointExit()) {
-      RestoreStrategy(root);
-    }
   }
   // save strategy as checkpoint for multi-train
-  if (StrategyCheckpoint::GetInstance().CheckPointOn() &&
-      StrategyCheckpoint::GetInstance().GetCurrentTrainTime() < StrategyCheckpoint::GetInstance().GetTrainTimes()) {
+  if (StrategyCheckpoint::GetInstance().SaveCheckPointOn()) {
     CheckpointStrategy(root);
   }
 
diff --git a/mindspore/ccsrc/parallel/step_parallel.h b/mindspore/ccsrc/parallel/step_parallel.h
index 745794912b..c26f65ec65 100644
--- a/mindspore/ccsrc/parallel/step_parallel.h
+++ b/mindspore/ccsrc/parallel/step_parallel.h
@@ -135,7 +135,7 @@ void ReshapeInit(const std::vector<AnfNodePtr> &all_nodes);
 void ParallelCommunication(const FuncGraphPtr &root, const std::vector<AnfNodePtr> &all_nodes,
                            const FuncGraphManagerPtr &manager);
 
-void RestoreStrategy(const FuncGraphPtr &func_graph);
+bool NodeWithParameter(const CNodePtr &node);
 
 void CheckpointStrategy(const FuncGraphPtr &func_graph);
 
diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc
index 981cf8a115..de10f4beb4 100644
--- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc
+++ b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.cc
@@ -29,30 +29,32 @@ namespace mindspore {
 namespace parallel {
 StrategyCheckpoint &StrategyCheckpoint::GetInstance() {
   static StrategyCheckpoint instance = StrategyCheckpoint();
+  if (ParallelContext::GetInstance() != nullptr) {
+    instance.load_file_ = ParallelContext::GetInstance()->strategy_ckpt_load_file();
+    instance.load_checkpoint_on_ = !ParallelContext::GetInstance()->strategy_ckpt_load_file().empty();
+    instance.save_file_ = ParallelContext::GetInstance()->strategy_ckpt_save_file();
+    instance.save_checkpoint_on_ = !ParallelContext::GetInstance()->strategy_ckpt_save_file().empty();
+  }
   return instance;
 }
 
-bool StrategyCheckpoint::CheckPointExit() const {
-  std::ifstream fin(path_);
+bool StrategyCheckpoint::CheckPointExit(const std::string path) const {
+  std::ifstream fin(path);
   if (fin) {
     return true;
   }
   return false;
 }
 
-Status StrategyCheckpoint::RemoveCheckPoint() const {
-  if (std::remove(common::SafeCStr(path_)) == 0) {
-    return SUCCESS;
-  }
-  return FAILED;
-}
-
 Status StrategyCheckpoint::Load(StrategyMap *strategy_map) {
   if (strategy_map == nullptr) {
     MS_LOG(EXCEPTION) << "Failure:strategy_map is nullptr";
   }
+  if (!CheckPointExit(load_file_)) {
+    MS_LOG(EXCEPTION) << "CheckPoint file is not found";
+  }
   straspb::ParallelStrategyMap parallel_strategy_map;
-  std::fstream input(path_, std::ios::in | std::ios::binary);
+  std::fstream input(load_file_, std::ios::in | std::ios::binary);
   if (!parallel_strategy_map.ParseFromIstream(&input)) {
     MS_LOG(ERROR) << "Load strategy file failed";
     return FAILED;
@@ -77,14 +79,14 @@ Status StrategyCheckpoint::Load(StrategyMap *strategy_map) {
 
     StrategyPtr strategy = NewStrategy(stage, strategy_inputs);
     (*strategy_map)[node_name] = strategy;
-    current_train_time_ = (int32_t)parallel_strategy_map.train_time();
+    current_stage_ = (int32_t)parallel_strategy_map.current_stage();
   }
   return SUCCESS;
 }
 
 Status StrategyCheckpoint::Save(const StrategyMap &strategy_map) {
   straspb::ParallelStrategyMap parallel_strategy_map;
-  parallel_strategy_map.set_train_time(IntToUint(++current_train_time_));
+  parallel_strategy_map.set_current_stage(IntToUint(++current_stage_));
   for (auto &node_stra : strategy_map) {
     straspb::ParallelStrategyItem *parallel_strategy_item = parallel_strategy_map.add_parallel_strategy_item();
     MS_EXCEPTION_IF_NULL(parallel_strategy_item);
@@ -100,7 +102,7 @@ Status StrategyCheckpoint::Save(const StrategyMap &strategy_map) {
       }
     }
   }
-  std::fstream output(path_, std::ios::out | std::ios::trunc | std::ios::binary);
+  std::fstream output(save_file_, std::ios::out | std::ios::trunc | std::ios::binary);
   if (!parallel_strategy_map.SerializeToOstream(&output)) {
     MS_LOG(ERROR) << "Save strategy file failed";
     return FAILED;
diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
index c871ea6eef..0cf6229fa3 100644
--- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
+++ b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
@@ -21,43 +21,37 @@
 #include <unordered_map>
 #include "parallel/ops_info/ops_utils.h"
 #include "parallel/strategy.h"
+#include "parallel/context.h"
 
 namespace mindspore {
 namespace parallel {
-constexpr char DEFAULT_CHECKPOINT_PATH[] = "./strategys.ckpt";
 
 using StrategyMap = std::unordered_map<std::string, StrategyPtr>;
 class StrategyCheckpoint {
  public:
-  StrategyCheckpoint() : path_(DEFAULT_CHECKPOINT_PATH), current_train_time_(1) {
-    train_times_ = 1;
-    checkpoint_on_ = false;
-    const char *train_times_str = std::getenv("PARALLEL_TRAIN_TIMES");
-    if (train_times_str != nullptr && std::stoi(train_times_str) > 0) {
-      train_times_ = std::stoi(train_times_str);
-    }
-    const char *checkpoint_on_str = std::getenv("PARALLEL_CHECKPOINT_ON");
-    if (checkpoint_on_str != nullptr) {
-      checkpoint_on_ = (std::string(checkpoint_on_str) == "on");
-    }
+  StrategyCheckpoint() {
+    current_stage_ = 0;
+    load_file_ = "";
+    load_checkpoint_on_ = false;
+    save_file_ = "";
+    save_checkpoint_on_ = false;
   }
   ~StrategyCheckpoint() = default;
-  bool CheckPointExit() const;
-  Status RemoveCheckPoint() const;
+
   Status Load(StrategyMap *strategy_map);
   Status Save(const StrategyMap &strategy_map);
 
   static StrategyCheckpoint &GetInstance();
-  int32_t GetTrainTimes() const { return train_times_; }
-  int32_t GetCurrentTrainTime() const { return current_train_time_; }
-  bool CheckPointOn() const { return checkpoint_on_; }
+  bool LoadCheckPointOn() const { return load_checkpoint_on_; }
+  bool SaveCheckPointOn() const { return save_checkpoint_on_; }
 
  private:
-  std::string path_;
-  bool checkpoint_on_;
-  // total train times for a train, get from Environmental variable:TRAIN_TIME, please export it
-  int32_t train_times_;
-  int32_t current_train_time_;
+  std::string load_file_;
+  std::string save_file_;
+  bool load_checkpoint_on_;
+  bool save_checkpoint_on_;
+  bool CheckPointExit(const std::string path) const;
+  int32_t current_stage_;
 };
 }  // namespace parallel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/pipeline/init.cc b/mindspore/ccsrc/pipeline/init.cc
index f5cacc7ed5..5c6727670b 100644
--- a/mindspore/ccsrc/pipeline/init.cc
+++ b/mindspore/ccsrc/pipeline/init.cc
@@ -191,6 +191,12 @@ PYBIND11_MODULE(_c_expression, m) {
     .def("get_parameter_broadcast_is_set", &ParallelContext::parameter_broadcast_is_set,
          "Get parameter broadcast is set.")
     .def("set_parameter_broadcast", &ParallelContext::set_parameter_broadcast, "Set parameter broadcast.")
+    .def("set_strategy_ckpt_load_file", &ParallelContext::set_strategy_ckpt_load_file,
+         "Set strategy checkpoint load file.")
+    .def("set_strategy_ckpt_save_file", &ParallelContext::set_strategy_ckpt_save_file,
+         "Set strategy checkpoint save file.")
+    .def("get_strategy_ckpt_load_file", &ParallelContext::strategy_ckpt_load_file, "Get strategy checkpoint load file.")
+    .def("get_strategy_ckpt_save_file", &ParallelContext::strategy_ckpt_save_file, "Get strategy checkpoint save file.")
     .def("reset", &ParallelContext::Reset, "Reset auto parallel context.");
 
   (void)py::class_<CostModelContext, std::shared_ptr<CostModelContext>>(m, "CostModelContext")
diff --git a/mindspore/ccsrc/utils/node_strategy.proto b/mindspore/ccsrc/utils/node_strategy.proto
index dc06482ba1..8ec25f21a6 100644
--- a/mindspore/ccsrc/utils/node_strategy.proto
+++ b/mindspore/ccsrc/utils/node_strategy.proto
@@ -33,6 +33,6 @@ message ParallelStrategyItem {
 }
 
 message ParallelStrategyMap {
-    required uint32 train_time = 1;
+    required uint32 current_stage = 1;
     repeated ParallelStrategyItem parallel_strategy_item = 2;
 }
\ No newline at end of file
diff --git a/mindspore/context.py b/mindspore/context.py
index 237b2143ed..7341db620a 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -404,7 +404,7 @@ def _context():
 
 
 @args_type_check(device_num=int, global_rank=int, mirror_mean=bool, cast_before_mirror=bool, parallel_mode=str,
-                 parameter_broadcast=bool)
+                 parameter_broadcast=bool, strategy_ckpt_load_file=str, strategy_ckpt_save_file=str)
 def set_auto_parallel_context(**kwargs):
     """
     Set auto parallel context.
@@ -436,6 +436,8 @@ def set_auto_parallel_context(**kwargs):
         parameter_broadcast (bool): Indicating whether to broadcast parameters before training.
                        "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter
                        broadcast. Default: False.
+        strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: ''
+        strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: ''
 
     Raises:
         ValueError: If input key is not attribute in auto parallel context.
@@ -447,6 +449,8 @@ def set_auto_parallel_context(**kwargs):
         >>> context.set_auto_parallel_context(cast_before_mirror=False)
         >>> context.set_auto_parallel_context(parallel_mode="auto_parallel")
         >>> context.set_auto_parallel_context(parameter_broadcast=False)
+        >>> context.set_auto_parallel_context(strategy_ckpt_load_file="./strategy_stage1.ckpt")
+        >>> context.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_stage1.ckpt")
     """
     _set_auto_parallel_context(**kwargs)
 
@@ -477,6 +481,8 @@ def reset_auto_parallel_context():
     - cast_before_mirror: True.
     - parallel_mode: "stand_alone".
     - parameter_broadcast: False.
+    - strategy_ckpt_load_file: "".
+    - strategy_ckpt_save_file: "".
     """
     _reset_auto_parallel_context()
 
diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py
index d281b4f76c..24c81003bd 100644
--- a/mindspore/ops/primitive.py
+++ b/mindspore/ops/primitive.py
@@ -88,6 +88,8 @@ class Primitive(Primitive_):
         for name in self.attrs:
             value = self.attrs[name]
             cloned.add_prim_attr(name, value)
+        if hasattr(self, 'instance_name'):
+            cloned.set_prim_instance_name(self.instance_name)
         return cloned
 
     def add_prim_attr(self, name, value):
diff --git a/mindspore/parallel/_auto_parallel_context.py b/mindspore/parallel/_auto_parallel_context.py
index 0608989d94..f3f8d443e9 100644
--- a/mindspore/parallel/_auto_parallel_context.py
+++ b/mindspore/parallel/_auto_parallel_context.py
@@ -208,6 +208,36 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_parameter_broadcast()
 
+    def set_strategy_ckpt_load_file(self, strategy_ckpt_load_file):
+        """
+        Set strategy checkpoint load path.
+
+        Args:
+            strategy_ckpt_load_file (str): Path to load parallel strategy checkpoint.
+        """
+        self.check_context_handle()
+        self._context_handle.set_strategy_ckpt_load_file(strategy_ckpt_load_file)
+
+    def get_strategy_ckpt_load_file(self):
+        """Get strategy checkpoint load path."""
+        self.check_context_handle()
+        return self._context_handle.get_strategy_ckpt_load_file()
+
+    def set_strategy_ckpt_save_file(self, strategy_ckpt_save_file):
+        """
+        Set strategy checkpoint save path.
+
+        Args:
+            strategy_ckpt_save_file (str): Path to save parallel strategy checkpoint.
+        """
+        self.check_context_handle()
+        self._context_handle.set_strategy_ckpt_save_file(strategy_ckpt_save_file)
+
+    def get_strategy_ckpt_save_file(self):
+        """Get strategy checkpoint save path."""
+        self.check_context_handle()
+        return self._context_handle.get_strategy_ckpt_save_file()
+
     def get_parameter_broadcast_is_set(self):
         """Get parameter broadcast is set or not."""
         self.check_context_handle()
@@ -315,7 +345,9 @@ _set_auto_parallel_context_func_map = {
     "cast_before_mirror": auto_parallel_context().set_cast_before_mirror,
     "loss_repeated_mean": auto_parallel_context().set_loss_repeated_mean,
     "parallel_mode": auto_parallel_context().set_parallel_mode,
-    "parameter_broadcast": auto_parallel_context().set_parameter_broadcast}
+    "parameter_broadcast": auto_parallel_context().set_parameter_broadcast,
+    "strategy_ckpt_load_file": auto_parallel_context().set_strategy_ckpt_load_file,
+    "strategy_ckpt_save_file": auto_parallel_context().set_strategy_ckpt_save_file}
 
 
 _get_auto_parallel_context_func_map = {
@@ -325,11 +357,14 @@ _get_auto_parallel_context_func_map = {
     "cast_before_mirror": auto_parallel_context().get_cast_before_mirror,
     "loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean,
     "parallel_mode": auto_parallel_context().get_parallel_mode,
-    "parameter_broadcast": auto_parallel_context().get_parameter_broadcast}
+    "parameter_broadcast": auto_parallel_context().get_parameter_broadcast,
+    "strategy_ckpt_load_file": auto_parallel_context().get_strategy_ckpt_load_file,
+    "strategy_ckpt_save_file": auto_parallel_context().get_strategy_ckpt_save_file}
 
 
 @args_type_check(device_num=int, global_rank=int, mirror_mean=bool, cast_before_mirror=bool,
-                 loss_repeated_mean=bool, parallel_mode=str, parameter_broadcast=bool)
+                 loss_repeated_mean=bool, parallel_mode=str, parameter_broadcast=bool,
+                 strategy_ckpt_load_file=str, strategy_ckpt_save_file=str)
 def _set_auto_parallel_context(**kwargs):
     """
     Set auto parallel context.
@@ -360,6 +395,8 @@ def _set_auto_parallel_context(**kwargs):
         parameter_broadcast (bool): Indicating whether to broadcast parameters before training.
                        "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter
                        broadcast. Default: False.
+        strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: ''
+        strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: ''
 
     Raises:
         ValueError: If input key is not attribute in auto parallel context.
@@ -400,5 +437,7 @@ def _reset_auto_parallel_context():
     - cast_before_mirror: True.
     - parallel_mode: "stand_alone".
     - parameter_broadcast: False.
+    - strategy_ckpt_load_file: "".
+    - strategy_ckpt_save_file: "".
     """
     auto_parallel_context().reset()
diff --git a/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc b/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc
index 73de5071cd..43d0dd4b3f 100644
--- a/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc
+++ b/tests/ut/cpp/stub/parallel_strategy_checkpoint/parallel_strategy_checkpoint_stub.cc
@@ -25,9 +25,7 @@ StrategyCheckpoint& StrategyCheckpoint::GetInstance() {
   return instance;
 }
 
-bool StrategyCheckpoint::CheckPointExit() const { return false; }
-
-Status StrategyCheckpoint::RemoveCheckPoint() const { return SUCCESS; }
+bool StrategyCheckpoint::CheckPointExit(const std::string path) const { return false; }
 
 Status StrategyCheckpoint::Load(StrategyMap* strategy_map) { return SUCCESS; }
 
diff --git a/tests/ut/python/parallel/test_strategy_checkpoint.py b/tests/ut/python/parallel/test_strategy_checkpoint.py
index 09f4a54cbf..89b6dd1dbb 100644
--- a/tests/ut/python/parallel/test_strategy_checkpoint.py
+++ b/tests/ut/python/parallel/test_strategy_checkpoint.py
@@ -14,10 +14,10 @@
 
 import numpy as np
 from mindspore import context
-from mindspore.context import set_auto_parallel_context
+from mindspore.context import set_auto_parallel_context, reset_auto_parallel_context
 import mindspore.nn as nn
 from mindspore.ops import operations as P
-from mindspore import Tensor
+from mindspore import Tensor, Parameter
 from tests.ut.python.ops.test_math_ops import VirtualLoss
 import mindspore as ms
 from mindspore.common.api import _executor
@@ -25,17 +25,15 @@ from mindspore.ops import composite as C
 
 
 # model_parallel test
-# export PARALLEL_CHECKPOINT_ON=on
-# export PARALLEL_TRAIN_TIMES=4
-def test_six_matmul():
+def test_six_matmul_save():
     class NetWithLoss(nn.Cell):
         def __init__(self, network):
             super(NetWithLoss, self).__init__()
             self.loss = VirtualLoss()
             self.network = network
 
-        def construct(self, x1, x2, x3, x4, x5, x6, x7):
-            predict = self.network(x1, x2, x3, x4, x5, x6, x7)
+        def construct(self, x1, x6):
+            predict = self.network(x1, x6)
             return self.loss(predict)
 
 
@@ -44,8 +42,8 @@ def test_six_matmul():
             super(GradWrap, self).__init__()
             self.network = network
 
-        def construct(self, x1, x2, x3, x4, x5, x6, x7):
-            return C.grad_all(self.network)(x1, x2, x3, x4, x5, x6, x7)
+        def construct(self, x1, x6):
+            return C.grad_all(self.network)(x1, x6)
 
     class Net(nn.Cell):
         def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
@@ -56,45 +54,46 @@ def test_six_matmul():
             self.matmul4 = P.MatMul().set_strategy(strategy4)
             self.matmul5 = P.MatMul().set_strategy(strategy5)
             self.matmul6 = P.MatMul().set_strategy(strategy6)
-
-        def construct(self, x1, x2, x3, x4, x5, x6, x7):
-            out = self.matmul1(x1, x2)
-            out = self.matmul2(out, x3)
-            out = self.matmul3(out, x4)
-            out = self.matmul4(out, x5)
-            out = self.matmul5(out, x6)
-            out = self.matmul6(out, x7)
+            self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
+            self.weight2 = Parameter(Tensor(np.ones([64, 64]), dtype=ms.float32), name="weight2")
+            self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
+            self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
+            self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5")
+
+        def construct(self, x1, x6):
+            out = self.matmul1(x1, self.weight1)
+            out = self.matmul2(out, self.weight2)
+            out = self.matmul3(out, self.weight3)
+            out = self.matmul4(out, self.weight4)
+            out = self.matmul5(out, self.weight5)
+            out = self.matmul6(out, x6)
             return out
 
-    set_auto_parallel_context(device_num=512, global_rank=0)
+    reset_auto_parallel_context()
+    set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_save_file="./strategy_stage1.ckpt")
     strategy1 = ((8, 1), (1, 1))
     strategy2 = ((1, 8), (8, 1))
     strategy3 = ((2, 2), (2, 2))
-    strategy4 = ((4, 2), (2, 4))
-    strategy5 = ((2, 4), (4, 2))
-    strategy6 = ((4, 4), (4, 4))
+    strategy4 = ((1, 1), (1, 8))
+    strategy5 = ((4, 2), (2, 1))
+    strategy6 = ((4, 1), (1, 2))
     net = GradWrap(NetWithLoss(Net(strategy1, strategy2, strategy3, strategy4, strategy5, strategy6)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
-    x1 = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    x2 = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    x3 = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    x4 = Tensor(np.ones([64, 128]), dtype=ms.float32)
-    x5 = Tensor(np.ones([128, 64]), dtype=ms.float32)
-    x6 = Tensor(np.ones([64, 32]), dtype=ms.float32)
-    x7 = Tensor(np.ones([32, 32]), dtype=ms.float32)
-    _executor.compile(net, x1, x2, x3, x4, x5, x6, x7)
+    x1 = Tensor(np.ones([32, 32]), dtype=ms.float32)
+    x6 = Tensor(np.ones([128, 32]), dtype=ms.float32)
+    _executor.compile(net, x1, x6)
 
-# remove matmul2
-def test_six_matmul_repeated1():
+# remove matmul2, add matmul7
+def test_six_matmul_load():
     class NetWithLoss(nn.Cell):
         def __init__(self, network):
             super(NetWithLoss, self).__init__()
             self.loss = VirtualLoss()
             self.network = network
 
-        def construct(self, x1, x2, x4, x5, x6, x7):
-            predict = self.network(x1, x2, x4, x5, x6, x7)
+        def construct(self, x1, x6, x7):
+            predict = self.network(x1, x6, x7)
             return self.loss(predict)
 
 
@@ -103,53 +102,58 @@ def test_six_matmul_repeated1():
             super(GradWrap, self).__init__()
             self.network = network
 
-        def construct(self, x1, x2, x4, x5, x6, x7):
-            return C.grad_all(self.network)(x1, x2, x4, x5, x6, x7)
+        def construct(self, x1, x6, x7):
+            return C.grad_all(self.network)(x1, x6, x7)
 
     class Net(nn.Cell):
-        def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6):
+        def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7):
             super().__init__()
             self.matmul1 = P.MatMul().set_strategy(strategy1)
             self.matmul3 = P.MatMul().set_strategy(strategy3)
             self.matmul4 = P.MatMul().set_strategy(strategy4)
             self.matmul5 = P.MatMul().set_strategy(strategy5)
             self.matmul6 = P.MatMul().set_strategy(strategy6)
-
-        def construct(self, x1, x2, x4, x5, x6, x7):
-            out = self.matmul1(x1, x2)
-            out = self.matmul3(out, x4)
-            out = self.matmul4(out, x5)
-            out = self.matmul5(out, x6)
-            out = self.matmul6(out, x7)
+            self.matmul7 = P.MatMul().set_strategy(strategy7)
+            self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
+            self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
+            self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
+            self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5")
+
+        def construct(self, x1, x6, x7):
+            out = self.matmul1(x1, self.weight1)
+            out = self.matmul3(out, self.weight3)
+            out = self.matmul4(out, self.weight4)
+            out = self.matmul5(out, self.weight5)
+            out = self.matmul6(out, x6)
+            out = self.matmul7(out, x7)
             return out
 
-    set_auto_parallel_context(device_num=512, global_rank=0)
+    reset_auto_parallel_context()
+    set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_load_file="./strategy_stage1.ckpt")
     strategy1 = ((8, 1), (1, 1))
     strategy3 = ((8, 1), (1, 1))
     strategy4 = ((8, 1), (1, 1))
     strategy5 = ((8, 1), (1, 1))
     strategy6 = ((8, 1), (1, 1))
-    net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5, strategy6)))
+    strategy7 = ((8, 1), (1, 1))
+    net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5, strategy6, strategy7)))
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
 
-    x1 = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    x2 = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    x4 = Tensor(np.ones([64, 128]), dtype=ms.float32)
-    x5 = Tensor(np.ones([128, 64]), dtype=ms.float32)
-    x6 = Tensor(np.ones([64, 32]), dtype=ms.float32)
+    x1 = Tensor(np.ones([32, 32]), dtype=ms.float32)
+    x6 = Tensor(np.ones([128, 32]), dtype=ms.float32)
     x7 = Tensor(np.ones([32, 32]), dtype=ms.float32)
-    _executor.compile(net, x1, x2, x4, x5, x6, x7)
+    _executor.compile(net, x1, x6, x7)
 
-# add matmul7
-def test_six_matmul_repeated2():
+# model_parallel test
+def test_six_matmul_save_auto():
     class NetWithLoss(nn.Cell):
         def __init__(self, network):
             super(NetWithLoss, self).__init__()
             self.loss = VirtualLoss()
             self.network = network
 
-        def construct(self, x1, x2, x4, x5, x6, x7, x8):
-            predict = self.network(x1, x2, x4, x5, x6, x7, x8)
+        def construct(self, x1, x6):
+            predict = self.network(x1, x6)
             return self.loss(predict)
 
 
@@ -158,60 +162,52 @@ def test_six_matmul_repeated2():
             super(GradWrap, self).__init__()
             self.network = network
 
-        def construct(self, x1, x2, x4, x5, x6, x7, x8):
-            return C.grad_all(self.network)(x1, x2, x4, x5, x6, x7, x8)
+        def construct(self, x1, x6):
+            return C.grad_all(self.network)(x1, x6)
 
     class Net(nn.Cell):
-        def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7):
+        def __init__(self):
             super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul3 = P.MatMul().set_strategy(strategy3)
-            self.matmul4 = P.MatMul().set_strategy(strategy4)
-            self.matmul5 = P.MatMul().set_strategy(strategy5)
-            self.matmul6 = P.MatMul().set_strategy(strategy6)
-            self.matmul7 = P.MatMul().set_strategy(strategy7)
-
-        def construct(self, x1, x2, x4, x5, x6, x7, x8):
-            out = self.matmul1(x1, x2)
-            out = self.matmul3(out, x4)
-            out = self.matmul4(out, x5)
-            out = self.matmul5(out, x6)
-            out = self.matmul6(out, x7)
-            out = self.matmul7(out, x8)
+            self.matmul1 = P.MatMul()
+            self.matmul2 = P.MatMul()
+            self.matmul3 = P.MatMul()
+            self.matmul4 = P.MatMul()
+            self.matmul5 = P.MatMul()
+            self.matmul6 = P.MatMul()
+            self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
+            self.weight2 = Parameter(Tensor(np.ones([64, 64]), dtype=ms.float32), name="weight2")
+            self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
+            self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
+            self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5")
+
+        def construct(self, x1, x6):
+            out = self.matmul1(x1, self.weight1)
+            out = self.matmul2(out, self.weight2)
+            out = self.matmul3(out, self.weight3)
+            out = self.matmul4(out, self.weight4)
+            out = self.matmul5(out, self.weight5)
+            out = self.matmul6(out, x6)
             return out
 
-    set_auto_parallel_context(device_num=512, global_rank=0)
-    strategy1 = ((8, 1), (1, 1))
-    strategy3 = ((8, 1), (1, 1))
-    strategy4 = ((8, 1), (1, 1))
-    strategy5 = ((8, 1), (1, 1))
-    strategy6 = ((8, 1), (1, 1))
-    strategy7 = ((8, 1), (1, 1))
-    net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5, strategy6, strategy7)))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
-
-    x1 = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    x2 = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    x4 = Tensor(np.ones([64, 128]), dtype=ms.float32)
-    x5 = Tensor(np.ones([128, 64]), dtype=ms.float32)
-    x6 = Tensor(np.ones([64, 32]), dtype=ms.float32)
-    x7 = Tensor(np.ones([32, 32]), dtype=ms.float32)
-    x8 = Tensor(np.ones([32, 128]), dtype=ms.float32)
-    _executor.compile(net, x1, x2, x4, x5, x6, x7, x8)
+    reset_auto_parallel_context()
+    set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_save_file="./strategy_stage1_auto.ckpt")
+    net = GradWrap(NetWithLoss(Net()))
+    context.set_auto_parallel_context(parallel_mode="auto_parallel")
 
+    x1 = Tensor(np.ones([32, 32]), dtype=ms.float32)
+    x6 = Tensor(np.ones([128, 32]), dtype=ms.float32)
+    _executor.compile(net, x1, x6)
 
-# add scope2
-def test_six_matmul_repeated3():
+# remove matmul2, add matmul7
+def test_six_matmul_load_auto():
     class NetWithLoss(nn.Cell):
-        def __init__(self, network1, network2):
+        def __init__(self, network):
             super(NetWithLoss, self).__init__()
             self.loss = VirtualLoss()
-            self.network = network1
-            self.network2 = network2
+            self.network = network
 
-        def construct(self, x1, x2, x4, x5, x6, x7, x8, x9, x10):
-            predict = self.network(x1, x2, x4, x5, x6, x7, x8)
-            predict = self.network2(predict, x9, x10)
+        def construct(self, x1, x6, x7):
+            predict = self.network(x1, x6, x7)
             return self.loss(predict)
 
 
@@ -220,62 +216,42 @@ def test_six_matmul_repeated3():
             super(GradWrap, self).__init__()
             self.network = network
 
-        def construct(self, x1, x2, x4, x5, x6, x7, x8, x9, x10):
-            return C.grad_all(self.network)(x1, x2, x4, x5, x6, x7, x8, x9, x10)
+        def construct(self, x1, x6, x7):
+            return C.grad_all(self.network)(x1, x6, x7)
 
     class Net(nn.Cell):
-        def __init__(self, strategy1, strategy3, strategy4, strategy5, strategy6, strategy7):
+        def __init__(self, strategy1, strategy3, strategy4, strategy5):
             super().__init__()
             self.matmul1 = P.MatMul().set_strategy(strategy1)
             self.matmul3 = P.MatMul().set_strategy(strategy3)
             self.matmul4 = P.MatMul().set_strategy(strategy4)
             self.matmul5 = P.MatMul().set_strategy(strategy5)
-            self.matmul6 = P.MatMul().set_strategy(strategy6)
-            self.matmul7 = P.MatMul().set_strategy(strategy7)
-
-        def construct(self, x1, x2, x4, x5, x6, x7, x8):
-            out = self.matmul1(x1, x2)
-            out = self.matmul3(out, x4)
-            out = self.matmul4(out, x5)
-            out = self.matmul5(out, x6)
-            out = self.matmul6(out, x7)
-            out = self.matmul7(out, x8)
-            return out
-
-    class Net1(nn.Cell):
-        def __init__(self, strategy1, strategy2):
-            super().__init__()
-            self.matmul1 = P.MatMul().set_strategy(strategy1)
-            self.matmul2 = P.MatMul().set_strategy(strategy2)
-
-        def construct(self, x1, x2, x3):
-            out = self.matmul1(x1, x2)
-            out = self.matmul2(out, x3)
+            self.matmul6 = P.MatMul()
+            self.matmul7 = P.MatMul()
+            self.weight1 = Parameter(Tensor(np.ones([32, 64]), dtype=ms.float32), name="weight1")
+            self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
+            self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
+            self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5")
+
+        def construct(self, x1, x6, x7):
+            out = self.matmul1(x1, self.weight1)
+            out = self.matmul3(out, self.weight3)
+            out = self.matmul4(out, self.weight4)
+            out = self.matmul5(out, self.weight5)
+            out = self.matmul6(out, x6)
+            out = self.matmul7(out, x7)
             return out
 
+    reset_auto_parallel_context()
+    set_auto_parallel_context(device_num=8, global_rank=0, strategy_ckpt_load_file="./strategy_stage1_auto.ckpt")
+    strategy1 = ((2, 2), (2, 2))
+    strategy3 = ((2, 2), (2, 2))
+    strategy4 = ((2, 2), (2, 2))
+    strategy5 = ((2, 2), (2, 2))
+    net = GradWrap(NetWithLoss(Net(strategy1, strategy3, strategy4, strategy5)))
+    context.set_auto_parallel_context(parallel_mode="auto_parallel")
 
-    set_auto_parallel_context(device_num=512, global_rank=0)
-    strategy1 = ((8, 1), (1, 1))
-    strategy3 = ((8, 1), (1, 1))
-    strategy4 = ((8, 1), (1, 1))
-    strategy5 = ((8, 1), (1, 1))
-    strategy6 = ((8, 1), (1, 1))
-    strategy7 = ((8, 1), (1, 1))
-    strategy8 = ((8, 1), (1, 1))
-    strategy9 = ((8, 1), (1, 1))
-    net1 = Net(strategy1, strategy3, strategy4, strategy5, strategy6, strategy7)
-    net2 = Net1(strategy8, strategy9)
-    net = GradWrap(NetWithLoss(net1, net2))
-    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
-
-    x1 = Tensor(np.ones([128, 32]), dtype=ms.float32)
-    x2 = Tensor(np.ones([32, 64]), dtype=ms.float32)
-    x4 = Tensor(np.ones([64, 128]), dtype=ms.float32)
-    x5 = Tensor(np.ones([128, 64]), dtype=ms.float32)
-    x6 = Tensor(np.ones([64, 32]), dtype=ms.float32)
+    x1 = Tensor(np.ones([32, 32]), dtype=ms.float32)
+    x6 = Tensor(np.ones([128, 32]), dtype=ms.float32)
     x7 = Tensor(np.ones([32, 32]), dtype=ms.float32)
-    x8 = Tensor(np.ones([32, 128]), dtype=ms.float32)
-    x9 = Tensor(np.ones([128, 64]), dtype=ms.float32)
-    x10 = Tensor(np.ones([64, 64]), dtype=ms.float32)
-    _executor.compile(net, x1, x2, x4, x5, x6, x7, x8, x9, x10)
-
+    _executor.compile(net, x1, x6, x7)
\ No newline at end of file

From c2be5bb8d9ae3f74666ad77c3f5cdb18d4a5518f Mon Sep 17 00:00:00 2001
From: lianliguang <lianliguang@huawei.com>
Date: Tue, 28 Apr 2020 15:32:15 +0800
Subject: [PATCH 149/242] change runtime error to type error when cannot find
 kernel info

---
 mindspore/ccsrc/device/ascend/kernel_select_ascend.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
index 1efd3d6c22..d8779bc550 100644
--- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
+++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
@@ -482,8 +482,8 @@ void SelectKernelInfo(const CNodePtr &kernel_node) {
     if (selected_kernel_info == nullptr) {
       std::ostringstream buffer;
       PrintInputAndOutputInferType(buffer, kernel_node);
-      MS_LOG(EXCEPTION) << "The node [" << kernel_node->DebugString()
-                        << "] cannot find valid kernel info, not supported the type" << buffer.str();
+      MS_EXCEPTION(TypeError) << "The node [" << kernel_node->DebugString()
+                              << "] cannot find valid kernel info, not supported the type" << buffer.str();
     } else {
       PrintRaiseOrReducePrecisionSelectedInfo(kernel_node, selected_kernel_info, precision_reduce);
     }

From 7714a7fa878ba45c1e6b840ac8797a9e9c08c4b2 Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Tue, 28 Apr 2020 16:35:32 +0800
Subject: [PATCH 150/242] Fix checking bug of PReLU

---
 mindspore/ops/operations/nn_ops.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 66656b559e..6ae6c68844 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2066,8 +2066,9 @@ class PReLU(PrimitiveWithInfer):
         return input_x_shape
 
     def infer_dtype(self, input_x_dtype, weight_dtype):
-        args = {"input_x": input_x_dtype, "weight": weight_dtype}
-        validator.check_tensor_type_same(args, (mstype.float16, mstype.float32), self.name)
+        valid_types = (mstype.float16, mstype.float32)
+        validator.check_tensor_type_same({"input_x": input_x_dtype}, valid_types, self.name)
+        validator.check_tensor_type_same({"weight": weight_dtype}, valid_types, self.name)
         return input_x_dtype
 
 

From cd899fba0a53fbdaebe058a907ccc13d4f312767 Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Tue, 28 Apr 2020 14:25:59 +0800
Subject: [PATCH 151/242] ONNX adapter for the MaxPoolWithArgmax

---
 mindspore/ccsrc/onnx/onnx_exporter.cc   | 44 ++++++++++++++++++++++---
 tests/ut/python/utils/test_serialize.py | 25 ++++++++++++++
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/mindspore/ccsrc/onnx/onnx_exporter.cc b/mindspore/ccsrc/onnx/onnx_exporter.cc
index 168e625a89..1c5a7b93c3 100644
--- a/mindspore/ccsrc/onnx/onnx_exporter.cc
+++ b/mindspore/ccsrc/onnx/onnx_exporter.cc
@@ -29,11 +29,12 @@
 
 namespace mindspore {
 enum OpMergeMode {
-  OP_MERGE_UNDEFINED = 0,   // undefined behavior
-  OP_MERGE_IGNORE = 1,      // indicate an input op merged into other op in compute node list
-  OP_MERGE_CONV = 2,        // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv`
-  OP_MERGE_GEMM = 3,        // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm`
-  OP_MERGE_BATCH_NORM = 4,  // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization`
+  OP_MERGE_UNDEFINED = 0,            // undefined behavior
+  OP_MERGE_IGNORE = 1,               // indicate an input op merged into other op in compute node list
+  OP_MERGE_CONV = 2,                 // indicate `MindSpore Conv + BiasAdd` --> `ONNX Conv`
+  OP_MERGE_GEMM = 3,                 // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm`
+  OP_MERGE_BATCH_NORM = 4,           // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX BatchNormalization`
+  OP_MERGE_MAXPOOL_WITH_ARGMAX = 5,  // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool`
 };
 
 struct OpMergedInfo {
@@ -233,6 +234,13 @@ OPERATOR_ONNX_CONVERT_DEFINE(
     .Attr("padding", "auto_pad", onnx::AttributeProto_AttributeType_STRING, SetPoolingPadMode)
     .Attr("strides", "strides", onnx::AttributeProto_AttributeType_INTS, SetAttrTupleValueToProto<2>))
 
+OPERATOR_ONNX_CONVERT_DEFINE(
+  MaxPoolWithArgmax, MaxPool,
+  OpNameInfo()
+    .Attr("ksize", "kernel_shape", onnx::AttributeProto_AttributeType_INTS, SetAttrTupleValueToProto<2>)
+    .Attr("padding", "auto_pad", onnx::AttributeProto_AttributeType_STRING, SetPoolingPadMode)
+    .Attr("strides", "strides", onnx::AttributeProto_AttributeType_INTS, SetAttrTupleValueToProto<2>))
+
 OPERATOR_ONNX_CONVERT_DEFINE(
   AvgPool, AveragePool,
   OpNameInfo()
@@ -254,6 +262,7 @@ void RegisterOpConverters(const std::function<void(OpNameInfo &&)> &fn) {
 
   fn(OP_CONVERT_FUNCTION_NAME(Flatten)());
   fn(OP_CONVERT_FUNCTION_NAME(MaxPool)());
+  fn(OP_CONVERT_FUNCTION_NAME(MaxPoolWithArgmax)());
   fn(OP_CONVERT_FUNCTION_NAME(AvgPool)());
 
   fn(OP_CONVERT_FUNCTION_NAME(Squeeze)());
@@ -328,6 +337,8 @@ class OnnxExporter {
                        onnx::GraphProto *graph_proto);
   void ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CNodePtr &node,
                             std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
+  void ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                                    std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
 
   void ExportOutput(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
                     onnx::GraphProto *graph_proto);
@@ -516,6 +527,12 @@ void OnnxExporter::MatchAndMark(const FuncGraphPtr &func_graph, const std::vecto
       op_merged_infos[cnode].mode = OP_MERGE_BATCH_NORM;
       op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE;
       op_merged_infos[cnode->input(1)].referred_count -= 1;
+    } else if (cnode->IsApply(prim::kPrimTupleGetItem) &&
+               IsPrimitiveCNode(cnode->input(1), std::make_shared<Primitive>("MaxPoolWithArgmax")) &&
+               GetInt32Value(cnode->input(2)) == 0) {
+      op_merged_infos[cnode].mode = OP_MERGE_MAXPOOL_WITH_ARGMAX;
+      op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE;
+      op_merged_infos[cnode->input(1)].referred_count -= 1;
     }
   }
 }
@@ -563,6 +580,9 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map<AnfNodeP
       case OP_MERGE_BATCH_NORM:
         ExportMergeBatchNorm(func_graph, cnode, node_map_ptr, graph_proto);
         break;
+      case OP_MERGE_MAXPOOL_WITH_ARGMAX:
+        ExportMergeMaxPoolWithArgmax(func_graph, cnode, node_map_ptr, graph_proto);
+        break;
       default:
         ExportCNode(func_graph, cnode, node_map_ptr, graph_proto);
         break;
@@ -811,6 +831,20 @@ void OnnxExporter::ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CN
   (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_batch_norm, inputs, graph_proto);
 }
 
+void OnnxExporter::ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                                                std::map<AnfNodePtr, size_t> *node_map_ptr,
+                                                onnx::GraphProto *const graph_proto) {
+  auto maxpool_with_argmax_node = dyn_cast<CNode>(node->input(1));
+
+  PrimitivePtr prim_maxpool_with_argmax =
+    dyn_cast<Primitive>((dyn_cast<ValueNode>(maxpool_with_argmax_node->input(0)))->value());
+  std::vector<AnfNodePtr> inputs;
+  for (size_t i = 1; i < maxpool_with_argmax_node->inputs().size(); i++) {
+    inputs.push_back(maxpool_with_argmax_node->input(i));
+  }
+  (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_maxpool_with_argmax, inputs, graph_proto);
+}
+
 void OnnxExporter::ExportOutput(const FuncGraphPtr & /*func_graph*/, const CNodePtr &node,
                                 std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
   if (node->inputs().size() != 2) {
diff --git a/tests/ut/python/utils/test_serialize.py b/tests/ut/python/utils/test_serialize.py
index cc6f346b77..59a4b93833 100644
--- a/tests/ut/python/utils/test_serialize.py
+++ b/tests/ut/python/utils/test_serialize.py
@@ -362,6 +362,31 @@ def test_lenet5_onnx_export():
     net = LeNet5()
     export(net, input, file_name='lenet5.onnx', file_format='ONNX')
 
+class DefinedNet(nn.Cell):
+    """Conv2d -> BatchNorm2d -> ReLU -> MaxPoolWithArgmax -> Flatten -> Dense net used by the ONNX export test."""
+    def __init__(self, num_classes=10):
+        super(DefinedNet, self).__init__()
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=0, weight_init="zeros")
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU()
+        self.maxpool = P.MaxPoolWithArgmax(padding="same", ksize=2, strides=2)
+        self.flatten = nn.Flatten()
+        self.fc = nn.Dense(int(56*56*64), num_classes)  # assumes a 56x56x64 map after pooling -- TODO confirm
+
+    def construct(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x, argmax = self.maxpool(x)  # the op yields (output, argmax); argmax is not used below
+        x = self.flatten(x)
+        x = self.fc(x)
+        return x
+
+def test_net_onnx_maxpoolwithargmax_export():
+    """Export DefinedNet (which contains MaxPoolWithArgmax) to an ONNX file."""
+    input_data = Tensor(np.ones([1, 3, 224, 224]).astype(np.float32) * 0.01)
+    export(DefinedNet(), input_data, file_name='definedNet.onnx', file_format='ONNX')
+
 
 @run_on_onnxruntime
 def test_lenet5_onnx_load_run():

From a8d81c8b7bf9d54ae66ce8cab15836610e624724 Mon Sep 17 00:00:00 2001
From: leilei_snow <fangzhenglei@huawei.com>
Date: Tue, 28 Apr 2020 16:57:04 +0800
Subject: [PATCH 152/242] fix api document about dynamic_lr

---
 mindspore/nn/dynamic_lr.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/mindspore/nn/dynamic_lr.py b/mindspore/nn/dynamic_lr.py
index 00e6a45901..6eeba415a7 100644
--- a/mindspore/nn/dynamic_lr.py
+++ b/mindspore/nn/dynamic_lr.py
@@ -32,6 +32,7 @@ def piecewise_constant_lr(milestone, learning_rates):
 
     Args:
         milestone (Union[list[int], tuple[int]]): A list of milestone. This list is a monotone increasing list.
+            Every element is a milestone step, and must be greater than 0.
         learning_rates (Union[list[float], tuple[float]]): A list of learning rates.
 
     Returns:
@@ -40,7 +41,7 @@ def piecewise_constant_lr(milestone, learning_rates):
     Examples:
         >>> milestone = [2, 5, 10]
         >>> learning_rates = [0.1, 0.05, 0.01]
-        >>> lr = piecewise_constant_lr(milestone, learning_rates)
+        >>> piecewise_constant_lr(milestone, learning_rates)
         [0.1, 0.1, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01, 0.01, 0.01]
     """
     validator.check_value_type('milestone', milestone, (tuple, list), None)
@@ -100,7 +101,7 @@ def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch,
         >>> total_step = 6
         >>> step_per_epoch = 2
         >>> decay_epoch = 1
-        >>> lr = exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch)
+        >>> exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch)
         [0.1, 0.1, 0.09000000000000001, 0.09000000000000001, 0.08100000000000002, 0.08100000000000002]
     """
     _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
@@ -142,7 +143,7 @@ def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch,
         >>> total_step = 6
         >>> step_per_epoch = 2
         >>> decay_epoch = 2
-        >>> lr = natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
+        >>> natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
         [0.1, 0.1, 0.1, 0.1, 0.016529888822158657, 0.016529888822158657]
     """
     _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
@@ -185,7 +186,7 @@ def inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, deca
         >>> total_step = 6
         >>> step_per_epoch = 1
         >>> decay_epoch = 1
-        >>> lr = inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
+        >>> inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, True)
         [0.1, 0.06666666666666667, 0.05, 0.04, 0.03333333333333333, 0.028571428571428574]
     """
     _check_inputs(learning_rate, decay_rate, total_step, step_per_epoch, decay_epoch, is_stair)
@@ -227,7 +228,7 @@ def cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch):
         >>> total_step = 6
         >>> step_per_epoch = 2
         >>> decay_epoch = 2
-        >>> lr = cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch)
+        >>> cosine_decay_lr(min_lr, max_lr, total_step, step_per_epoch, decay_epoch)
         [0.1, 0.1, 0.05500000000000001, 0.05500000000000001, 0.01, 0.01]
     """
     validator.check_float_positive('min_lr', min_lr, None)
@@ -282,7 +283,7 @@ def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_e
         >>> step_per_epoch = 2
         >>> decay_epoch = 2
         >>> power = 0.5
-        >>> lr = polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power)
+        >>> polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_epoch, decay_epoch, power)
         [0.1, 0.1, 0.07363961030678928, 0.07363961030678928, 0.01, 0.01]
     """
     validator.check_float_positive('learning_rate', learning_rate, None)

From 25af911ed904301847d33ddca8905b65599f69da Mon Sep 17 00:00:00 2001
From: VectorSL <shiliang10@huawei.com>
Date: Tue, 28 Apr 2020 17:03:12 +0800
Subject: [PATCH 153/242] gpu update bn

---
 mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h  | 5 +++--
 .../ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h    | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h
index 6f0c59e29a..26f4332273 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/fused_batch_norm_gpu_kernel.h
@@ -82,6 +82,7 @@ class FusedBatchNormGpuKernel : public GpuKernel {
   }
   bool Init(const CNodePtr &kernel_node) override {
     InitResource();
+    cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))];
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 5) {
       MS_LOG(EXCEPTION) << "input tensor size is " << input_num << ", FusedBatchNormGpuKernel should be 5";
@@ -112,11 +113,11 @@ class FusedBatchNormGpuKernel : public GpuKernel {
     }
 
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetTensor4dDescriptor(x_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch_, channel_, height_, width_),
+      cudnnSetTensor4dDescriptor(x_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, batch_, channel_, height_, width_),
       "Set x desc failed");
 
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetTensor4dDescriptor(y_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch_, channel_, height_, width_),
+      cudnnSetTensor4dDescriptor(y_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, batch_, channel_, height_, width_),
       "Set y desc failed");
 
     CHECK_CUDNN_RET_WITH_EXCEPT(
diff --git a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h
index 08eac28af7..07372ad22d 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/fused_batchnorm_grad_gpu_kernel.h
@@ -110,7 +110,7 @@ class FusedBatchNormGradGpuKernel : public GpuKernel {
       cudnnSetTensor4dDescriptor(dx_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, batch_, channel_, height_, width_),
       "Set dx desc failed");
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetTensor4dDescriptor(scale_bias_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, 1, channel_, 1, 1),
+      cudnnSetTensor4dDescriptor(scale_bias_desc_, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, channel_, 1, 1),
       "Set para desc failed");
 
     InitSizeLists();

From 309060b1c23f183bf60814e678492ba4fcaf29d9 Mon Sep 17 00:00:00 2001
From: ch-l <ch.l@huawei.com>
Date: Thu, 23 Apr 2020 17:46:06 +0200
Subject: [PATCH 154/242] complet cost models

---
 .../auto_parallel/rec_core/rec_cost.cc        |  90 +----
 .../auto_parallel/rec_core/rec_cost.h         |  65 ++-
 .../rec_core/rec_generate_strategy.cc         |  24 +-
 .../auto_parallel/rec_core/rec_graph.h        |  19 +-
 .../auto_parallel/rec_core/rec_parse_graph.cc | 374 +++++++++---------
 .../auto_parallel/rec_core/rec_parse_graph.h  |  12 +-
 .../auto_parallel/rec_core/rec_partition.cc   |  51 ++-
 7 files changed, 288 insertions(+), 347 deletions(-)

diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc
index 4591753efe..3fea107a73 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc
@@ -446,51 +446,8 @@ StrategyRec CostPooling::ChoseStr(const std::vector<double> &cost_op, StrategyRe
   return str;
 }
 
-// Get optimal strategy for Add
-StrategyRec CostAdd::GetOptimalStr(const Graph::NodeType &node,
-                                   const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                                   const Graph &graph) {
-  int tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n);
-  int tensor_c = static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c);
-  int tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h);
-  int tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w);
-
-  std::vector<double> cost_op;
-  std::vector<std::vector<float>> mode;
-
-  if (tensor_n < 2) {
-    cost_op.push_back(DOUBLE_MAX);
-  } else {
-    cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
-                                           mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph));
-  }
-
-  if (tensor_c < 2) {
-    cost_op.push_back(DOUBLE_MAX);
-  } else {
-    cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
-                                           mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph));
-  }
-
-  if (tensor_h < 2) {
-    cost_op.push_back(DOUBLE_MAX);
-  } else {
-    cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
-                                           mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph));
-  }
-
-  if (tensor_w < 2) {
-    cost_op.push_back(DOUBLE_MAX);
-  } else {
-    cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
-                                           mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph));
-  }
-
-  return ChoseStr(cost_op, node.apply.str);
-}
-
 // Chose strategy for Add
-StrategyRec CostAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
+StrategyRec CostTensorAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
   uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
   if (cost_op[min_position] > (DOUBLE_MAX - 0.1)) {
     return str;
@@ -540,49 +497,6 @@ StrategyRec CostReshape::GetOptimalStr(const Graph::NodeType &node) const { retu
 
 StrategyRec CostReshape::ChoseStr(StrategyRec str) const { return str; }
 
-// Get optimal strategy for Biasadd
-StrategyRec CostBiasAdd::GetOptimalStr(const Graph::NodeType &node,
-                                       const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                                       const Graph &graph) {
-  int tensor_n = static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n);
-  int tensor_c = static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c);
-  int tensor_h = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h);
-  int tensor_w = static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w);
-
-  std::vector<double> cost_op;
-  std::vector<std::vector<float>> mode;
-
-  if (tensor_n < 2) {
-    cost_op.push_back(DOUBLE_MAX);
-  } else {
-    cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
-                                           mode = {{0.5, 1, 1, 1}, {0.5, 1, 1, 1}, {0.5, 1, 1, 1}}, graph));
-  }
-
-  if (tensor_c < 2) {
-    cost_op.push_back(DOUBLE_MAX);
-  } else {
-    cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
-                                           mode = {{1, 0.5, 1, 1}, {1, 0.5, 1, 1}, {1, 0.5, 1, 1}}, graph));
-  }
-
-  if (tensor_h < 2) {
-    cost_op.push_back(DOUBLE_MAX);
-  } else {
-    cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
-                                           mode = {{1, 1, 0.5, 1}, {1, 1, 0.5, 1}, {1, 1, 0.5, 1}}, graph));
-  }
-
-  if (tensor_w < 2) {
-    cost_op.push_back(DOUBLE_MAX);
-  } else {
-    cost_op.push_back(cost_in_ + CostRedis(node, node_name_to_strategy,
-                                           mode = {{1, 1, 1, 0.5}, {1, 1, 1, 0.5}, {1, 1, 1, 0.5}}, graph));
-  }
-
-  return ChoseStr(cost_op, node.apply.str);
-}
-
 // Chose strategy for BiasAdd
 StrategyRec CostBiasAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRec str) {
   uint64_t min_position = min_element(cost_op.begin(), cost_op.end()) - cost_op.begin();
@@ -629,7 +543,7 @@ StrategyRec CostBiasAdd::ChoseStr(const std::vector<double> &cost_op, StrategyRe
   return str;
 }
 
-// Get optimal strategy for Common OPs: ReLU and Softmax
+// Get optimal strategy for Common OPs
 StrategyRec CostCommon::GetOptimalStr(const Graph::NodeType &node,
                                       const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
                                       const Graph &graph) {
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h
index af37b9178e..85e5e5ea94 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h
@@ -157,21 +157,6 @@ class CostPooling {
   double cost_in_ = 0;
 };  // class CostPooling is used to compute the cost of Pooling operator.
 
-// class CostAdd is used to compute the cost of Add operator.
-class CostAdd {
- public:
-  StrategyRec GetOptimalStr(const Graph::NodeType &node,
-                            const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                            const Graph &graph);
-
-  double GetMinCostIn() const { return cost_in_; }
-
- private:
-  StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
-
-  double cost_in_ = 0;
-};  // class CostAdd is used to compute the cost of Add operator.
-
 // class CostReshape is used to compute the cost of Reshape operator.
 class CostReshape {
  public:
@@ -185,35 +170,41 @@ class CostReshape {
   double cost_in_ = 0;
 };  // class CostReshape is used to compute the cost of Reshape operator.
 
-// class CostBiasAdd is used to compute the cost of BiasAdd operator.
-class CostBiasAdd {
+// class CostCommon is used to compute the cost of an element-wise operator
+class CostCommon {
  public:
-  StrategyRec GetOptimalStr(const Graph::NodeType &node,
-                            const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                            const Graph &graph);
+  virtual StrategyRec GetOptimalStr(const Graph::NodeType &node,
+                                    const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
+                                    const Graph &graph);
 
-  double GetMinCostIn() const { return cost_in_; }
+  virtual double GetMinCostIn() const { return cost_in_; }
 
- private:
-  StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
+ protected:
+  virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
 
   double cost_in_ = 0;
-};  // class CostBiasAdd is used to compute the cost of BiasAdd operator.
-
-// class CostCommon is used to compute the cost of the element independent operator.
-class CostCommon {
- public:
-  StrategyRec GetOptimalStr(const Graph::NodeType &node,
-                            const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
-                            const Graph &graph);
-
-  double GetMinCostIn() const { return cost_in_; }
+};  // class CostCommon is used to compute the cost of an element-wise operator
 
- private:
+// class CostBiasAdd is used to compute the cost of the addition between a tensor and a bias
+class CostBiasAdd : public CostCommon {
   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
-
-  double cost_in_ = 0;
-};  // class CostCommon is used to compute the cost of Softmax & || Activation operator.
+};
+// class CostTensorAdd is used to compute the cost of the TensorAdd operator.
+class CostTensorAdd : public CostCommon {
+  StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) override;  // defined in rec_cost.cc
+};
+
+// All of the following operators are element-wise and reuse the CostCommon cost model unchanged.
+class CostOneHot : public CostCommon {};
+class CostReLU : public CostCommon {};
+class CostLog : public CostCommon {};
+class CostExp : public CostCommon {};
+class CostAdd : public CostCommon {};
+class CostSub : public CostCommon {};
+class CostMul : public CostCommon {};
+class CostDiv : public CostCommon {};
+class CostSqueeze : public CostCommon {};
+class CostCast : public CostCommon {};
 
 // class BatchNorm is used to compute the cost of BatchNorm operator.
 class CostBatchNorm {
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
index b2c34127a1..e942c8005f 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
@@ -38,6 +38,12 @@ void GenerateStrategy(std::shared_ptr<Graph> graph, bool mask_special_ops,
     for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
       stra.push_back(PrepareStrategy(graph, ops, iter_ops, iter_op_inputs));
     }
+    // OneHot's scalar parameters were removed by entire_costgraph, so empty strategies are added for them here.
+    if (ops[iter_ops]->type() == ONEHOT) {
+      std::vector<int32_t> s_Onehot = {};
+      stra.push_back(s_Onehot);
+      stra.push_back(s_Onehot);
+    }
     StrategyPtr sp = std::make_shared<Strategy>(0, stra);
     ops[iter_ops]->SetSelectedStrategyAndCost(sp, ops[iter_ops]->selected_cost());
   }
@@ -201,12 +207,13 @@ std::vector<int32_t> PrepareStrategy(const std::shared_ptr<Graph> &graph,
   }
 }
 
+// Used to make the generated strategies respect the strategy checks of auto parallel.
 void MaskSpecialOps(std::shared_ptr<Graph> graph) {
   size_t iter_nodes = graph->nodes.size();
   for (size_t i = 0; i < iter_nodes; i++) {
     Graph::NodeType &node = graph->nodes[i];
 
-    if (node.apply.op_type == 1) {  // For Convolution
+    if (node.apply.op_type == kRecConvolution) {  // For convolution
       // cover input tensor strategy
       node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
       node.apply.arguments[0].tensor_str.str_c = 1;
@@ -217,19 +224,12 @@ void MaskSpecialOps(std::shared_ptr<Graph> graph) {
       node.apply.arguments[1].tensor_str.str_c = 1;
       node.apply.arguments[1].tensor_str.str_h = 1;
       node.apply.arguments[1].tensor_str.str_w = 1;
-    } else if (node.apply.op_type == 8) {  // For BN
-      node.apply.arguments[0].tensor_str.str_n = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
-      node.apply.arguments[0].tensor_str.str_c = 1;
+    } else if (node.apply.op_type == kRecBiasAdd || node.apply.op_type == kRecMatMul) {
+      // For MatMul and BiasAdd
       node.apply.arguments[0].tensor_str.str_h = 1;
       node.apply.arguments[0].tensor_str.str_w = 1;
-      // cover 1-d argument blobs
-      node.apply.arguments[1].tensor_str.str_n = 1;
-      node.apply.arguments[2].tensor_str.str_c = 1;
-      node.apply.arguments[3].tensor_str.str_h = 1;
-      node.apply.arguments[4].tensor_str.str_w = 1;
-    } else if (node.apply.op_type == 4 || node.apply.op_type == 9) {  // For SparseSoftmaxCrossEntropyWithLogits
-      node.tensor_parm.tensor_str.str_h = 1.0 / static_cast<float>(g_device_manager->DeviceNum());
-      node.tensor_parm.tensor_str.str_w = 1;
+      node.apply.arguments[1].tensor_str.str_h = 1;
+      node.apply.arguments[1].tensor_str.str_w = 1;
     }
   }
 }
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h
index 6ab2782cb2..ae5ccabaf5 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_graph.h
@@ -27,17 +27,26 @@
 namespace mindspore {
 namespace parallel {
 enum OperatorType {
+  kRecUnkownType,
   kRecMatMul,
   kRecConvolution,
   kRecPooling,
-  kRecAdd,
-  kRecSoftmax,
-  kRecReshape,
-  kRecBiasAdd,
+  kRecTensorAdd,
   kRecReLU,
   kRecBatchNorm,
+  kRecReshape,
+  kRecBiasAdd,
+  kRecSoftmax,
   kRecSparseSoftmaxCrossEntropyWithLogits,
-  kRecUnkownType
+  kRecOneHot,
+  kRecLog,
+  kRecExp,
+  kRecAdd,
+  kRecSub,
+  kRecMul,
+  kRecDiv,
+  kRecSqueeze,
+  kRecCast
 };
 
 enum InfoType { kApplication, kConstant };
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc
index 6b438cb670..b9b1b7b914 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc
@@ -1,187 +1,187 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "parallel/auto_parallel/rec_core/rec_parse_graph.h"
-
-#include <algorithm>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "ir/value.h"
-#include "parallel/auto_parallel/rec_core/rec_graph.h"
-#include "parallel/auto_parallel/rec_core/rec_tensor.h"
-#include "parallel/ops_info/operator_info.h"
-
-namespace mindspore {
-namespace parallel {
-const TensorParam MakeTensor(int n, int c, int h, int w) {
-  TensorParam new_tensor;
-  new_tensor.tensor_type = kFloat32;
-  new_tensor.tensor_shape.shape_n = n;
-  new_tensor.tensor_shape.shape_c = c;
-  new_tensor.tensor_shape.shape_h = h;
-  new_tensor.tensor_shape.shape_w = w;
-  const TensorParam &tensor = new_tensor;
-  return tensor;
-}
-
-Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops, size_t iter_ops) {
-  Graph::NodeType NewOp;
-  NewOp.name = ops[iter_ops]->name();
-  NewOp.info = InfoType::kApplication;
-
-  auto op_type = ops[iter_ops]->type();
-  auto idx = DictOpType.find(op_type);
-  if (idx == DictOpType.end()) {
-    NewOp.apply.op_type = OperatorType::kRecUnkownType;
-    MS_LOG(INFO) << "Unknown operator type.";
-  } else {
-    NewOp.apply.op_type = DictOpType.at(op_type);
-  }
-
-  if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 4) {
-    NewOp.tensor_parm = MakeTensor(
-      ops[iter_ops]->outputs_tensor_info()[0].shape()[0], ops[iter_ops]->outputs_tensor_info()[0].shape()[1],
-      ops[iter_ops]->outputs_tensor_info()[0].shape()[2], ops[iter_ops]->outputs_tensor_info()[0].shape()[3]);
-  } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 2) {
-    NewOp.tensor_parm = Fill2DTensor(ops, iter_ops, NewOp);
-  } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 1) {
-    NewOp.tensor_parm = MakeTensor(1, 1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0]);
-  } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 0) {
-    NewOp.tensor_parm = MakeTensor(1, 1, 1, 1);
-  } else {
-    MS_LOG(ERROR) << "Tensor's shape is unknown.";
-  }
-
-  NewOp.apply = CompleteOperatorInputs(ops, iter_ops, NewOp);
-  return NewOp;
-}
-
-TensorParam Fill2DTensor(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
-                         Graph::NodeType NewTensor) {
-  if (NewTensor.apply.op_type == OperatorType::kRecMatMul) {
-    auto attrs = ops[iter_ops]->attrs();
-    bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value();
-    bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value();
-    if (transpose_a) {
-      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[1],
-                                         ops[iter_ops]->inputs_tensor_info()[0].shape()[0]);
-    } else if (transpose_b) {
-      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[1],
-                                         ops[iter_ops]->inputs_tensor_info()[0].shape()[0]);
-    } else {
-      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[0],
-                                         ops[iter_ops]->inputs_tensor_info()[0].shape()[1]);
-    }
-  } else {
-    NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[0].shape()[0],
-                                       ops[iter_ops]->inputs_tensor_info()[0].shape()[1]);
-  }
-  return NewTensor.tensor_parm;
-}
-
-OperatorRec CompleteOperatorInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
-                                   Graph::NodeType NewTensor) {
-  for (size_t iter_input_tensors = 0; iter_input_tensors < ops[iter_ops]->inputs_tensor_info().size();
-       iter_input_tensors++) {
-    if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 4) {
-      NewTensor.apply.arguments[iter_input_tensors] =
-        MakeTensor(ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0],
-                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1],
-                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[2],
-                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[3]);
-    } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 2) {
-      NewTensor.apply.arguments[iter_input_tensors] = Complete2DInputs(ops, iter_ops, iter_input_tensors, NewTensor);
-    } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 1) {
-      NewTensor.apply.arguments[iter_input_tensors] =
-        MakeTensor(1, 1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]);
-    } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 0) {
-      NewTensor.apply.arguments[iter_input_tensors] = MakeTensor(1, 1, 1, 1);
-    } else {
-      MS_LOG(ERROR) << "Tensor's shape is unknown.";
-    }
-  }
-  return NewTensor.apply;
-}
-
-TensorParam Complete2DInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
-                             const size_t iter_input_tensors, Graph::NodeType NewTensor) {
-  if (NewTensor.apply.op_type == OperatorType::kRecMatMul) {
-    auto attrs = ops[iter_ops]->attrs();
-    bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value();
-    bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value();
-    if (transpose_a && (iter_input_tensors == 0)) {
-      NewTensor.apply.arguments[iter_input_tensors] =
-        MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1],
-                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]);
-    } else if (transpose_b && (iter_input_tensors == 1)) {
-      NewTensor.apply.arguments[iter_input_tensors] =
-        MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1],
-                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]);
-    } else {
-      NewTensor.apply.arguments[iter_input_tensors] =
-        MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0],
-                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]);
-    }
-  } else {
-    NewTensor.apply.arguments[iter_input_tensors] =
-      MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0],
-                 ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]);
-  }
-  return NewTensor.apply.arguments[iter_input_tensors];
-}
-
-std::shared_ptr<Graph> ParseGraph(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
-                                  const std::vector<std::vector<std::string>> &input_tensor_names) {
-  std::shared_ptr<Graph> graph(new Graph);
-  if (ops.size() > SIZE_MAX / 2) {
-    MS_LOG(EXCEPTION) << "Total number of operators is bigger than " << SIZE_MAX / 2;
-  }
-
-  for (size_t iter_ops = 0; iter_ops < ops.size(); iter_ops++) {
-    Graph::NodeType NewOp = MakeNewOperator(ops, iter_ops);
-    graph->nodes.push_back(NewOp);
-  }
-  MakeEdge(input_tensor_names, graph);
-
-  return graph;
-}
-
-void MakeEdge(const std::vector<std::vector<std::string>> &input_tensor_names, std::shared_ptr<Graph> graph) {
-  for (size_t iter_i = 0; iter_i < input_tensor_names.size(); iter_i++) {
-    for (size_t iter_j = 1; iter_j < input_tensor_names[iter_i].size(); iter_j++) {
-      size_t head_node_index = GetIndexInInputTensorNames(input_tensor_names, input_tensor_names[iter_i][iter_j]);
-      if (head_node_index < SIZE_MAX / 2 && head_node_index != iter_i) {
-        graph->nodes[iter_i].node_in.push_back(head_node_index);
-        graph->nodes[head_node_index].node_out.push_back(iter_i);
-      }
-    }
-  }
-}
-
-size_t GetIndexInInputTensorNames(const std::vector<std::vector<std::string>> &input_tensor_name,
-                                  const std::string &input_name) {
-  for (size_t index = 0; index < input_tensor_name.size(); index++) {
-    if (input_tensor_name[index][0] == input_name) {
-      return index;
-    }
-  }
-  MS_LOG(INFO) << "Get index failed, using SIZE_MAX insted";
-  return SIZE_MAX;
-}
-}  // namespace parallel
-}  // namespace mindspore
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "parallel/auto_parallel/rec_core/rec_parse_graph.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "ir/value.h"
+#include "parallel/auto_parallel/rec_core/rec_graph.h"
+#include "parallel/auto_parallel/rec_core/rec_tensor.h"
+#include "parallel/ops_info/operator_info.h"
+
+namespace mindspore {
+namespace parallel {
+const TensorParam MakeTensor(int n, int c, int h, int w) {
+  TensorParam new_tensor;
+  new_tensor.tensor_type = kFloat32;
+  new_tensor.tensor_shape.shape_n = n;
+  new_tensor.tensor_shape.shape_c = c;
+  new_tensor.tensor_shape.shape_h = h;
+  new_tensor.tensor_shape.shape_w = w;
+  const TensorParam &tensor = new_tensor;
+  return tensor;
+}
+
+Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops, size_t iter_ops) {
+  Graph::NodeType NewOp;
+  NewOp.name = ops[iter_ops]->name();
+  NewOp.info = InfoType::kApplication;
+
+  auto op_type = ops[iter_ops]->type();
+  auto idx = DictOpType.find(op_type);
+  if (idx == DictOpType.end()) {
+    NewOp.apply.op_type = OperatorType::kRecUnkownType;
+    MS_LOG(INFO) << "Unknown operator type.";
+  } else {
+    NewOp.apply.op_type = DictOpType.at(op_type);
+  }
+
+  if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 4) {
+    NewOp.tensor_parm = MakeTensor(
+      ops[iter_ops]->outputs_tensor_info()[0].shape()[0], ops[iter_ops]->outputs_tensor_info()[0].shape()[1],
+      ops[iter_ops]->outputs_tensor_info()[0].shape()[2], ops[iter_ops]->outputs_tensor_info()[0].shape()[3]);
+  } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 2) {
+    NewOp.tensor_parm = Fill2DTensor(ops, iter_ops, NewOp);
+  } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 1) {
+    NewOp.tensor_parm = MakeTensor(1, 1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0]);
+  } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 0) {
+    NewOp.tensor_parm = MakeTensor(1, 1, 1, 1);
+  } else {
+    MS_LOG(ERROR) << "Tensor's shape is unknown.";
+  }
+
+  NewOp.apply = CompleteOperatorInputs(ops, iter_ops, NewOp);
+  return NewOp;
+}
+
+TensorParam Fill2DTensor(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
+                         Graph::NodeType NewTensor) {
+  if (NewTensor.apply.op_type == OperatorType::kRecMatMul) {
+    auto attrs = ops[iter_ops]->attrs();
+    bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value();
+    bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value();
+    if (transpose_a) {
+      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[1],
+                                         ops[iter_ops]->outputs_tensor_info()[0].shape()[0]);
+    } else if (transpose_b) {
+      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[1],
+                                         ops[iter_ops]->outputs_tensor_info()[0].shape()[0]);
+    } else {
+      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0],
+                                         ops[iter_ops]->outputs_tensor_info()[0].shape()[1]);
+    }
+  } else {
+    NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0],
+                                       ops[iter_ops]->outputs_tensor_info()[0].shape()[1]);
+  }
+  return NewTensor.tensor_parm;
+}
+
+OperatorRec CompleteOperatorInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
+                                   Graph::NodeType NewTensor) {
+  for (size_t iter_input_tensors = 0; iter_input_tensors < ops[iter_ops]->inputs_tensor_info().size();
+       iter_input_tensors++) {
+    if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 4) {
+      NewTensor.apply.arguments[iter_input_tensors] =
+        MakeTensor(ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0],
+                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1],
+                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[2],
+                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[3]);
+    } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 2) {
+      NewTensor.apply.arguments[iter_input_tensors] = Complete2DInputs(ops, iter_ops, iter_input_tensors, NewTensor);
+    } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 1) {
+      NewTensor.apply.arguments[iter_input_tensors] =
+        MakeTensor(1, 1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]);
+    } else if (ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape().size() == 0) {
+      NewTensor.apply.arguments[iter_input_tensors] = MakeTensor(1, 1, 1, 1);
+    } else {
+      MS_LOG(ERROR) << "Tensor's shape is unknown.";
+    }
+  }
+  return NewTensor.apply;
+}
+
+TensorParam Complete2DInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
+                             const size_t iter_input_tensors, Graph::NodeType NewTensor) {
+  if (NewTensor.apply.op_type == OperatorType::kRecMatMul) {
+    auto attrs = ops[iter_ops]->attrs();
+    bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value();
+    bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value();
+    if (transpose_a && (iter_input_tensors == 0)) {
+      NewTensor.apply.arguments[iter_input_tensors] =
+        MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1],
+                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]);
+    } else if (transpose_b && (iter_input_tensors == 1)) {
+      NewTensor.apply.arguments[iter_input_tensors] =
+        MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1],
+                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0]);
+    } else {
+      NewTensor.apply.arguments[iter_input_tensors] =
+        MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0],
+                   ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]);
+    }
+  } else {
+    NewTensor.apply.arguments[iter_input_tensors] =
+      MakeTensor(1, 1, ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[0],
+                 ops[iter_ops]->inputs_tensor_info()[iter_input_tensors].shape()[1]);
+  }
+  return NewTensor.apply.arguments[iter_input_tensors];
+}
+
+std::shared_ptr<Graph> ParseGraph(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
+                                  const std::vector<std::vector<std::string>> &input_tensor_names) {
+  std::shared_ptr<Graph> graph(new Graph);
+  if (ops.size() > SIZE_MAX / 2) {
+    MS_LOG(EXCEPTION) << "Total number of operators is bigger than " << SIZE_MAX / 2;
+  }
+
+  for (size_t iter_ops = 0; iter_ops < ops.size(); iter_ops++) {
+    Graph::NodeType NewOp = MakeNewOperator(ops, iter_ops);
+    graph->nodes.push_back(NewOp);
+  }
+  MakeEdge(input_tensor_names, graph);
+
+  return graph;
+}
+
+void MakeEdge(const std::vector<std::vector<std::string>> &input_tensor_names, std::shared_ptr<Graph> graph) {
+  for (size_t iter_i = 0; iter_i < input_tensor_names.size(); iter_i++) {
+    for (size_t iter_j = 1; iter_j < input_tensor_names[iter_i].size(); iter_j++) {
+      size_t head_node_index = GetIndexInInputTensorNames(input_tensor_names, input_tensor_names[iter_i][iter_j]);
+      if (head_node_index < SIZE_MAX / 2 && head_node_index != iter_i) {
+        graph->nodes[iter_i].node_in.push_back(head_node_index);
+        graph->nodes[head_node_index].node_out.push_back(iter_i);
+      }
+    }
+  }
+}
+
+size_t GetIndexInInputTensorNames(const std::vector<std::vector<std::string>> &input_tensor_name,
+                                  const std::string &input_name) {
+  for (size_t index = 0; index < input_tensor_name.size(); index++) {
+    if (input_tensor_name[index][0] == input_name) {
+      return index;
+    }
+  }
+  MS_LOG(INFO) << "Get index failed, using SIZE_MAX insted";
+  return SIZE_MAX;
+}
+}  // namespace parallel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
index ae50ced418..17a8174dde 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
@@ -31,15 +31,23 @@ namespace parallel {
 const std::map<std::string, OperatorType> DictOpType{
   {MATMUL, OperatorType::kRecMatMul},
   {CONV2D, OperatorType::kRecConvolution},
+  {MAXPOOL, OperatorType::kRecPooling},
   {MAXPOOLV2, OperatorType::kRecPooling},
   {SIMPLE_MEAN, OperatorType::kRecPooling},
-  {TENSOR_ADD, OperatorType::kRecAdd},
+  {TENSOR_ADD, OperatorType::kRecTensorAdd},
   {RESHAPE, OperatorType::kRecReshape},
   {BIAS_ADD, OperatorType::kRecBiasAdd},
   {RELU, OperatorType::kRecReLU},
   {BATCH_NORM, OperatorType::kRecBatchNorm},
   {SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits},
-};
+  {ONEHOT, OperatorType::kRecOneHot},
+  {LOG, OperatorType::kRecLog},
+  {EXP, OperatorType::kRecExp},
+  {SUB, OperatorType::kRecSub},
+  {MUL, OperatorType::kRecMul},
+  {DIV, OperatorType::kRecDiv},
+  {SQUEEZE, OperatorType::kRecSqueeze},
+  {CAST, OperatorType::kRecCast}};
 
 const TensorParam MakeTensor(int n, int c, int h, int w);
 
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
index 81e0eaa2dd..a37bc16bfc 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
@@ -49,14 +49,14 @@ double GetWeights(const Graph::NodeType &node) {
     auto cost_ptr = std::make_shared<CostPooling>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == OperatorType::kRecAdd) {
-    // For Add
-    auto cost_ptr = std::make_shared<CostAdd>();
+  } else if (op.op_type == OperatorType::kRecTensorAdd) {
+    // For TensorAdd
+    auto cost_ptr = std::make_shared<CostTensorAdd>();
 
     return cost_ptr->GetMinCostIn();
-  } else if (op.op_type == OperatorType::kRecSoftmax || op.op_type == OperatorType::kRecReLU ||
+  } else if (op.op_type == OperatorType::kRecReLU || op.op_type == OperatorType::kRecSoftmax ||
              op.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
-    // For Softmax & || Activation
+    // For Activation and Softmax
     auto cost_ptr = std::make_shared<CostCommon>();
 
     return cost_ptr->GetMinCostIn();
@@ -74,6 +74,15 @@ double GetWeights(const Graph::NodeType &node) {
     // For BatchNorm
     auto cost_ptr = std::make_shared<CostBatchNorm>();
 
+    return cost_ptr->GetMinCostIn();
+  } else if (op.op_type == OperatorType::kRecOneHot || op.op_type == OperatorType::kRecLog ||
+             op.op_type == OperatorType::kRecExp || op.op_type == OperatorType::kRecAdd ||
+             op.op_type == OperatorType::kRecSub || op.op_type == OperatorType::kRecMul ||
+             op.op_type == OperatorType::kRecDiv || op.op_type == OperatorType::kRecSqueeze ||
+             op.op_type == OperatorType::kRecCast) {
+    // For element-wise op
+    auto cost_ptr = std::make_shared<CostCommon>();
+
     return cost_ptr->GetMinCostIn();
   } else if (op.op_type == OperatorType::kRecUnkownType) {
     // For unknown type
@@ -118,47 +127,57 @@ StrategyRec PartitionNode(const Graph::NodeType &node,
                           std::shared_ptr<Graph> graph) {
   MS_EXCEPTION_IF_NULL(graph);
 
-  if (node.apply.op_type == 0) {
+  if (node.apply.op_type == OperatorType::kRecMatMul) {
     // For MatMul
     auto cost_ptr = std::make_shared<CostMatMul>();
 
     return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
-  } else if (node.apply.op_type == 1) {
+  } else if (node.apply.op_type == OperatorType::kRecConvolution) {
     // For Convolution
     auto cost_ptr = std::make_shared<CostConvolution>();
 
     return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
-  } else if (node.apply.op_type == 2) {
+  } else if (node.apply.op_type == OperatorType::kRecPooling) {
     // For Pooling
     auto cost_ptr = std::make_shared<CostPooling>();
 
     return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
-  } else if (node.apply.op_type == 3) {
-    // For Add
-    auto cost_ptr = std::make_shared<CostAdd>();
+  } else if (node.apply.op_type == OperatorType::kRecTensorAdd) {
+    // For TensorAdd
+    auto cost_ptr = std::make_shared<CostTensorAdd>();
 
     return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
-  } else if (node.apply.op_type == 4 || node.apply.op_type == 7 || node.apply.op_type == 9) {
+  } else if (node.apply.op_type == OperatorType::kRecReLU || node.apply.op_type == OperatorType::kRecSoftmax ||
+             node.apply.op_type == OperatorType::kRecSparseSoftmaxCrossEntropyWithLogits) {
     // For Softmax & Activation
     auto cost_ptr = std::make_shared<CostCommon>();
 
     return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
-  } else if (node.apply.op_type == 5) {
+  } else if (node.apply.op_type == OperatorType::kRecReshape) {
     // For Reshape
     auto cost_ptr = std::make_shared<CostReshape>();
 
     return cost_ptr->GetOptimalStr(node);
-  } else if (node.apply.op_type == 6) {
+  } else if (node.apply.op_type == OperatorType::kRecBiasAdd) {
     // For BiasAdd
     auto cost_ptr = std::make_shared<CostBiasAdd>();
 
     return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
-  } else if (node.apply.op_type == 8) {
+  } else if (node.apply.op_type == OperatorType::kRecBatchNorm) {
     // For BatchNorm
     auto cost_ptr = std::make_shared<CostBatchNorm>();
 
     return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
-  } else if (node.apply.op_type == 10) {
+  } else if (node.apply.op_type == OperatorType::kRecOneHot || node.apply.op_type == OperatorType::kRecLog ||
+             node.apply.op_type == OperatorType::kRecExp || node.apply.op_type == OperatorType::kRecAdd ||
+             node.apply.op_type == OperatorType::kRecSub || node.apply.op_type == OperatorType::kRecMul ||
+             node.apply.op_type == OperatorType::kRecDiv || node.apply.op_type == OperatorType::kRecSqueeze ||
+             node.apply.op_type == OperatorType::kRecCast) {
+    // For element-wise op
+    auto cost_ptr = std::make_shared<CostCommon>();
+
+    return cost_ptr->GetOptimalStr(node, node_name_to_strategy, *graph);
+  } else if (node.apply.op_type == OperatorType::kRecUnkownType) {
     // For unknown type
     StrategyRec default_strategy;
     return default_strategy;

From 78a4894e67a9024fe4b9de70ed6f7510d22ba171 Mon Sep 17 00:00:00 2001
From: zhoufeng <zhoufeng54@huawei.com>
Date: Tue, 28 Apr 2020 17:23:34 +0800
Subject: [PATCH 155/242] modify mindspore_gvar link order

---
 mindspore/ccsrc/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index 7fb42ab359..4c6ceb38e1 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -192,13 +192,13 @@ set_target_properties(_c_expression PROPERTIES INSTALL_RPATH ${ORIGIN_PATH})
 if (CMAKE_SYSTEM_NAME MATCHES "Windows")
     target_link_libraries(mindspore mindspore::pybind11_module)
     target_link_libraries(mindspore mindspore_gvar)
+    target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
 else ()
+    target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
     target_link_libraries(_c_expression PRIVATE mindspore::pybind11_module)
     target_link_libraries(_c_expression PRIVATE mindspore_gvar)
 endif ()
 
-target_link_libraries(_c_expression PRIVATE -Wl,--whole-archive mindspore -Wl,--no-whole-archive)
-
 if (USE_GLOG)
     target_link_libraries(_c_expression PRIVATE mindspore::glog)
 endif ()

From 7e00410a2f64d9e79e3993a699587cc871851b07 Mon Sep 17 00:00:00 2001
From: jonyguo <guozhijian@huawei.com>
Date: Mon, 13 Apr 2020 19:26:03 +0800
Subject: [PATCH 156/242] enhance: add example for zhwiki and CLUERNER2020 to
 mindrecord

---
 .../ImageNet_Similar_Perf}/README.md          |    24 +-
 .../imagenet/__init__.py                      |     0
 .../ImageNet_Similar_Perf}/imagenet/mr_api.py |     0
 .../ImageNet_Similar_Perf}/run_imagenet.sh    |     0
 .../ImageNet_Similar_Perf}/run_template.sh    |     0
 .../template/__init__.py                      |     0
 .../ImageNet_Similar_Perf}/template/mr_api.py |     0
 .../ImageNet_Similar_Perf}/writer.py          |     0
 .../nlp_to_mindrecord/CLUERNER2020/README.md  |    82 +
 .../CLUERNER2020/create_dataset.py            |    36 +
 .../CLUERNER2020/data/README.md               |     1 +
 .../CLUERNER2020/data_processor_seq.py        |   162 +
 .../CLUERNER2020/label2id.json                |    43 +
 example/nlp_to_mindrecord/CLUERNER2020/run.sh |    20 +
 .../CLUERNER2020/run_read.sh                  |    17 +
 .../CLUERNER2020/tokenization.py              |   388 +
 .../nlp_to_mindrecord/CLUERNER2020/vocab.txt  | 21128 ++++++++++++++++
 example/nlp_to_mindrecord/zhwiki/README.md    |   107 +
 .../zhwiki/create_dataset.py                  |    43 +
 .../zhwiki/create_pretraining_data.py         |   428 +
 example/nlp_to_mindrecord/zhwiki/run.sh       |    29 +
 example/nlp_to_mindrecord/zhwiki/run_read.sh  |    17 +
 .../nlp_to_mindrecord/zhwiki/sample_text.txt  |    33 +
 .../nlp_to_mindrecord/zhwiki/tokenization.py  |   394 +
 example/nlp_to_mindrecord/zhwiki/vocab.txt    | 21128 ++++++++++++++++
 25 files changed, 44073 insertions(+), 7 deletions(-)
 rename example/{convert_to_mindrecord => cv_to_mindrecord/ImageNet_Similar_Perf}/README.md (84%)
 rename example/{convert_to_mindrecord => cv_to_mindrecord/ImageNet_Similar_Perf}/imagenet/__init__.py (100%)
 rename example/{convert_to_mindrecord => cv_to_mindrecord/ImageNet_Similar_Perf}/imagenet/mr_api.py (100%)
 rename example/{convert_to_mindrecord => cv_to_mindrecord/ImageNet_Similar_Perf}/run_imagenet.sh (100%)
 rename example/{convert_to_mindrecord => cv_to_mindrecord/ImageNet_Similar_Perf}/run_template.sh (100%)
 rename example/{convert_to_mindrecord => cv_to_mindrecord/ImageNet_Similar_Perf}/template/__init__.py (100%)
 rename example/{convert_to_mindrecord => cv_to_mindrecord/ImageNet_Similar_Perf}/template/mr_api.py (100%)
 rename example/{convert_to_mindrecord => cv_to_mindrecord/ImageNet_Similar_Perf}/writer.py (100%)
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/README.md
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/data/README.md
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/data_processor_seq.py
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/label2id.json
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/run.sh
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/run_read.sh
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/tokenization.py
 create mode 100644 example/nlp_to_mindrecord/CLUERNER2020/vocab.txt
 create mode 100644 example/nlp_to_mindrecord/zhwiki/README.md
 create mode 100644 example/nlp_to_mindrecord/zhwiki/create_dataset.py
 create mode 100644 example/nlp_to_mindrecord/zhwiki/create_pretraining_data.py
 create mode 100644 example/nlp_to_mindrecord/zhwiki/run.sh
 create mode 100644 example/nlp_to_mindrecord/zhwiki/run_read.sh
 create mode 100644 example/nlp_to_mindrecord/zhwiki/sample_text.txt
 create mode 100644 example/nlp_to_mindrecord/zhwiki/tokenization.py
 create mode 100644 example/nlp_to_mindrecord/zhwiki/vocab.txt

diff --git a/example/convert_to_mindrecord/README.md b/example/cv_to_mindrecord/ImageNet_Similar_Perf/README.md
similarity index 84%
rename from example/convert_to_mindrecord/README.md
rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/README.md
index 008cff5ee3..8bdcb9e25d 100644
--- a/example/convert_to_mindrecord/README.md
+++ b/example/cv_to_mindrecord/ImageNet_Similar_Perf/README.md
@@ -35,11 +35,20 @@ This example provides an efficient way to generate MindRecord. Users only need t
     n02110185 3 mouse
     n02096294 4 orange
     ```
+
 2. Edit run_imagenet.sh and modify the parameters
-3. Run the bash script  
+    ```
+    --mindrecord_file: output MindRecord file.
+    --mindrecord_partitions: the partitions for MindRecord.
+    --label_file: ImageNet label map file.
+    --image_dir: ImageNet directory which contains the sub directories.
+    ```
+
+3. Run the bash script
     ```bash  
     bash run_imagenet.sh
     ```  
+
 4. Performance result
 
     |  Training Data |  General API | Current Example |  Env  |
@@ -47,29 +56,30 @@ This example provides an efficient way to generate MindRecord. Users only need t
     |ImageNet(140G)|  2h40m |  50m  |  CPU: Intel Xeon Gold 6130 x 64, Memory: 256G, Storage: HDD |
 
 ## How to use the example for other dataset
+
 ### Create work space
 
 Assume the dataset name is 'xyz'
 * Create work space from template
     ```shell
-    cd ${your_mindspore_home}/example/convert_to_mindrecord
+    cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf
     cp -r template xyz
     ```
 
 ### Implement data generator
 
-Edit dictionary data generator  
+Edit dictionary data generator.
 * Edit file 
     ```shell
-    cd ${your_mindspore_home}/example/convert_to_mindrecord
+    cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf
     vi xyz/mr_api.py
     ```
 
-Two API, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented
+Two APIs, 'mindrecord_task_number' and 'mindrecord_dict_data', must be implemented.
 - 'mindrecord_task_number()' returns number of tasks. Return 1 if data row is generated serially. Return N if generator can be split into N parallel-run tasks.
 - 'mindrecord_dict_data(task_id)' yields dictionary data row by row. 'task_id' is 0..N-1, if N is return value of mindrecord_task_number()
 
-Tricky for parallel run
+Tricks for parallel run.
 - For ImageNet, one directory can be a task.
 - For TFRecord with multiple files, each file can be a task.
 - For TFRecord with 1 file only, it could also be split into N tasks. Task_id=K means: data row is picked only if (count % N == K) 
@@ -78,7 +88,7 @@ Tricky for parallel run
 
 * run python script 
     ```shell
-    cd ${your_mindspore_home}/example/convert_to_mindrecord
+    cd ${your_mindspore_home}/example/cv_to_mindrecord/ImageNet_Similar_Perf
     python writer.py --mindrecord_script xyz [...]
     ```
     > You can put this command in script **run_xyz.sh** for easy execution
diff --git a/example/convert_to_mindrecord/imagenet/__init__.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/__init__.py
similarity index 100%
rename from example/convert_to_mindrecord/imagenet/__init__.py
rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/__init__.py
diff --git a/example/convert_to_mindrecord/imagenet/mr_api.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py
similarity index 100%
rename from example/convert_to_mindrecord/imagenet/mr_api.py
rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py
diff --git a/example/convert_to_mindrecord/run_imagenet.sh b/example/cv_to_mindrecord/ImageNet_Similar_Perf/run_imagenet.sh
similarity index 100%
rename from example/convert_to_mindrecord/run_imagenet.sh
rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/run_imagenet.sh
diff --git a/example/convert_to_mindrecord/run_template.sh b/example/cv_to_mindrecord/ImageNet_Similar_Perf/run_template.sh
similarity index 100%
rename from example/convert_to_mindrecord/run_template.sh
rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/run_template.sh
diff --git a/example/convert_to_mindrecord/template/__init__.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/template/__init__.py
similarity index 100%
rename from example/convert_to_mindrecord/template/__init__.py
rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/template/__init__.py
diff --git a/example/convert_to_mindrecord/template/mr_api.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/template/mr_api.py
similarity index 100%
rename from example/convert_to_mindrecord/template/mr_api.py
rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/template/mr_api.py
diff --git a/example/convert_to_mindrecord/writer.py b/example/cv_to_mindrecord/ImageNet_Similar_Perf/writer.py
similarity index 100%
rename from example/convert_to_mindrecord/writer.py
rename to example/cv_to_mindrecord/ImageNet_Similar_Perf/writer.py
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/README.md b/example/nlp_to_mindrecord/CLUERNER2020/README.md
new file mode 100644
index 0000000000..7511b4ff88
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/README.md
@@ -0,0 +1,82 @@
+# Guideline to Convert Training Data CLUERNER2020 to MindRecord For Bert Fine Tuning
+
+<!-- TOC -->
+
+- [What does the example do](#what-does-the-example-do)
+- [How to use the example to process CLUERNER2020](#how-to-use-the-example-to-process-cluerner2020)
+    - [Download CLUERNER2020 and unzip](#download-cluerner2020-and-unzip)
+    - [Generate MindRecord](#generate-mindrecord)
+    - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord)
+
+
+<!-- /TOC -->
+
+## What does the example do
+
+This example converts the [CLUERNER2020](https://www.cluebenchmarks.com/introduce.html) training data into MindRecord files, which are then used for Bert Fine Tuning.
+
+1.  run.sh: entry script that generates MindRecord.
+    - data_processor_seq.py: the script from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version); we only changed the part that generates tfrecord so that it generates MindRecord instead.
+    - label2id.json: the file from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version).
+    - tokenization.py: the script from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version).
+    - vocab.txt: the file from [CLUEbenchmark/CLUENER2020](https://github.com/CLUEbenchmark/CLUENER2020/tree/master/tf_version).
+2.  run_read.sh: entry script that creates MindDataset from MindRecord.
+    - create_dataset.py: use MindDataset to read MindRecord to generate dataset.
+3. data: the output directory for MindRecord.
+4. cluener_public: the CLUENER2020 training data.
+
+## How to use the example to process CLUERNER2020
+
+Download CLUERNER2020, convert it to MindRecord, and then use MindDataset to read the MindRecord.
+
+### Download CLUERNER2020 and unzip
+
+1. Download the training data zip.
+    > [CLUERNER2020 dataset download address](https://www.cluebenchmarks.com/introduce.html) **-> 任务介绍 -> CLUENER 细粒度命名实体识别 -> cluener下载链接**
+
+2. Unzip the training data to dir example/nlp_to_mindrecord/CLUERNER2020/cluener_public.
+    ```
+    unzip -d {your-mindspore}/example/nlp_to_mindrecord/CLUERNER2020/cluener_public cluener_public.zip
+    ```
+
+### Generate MindRecord
+
+1. Run the run.sh script.
+    ```bash
+    bash run.sh
+    ```
+
+2. Output like this:
+    ```
+    ...
+    [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:12.498.235 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/train.mindrecord'], and the list of index files are: ['data/train.mindrecord.db']
+    ...
+    [INFO] ME(17603,python):2020-04-28-16:56:13.400.175 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(17603,python):2020-04-28-16:56:13.400.863 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(17603,python):2020-04-28-16:56:13.401.534 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(17603,python):2020-04-28-16:56:13.402.179 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(17603,python):2020-04-28-16:56:13.402.702 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    ...
+    [INFO] ME(17603:139620983514944,MainProcess):2020-04-28-16:56:13.431.208 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['data/dev.mindrecord'], and the list of index files are: ['data/dev.mindrecord.db']
+    ```
+
+### Create MindDataset By MindRecord
+
+1. Run the run_read.sh script.
+    ```bash
+    bash run_read.sh
+    ```
+
+2. Output like this:
+    ```
+    ...
+    example 1340: input_ids: [ 101 3173 1290 4852 7676 3949  122 3299  123  126 3189 4510 8020 6381 5442 7357 2590 3636 8021 7676 3949 4294 1166 6121 3124 1277 6121 3124 7270 2135 3295 5789 3326 123  126 3189 1355 6134 1093 1325 3173 2399 6590 6791 8024  102    0    0    0    0    0    0    0    0    0    0   0    0    0    0    0    0    0    0]
+    example 1340: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1  1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    example 1340: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    example 1340: label_ids: [ 0 18 19 20  2  4  0  0  0  0  0  0  0 34 36 26 27 28  0 34 35 35 35 35 35 35 35 35 35 36 26 27 28  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
+    example 1341: input_ids: [ 101 1728  711 4293 3868 1168 2190 2150 3791  934 3633 3428 4638 6237 7025 8024 3297 1400 5310 3362 6206 5023 5401 1744 3297 7770 3791 7368  976 1139 1104 2137  511 102    0    0    0    0    0    0    0    0   0    0    0    0    0    0    0    0    0    0    0    0    0    0   0    0    0    0    0    0    0    0]
+    example 1341: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    example 1341: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+    example 1341: label_ids: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0 18 19 19 19 19 20  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
+    ...
+    ```
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py b/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
new file mode 100644
index 0000000000..22914e985d
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
@@ -0,0 +1,36 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""create MindDataset by MindRecord"""
+import mindspore.dataset as ds
+
+def create_dataset(data_file):
+    """create MindDataset"""
+    num_readers = 4
+    data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
+    index = 0
+    for item in data_set.create_dict_iterator():
+        # print("example {}: {}".format(index, item))
+        print("example {}: input_ids: {}".format(index, item['input_ids']))
+        print("example {}: input_mask: {}".format(index, item['input_mask']))
+        print("example {}: segment_ids: {}".format(index, item['segment_ids']))
+        print("example {}: label_ids: {}".format(index, item['label_ids']))
+        index += 1
+        if index % 1000 == 0:
+            print("read rows: {}".format(index))
+    print("total rows: {}".format(index))
+
+if __name__ == '__main__':
+    create_dataset('data/train.mindrecord')
+    create_dataset('data/dev.mindrecord')
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/data/README.md b/example/nlp_to_mindrecord/CLUERNER2020/data/README.md
new file mode 100644
index 0000000000..7904933f43
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/data/README.md
@@ -0,0 +1 @@
+## output dir
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/data_processor_seq.py b/example/nlp_to_mindrecord/CLUERNER2020/data_processor_seq.py
new file mode 100644
index 0000000000..e0b5ff6ac1
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/data_processor_seq.py
@@ -0,0 +1,162 @@
+#!/usr/bin/python
+# coding:utf8
+"""
+@author: Cong Yu
+@time: 2019-12-07 17:03
+"""
+import json
+import tokenization
+import collections
+
+import numpy as np
+from mindspore.mindrecord import FileWriter
+
+# pylint: skip-file
+
+def _truncate_seq_pair(tokens_a, tokens_b, max_length):
+    """Trim two token lists in place until their combined length fits max_length.
+
+    Args:
+        tokens_a: first list; loses its last token only while strictly longer.
+        tokens_b: second list; loses its last token otherwise (ties included).
+        max_length: largest allowed value of len(tokens_a) + len(tokens_b).
+    """
+    # Heuristic: remove one trailing token at a time from the longer list rather
+    # than an equal share from both, because each token of a short sequence
+    # likely carries more information than a token of a long one.
+    # Nothing is returned; callers read the shortened lists.
+    while len(tokens_a) + len(tokens_b) > max_length:
+        longer = tokens_a if len(tokens_a) > len(tokens_b) else tokens_b
+        longer.pop()
+
+
+def process_one_example(tokenizer, label2id, text, label, max_seq_len=128):
+    """Turn one per-character labelled sentence into fixed-length model inputs.
+
+    Args:
+        tokenizer: object offering tokenize(str) and convert_tokens_to_ids(list).
+        label2id: mapping from label string to integer id.
+        text: sequence whose elements are tokenized one by one.
+        label: sequence holding one label per element of text.
+        max_seq_len: length of each returned list, "[CLS]"/"[SEP]" included.
+
+    Returns:
+        Tuple (input_ids, input_mask, segment_ids, label_ids); every member is
+        a list of exactly max_seq_len entries, zero-padded at the end.
+    """
+    chars = list(text)
+    char_labels = list(label)
+    tokens = []
+    labels = []
+    for position, char in enumerate(chars):
+        pieces = tokenizer.tokenize(char)
+        tokens.extend(pieces)
+        own_label = char_labels[position]
+        if pieces:
+            # The first piece carries the label of its source element.
+            labels.append(own_label)
+        for _ in pieces[1:]:
+            # Any further piece is reported and given the sequence's first label.
+            print("some unknown token...")
+            labels.append(labels[0])
+    # Reserve two positions for the sentence-start and sentence-end markers.
+    room = max_seq_len - 2
+    if len(tokens) > room:
+        tokens = tokens[:room]
+        labels = labels[:room]
+    # "[CLS]" and "[SEP]" never change, so both simply get label id 0.
+    ntokens = ["[CLS]"] + tokens + ["[SEP]"]
+    label_ids = [0] + [label2id[tag] for tag in labels] + [0]
+    segment_ids = [0] * len(ntokens)
+    input_ids = tokenizer.convert_tokens_to_ids(ntokens)
+    input_mask = [1] * len(input_ids)
+    # Zero-pad every column up to max_seq_len.
+    padding = [0] * (max_seq_len - len(input_ids))
+    input_ids.extend(padding)
+    input_mask.extend(padding)
+    segment_ids.extend(padding)
+    label_ids.extend(padding)
+
+    feature = (input_ids, input_mask, segment_ids, label_ids)
+    for column in feature:
+        assert len(column) == max_seq_len
+    return feature
+
+
+def prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path, out_path):
+    """Convert a CLUENER JSON-lines file into a MindRecord file.
+
+    Each non-blank line of `path` is a JSON object with "text" and "label";
+    "label" maps an entity type to {key: [[start, end], ...]} with inclusive
+    offsets into "text". Spans become per-character tags ("S_" single, "B_" /
+    "M_" / "E_" begin / middle / end, "O" elsewhere); records are encoded by
+    process_one_example and written in file order (no shuffling is done here).
+
+    Args:
+        tokenizer, max_seq_len, label2id: handed on to process_one_example.
+        path: input JSON-lines file, read as UTF-8.
+        out_path: MindRecord file to write.
+    """
+    writer = FileWriter(out_path)
+    writer.add_schema({"input_ids": {"type": "int64", "shape": [-1]},
+                       "input_mask": {"type": "int64", "shape": [-1]},
+                       "segment_ids": {"type": "int64", "shape": [-1]},
+                       "label_ids": {"type": "int64", "shape": [-1]}},
+                      "CLUENER2020 schema")
+
+    example_count = 0
+    # The context manager closes the input file (the bare open() never did), and
+    # the explicit encoding keeps the Chinese text independent of the locale.
+    with open(path, encoding="utf-8") as reader:
+        for line in reader:
+            line = line.strip()
+            if not line:
+                continue
+            record = json.loads(line)
+            labels = ["O"] * len(record["text"])
+            for entity_type, mentions in record["label"].items():
+                for spans in mentions.values():
+                    for span in spans:
+                        start, stop = span[0], span[1] + 1
+                        if stop - start == 1:
+                            labels[start] = "S_" + entity_type
+                        else:
+                            labels[start] = "B_" + entity_type
+                            for i in range(start + 1, stop - 1):
+                                labels[i] = "M_" + entity_type
+                            labels[stop - 1] = "E_" + entity_type
+            feature = process_one_example(tokenizer, label2id, list(record["text"]), labels,
+                                          max_seq_len=max_seq_len)
+            # Columns of the sequence-labelling task, in schema order.
+            features = collections.OrderedDict()
+            features["input_ids"] = np.asarray(feature[0])
+            features["input_mask"] = np.asarray(feature[1])
+            features["segment_ids"] = np.asarray(feature[2])
+            features["label_ids"] = np.asarray(feature[3])
+            if example_count < 5:
+                print("*** Example ***")
+                print(record["text"])
+                print(record["label"])
+                print("input_ids: %s" % " ".join([str(x) for x in feature[0]]))
+                print("input_mask: %s" % " ".join([str(x) for x in feature[1]]))
+                print("segment_ids: %s" % " ".join([str(x) for x in feature[2]]))
+                print("label: %s " % " ".join([str(x) for x in feature[3]]))
+            writer.write_raw_data([features])
+            example_count += 1
+            if example_count % 3000 == 0:
+                print(example_count)
+    print("total example:", example_count)
+    writer.commit()
+
+
+if __name__ == "__main__":
+    vocab_file = "./vocab.txt"
+    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file)
+    with open("label2id.json") as label_file:  # closed on exit; a bare open() leaked it
+        label2id = json.load(label_file)
+
+    max_seq_len = 64
+    prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path="cluener_public/train.json",
+                           out_path="data/train.mindrecord")
+    prepare_mindrecord_data(tokenizer, max_seq_len, label2id, path="cluener_public/dev.json",
+                           out_path="data/dev.mindrecord")
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/label2id.json b/example/nlp_to_mindrecord/CLUERNER2020/label2id.json
new file mode 100644
index 0000000000..f296bcb28f
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/label2id.json
@@ -0,0 +1,43 @@
+{
+  "O": 0,
+  "S_address": 1,
+  "B_address": 2,
+  "M_address": 3,
+  "E_address": 4,
+  "S_book": 5,
+  "B_book": 6,
+  "M_book": 7,
+  "E_book": 8,
+  "S_company": 9,
+  "B_company": 10,
+  "M_company": 11,
+  "E_company": 12,
+  "S_game": 13,
+  "B_game": 14,
+  "M_game": 15,
+  "E_game": 16,
+  "S_government": 17,
+  "B_government": 18,
+  "M_government": 19,
+  "E_government": 20,
+  "S_movie": 21,
+  "B_movie": 22,
+  "M_movie": 23,
+  "E_movie": 24,
+  "S_name": 25,
+  "B_name": 26,
+  "M_name": 27,
+  "E_name": 28,
+  "S_organization": 29,
+  "B_organization": 30,
+  "M_organization": 31,
+  "E_organization": 32,
+  "S_position": 33,
+  "B_position": 34,
+  "M_position": 35,
+  "E_position": 36,
+  "S_scene": 37,
+  "B_scene": 38,
+  "M_scene": 39,
+  "E_scene": 40
+}
\ No newline at end of file
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/run.sh b/example/nlp_to_mindrecord/CLUERNER2020/run.sh
new file mode 100644
index 0000000000..0200b2e9d7
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/run.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# -f: on a first run there is nothing to delete, and plain rm would report an error.
+rm -f data/train.mindrecord*
+rm -f data/dev.mindrecord*
+python data_processor_seq.py
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh b/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh
new file mode 100644
index 0000000000..1ffe4de1cf
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/run_read.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Read back the generated MindRecord files and print their rows.
+python create_dataset.py
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/tokenization.py b/example/nlp_to_mindrecord/CLUERNER2020/tokenization.py
new file mode 100644
index 0000000000..856021d6a9
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/tokenization.py
@@ -0,0 +1,388 @@
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+import unicodedata
+import six
+
+# pylint: skip-file
+
+def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
+    """Raise if `do_lower_case` contradicts the casing implied by the checkpoint path.
+
+    The casing is supplied by the user and is not stored with the checkpoint, so
+    it is inferred heuristically from the directory that holds
+    "bert_model.ckpt". Unknown directory names and empty paths are accepted.
+
+    Args:
+        do_lower_case: casing flag chosen by the user.
+        init_checkpoint: checkpoint path, or a falsy value to skip the check.
+
+    Raises:
+        ValueError: if a known lowercased model is used with a falsy flag, or a
+            known cased model with a truthy flag.
+    """
+    if not init_checkpoint:
+        return
+
+    # The path component right before "/bert_model.ckpt" is the model name.
+    found = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
+    if found is None:
+        return
+    model_name = found.group(1)
+
+    # Known model directory names, grouped by casing.
+    lower_models = ("uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
+                    "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12")
+    cased_models = ("cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
+                    "multi_cased_L-12_H-768_A-12")
+
+    # (flag that was passed, what the model is, flag that should be passed)
+    mismatch = None
+    if model_name in lower_models and not do_lower_case:
+        mismatch = ("False", "lowercased", "True")
+    if model_name in cased_models and do_lower_case:
+        mismatch = ("True", "cased", "False")
+    # Unknown model names and consistent settings both pass.
+    if mismatch is None:
+        return
+
+    actual_flag, case_name, opposite_flag = mismatch
+    raise ValueError(
+        "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
+        "However, `%s` seems to be a %s model, so you "
+        "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
+        "how the model was pre-training. If this error is wrong, please "
+        "just comment out this check." % (actual_flag, init_checkpoint,
+                                          model_name, case_name, opposite_flag))
+
+
+def convert_to_unicode(text):
+    """Return `text` as a text (unicode) string, decoding byte strings as UTF-8.
+
+    Undecodable bytes are dropped ("ignore"). Raises ValueError for any other
+    input type, or when six reports neither Python 2 nor Python 3.
+    """
+    if not (six.PY3 or six.PY2):
+        raise ValueError("Not running on Python2 or Python 3?")
+    # Python 3: str is text and bytes is raw. Python 2: unicode is text, str is raw.
+    if six.PY3:
+        text_type, raw_type = str, bytes
+    else:
+        text_type, raw_type = unicode, str
+    if isinstance(text, text_type):
+        return text
+    if isinstance(text, raw_type):
+        return text.decode("utf-8", "ignore")
+    raise ValueError("Unsupported string type: %s" % (type(text)))
+
+
+def printable_text(text):
+    """Return `text` as the native `str` type, suitable for print or `tf.logging`.
+
+    Native `str` is a unicode string on Python 3 and a byte string on Python 2,
+    so the other string type is converted via UTF-8. Raises ValueError for any
+    other input type, or when six reports neither Python 2 nor Python 3.
+    """
+    if not (six.PY3 or six.PY2):
+        raise ValueError("Not running on Python2 or Python 3?")
+
+    # Already the native string type on either major version.
+    if isinstance(text, str):
+        return text
+    if six.PY3:
+        # Python 3: bytes are decoded, undecodable bytes being dropped.
+        if isinstance(text, bytes):
+            return text.decode("utf-8", "ignore")
+    elif isinstance(text, unicode):
+        # Python 2: unicode is encoded to a UTF-8 byte string.
+        return text.encode("utf-8")
+    raise ValueError("Unsupported string type: %s" % (type(text)))
+
+
+def load_vocab(vocab_file):
+    """Load a one-token-per-line vocabulary file into an ordered token -> id map.
+
+    Ids are 0-based line numbers; tokens are stripped; a repeated token keeps its last id.
+    """
+    import io  # local import: io.open takes `encoding` on Python 2 as well as 3
+    vocab = collections.OrderedDict()
+    # Decode explicitly as UTF-8 (the built-in open() used the locale default);
+    # text mode already yields unicode lines, so no further conversion is needed.
+    with io.open(vocab_file, "r", encoding="utf-8") as reader:
+        for index, line in enumerate(reader):
+            vocab[line.strip()] = index
+    return vocab
+
+
+def convert_by_vocab(vocab, items):
+    """Map each entry of `items` through `vocab`, using vocab['[UNK]'] for misses.
+
+    Works for token -> id and id -> token maps alike; returns a new list in input
+    order. A miss raises KeyError when `vocab` has no '[UNK]' key.
+    """
+    # The fallback is looked up lazily, i.e. only when an item is really
+    # missing, so a vocab without '[UNK]' still works while every item is known.
+    return [vocab[item] if item in vocab else vocab['[UNK]'] for item in items]
+
+
+def convert_tokens_to_ids(vocab, tokens):
+    """Map `tokens` through `vocab`; thin wrapper that forwards to convert_by_vocab."""
+    return convert_by_vocab(vocab, tokens)
+
+def convert_ids_to_tokens(inv_vocab, ids):
+    """Map `ids` through `inv_vocab`; thin wrapper that forwards to convert_by_vocab."""
+    return convert_by_vocab(inv_vocab, ids)
+
+def whitespace_tokenize(text):
+    """Split `text` on runs of whitespace and return the pieces as a list.
+
+    Leading/trailing whitespace is ignored; empty or all-blank text gives [].
+    """
+    stripped = text.strip()
+    return stripped.split() if stripped else []
+
+
+class FullTokenizer(object):
+    """End-to-end tokenizer: basic tokenization followed by WordPiece splitting."""
+
+    def __init__(self, vocab_file, do_lower_case=True):
+        """Load the vocabulary from `vocab_file` and build both sub-tokenizers."""
+        self.vocab = load_vocab(vocab_file)
+        self.inv_vocab = dict((index, token) for token, index in self.vocab.items())
+        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
+        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
+
+    def tokenize(self, text):
+        """Return the word pieces of `text` as one flat list."""
+        return [piece for word in self.basic_tokenizer.tokenize(text)
+                for piece in self.wordpiece_tokenizer.tokenize(word)]
+
+    def convert_tokens_to_ids(self, tokens):
+        """Map tokens to vocabulary ids via convert_by_vocab."""
+        return convert_by_vocab(self.vocab, tokens)
+
+    def convert_ids_to_tokens(self, ids):
+        """Map vocabulary ids back to tokens via convert_by_vocab."""
+        return convert_by_vocab(self.inv_vocab, ids)
+
+
+class BasicTokenizer(object):
+    """Basic tokenizer: cleans text, isolates CJK characters, splits punctuation."""
+
+    def __init__(self, do_lower_case=True):
+        """Constructs a BasicTokenizer.
+
+        Args:
+          do_lower_case: Whether to lower-case tokens and strip their accents.
+        """
+        self.do_lower_case = do_lower_case
+
+    def tokenize(self, text):
+        """Split a piece of text into basic tokens.
+
+        Args:
+          text: input string (byte strings are decoded by convert_to_unicode).
+
+        Returns:
+          A list of tokens: whitespace-separated words, with every punctuation
+          character and every CJK character as a token of its own.
+        """
+        cleaned = self._clean_text(convert_to_unicode(text))
+
+        # Spacing out CJK characters was added on November 1st, 2018 for the
+        # multilingual and Chinese models. It is applied to the English models
+        # as well, which does not matter: they were not trained on Chinese data
+        # (Chinese characters only reach their vocabulary through the few
+        # Chinese words that occur in the English Wikipedia).
+        spaced = self._tokenize_chinese_chars(cleaned)
+
+        pieces = []
+        for word in whitespace_tokenize(spaced):
+            if self.do_lower_case:
+                word = self._run_strip_accents(word.lower())
+            pieces.extend(self._run_split_on_punc(word))
+
+        # Re-splitting the joined pieces yields the final flat token list.
+        return whitespace_tokenize(" ".join(pieces))
+
+    def _run_strip_accents(self, text):
+        """Strip accents from a piece of text.
+
+        The text is NFD-normalized, which separates base characters from their
+        combining marks, and every character of category "Mn" is then dropped.
+        """
+        decomposed = unicodedata.normalize("NFD", text)
+        return "".join(ch for ch in decomposed if unicodedata.category(ch) != "Mn")
+
+    def _run_split_on_punc(self, text):
+        """Split a piece of text around its punctuation characters.
+
+        Returns:
+          A list of strings in which each punctuation character stands alone and
+          each run of other characters is kept together.
+        """
+        groups = []
+        open_group = False  # True while groups[-1] may still take characters
+        for char in text:
+            if _is_punctuation(char):
+                # Punctuation forms a group of its own and ends the running one.
+                groups.append([char])
+                open_group = False
+            else:
+                if not open_group:
+                    groups.append([])
+                    open_group = True
+                groups[-1].append(char)
+        return ["".join(group) for group in groups]
+
+    def _tokenize_chinese_chars(self, text):
+        """Add whitespace around any CJK character.
+
+        Every other character is copied through unchanged.
+        """
+        return "".join(" " + char + " " if self._is_chinese_char(ord(char)) else char
+                       for char in text)
+
+    def _is_chinese_char(self, cp):
+        """Check whether `cp` is the code point of a CJK character.
+
+        A "chinese character" is anything in the CJK Unified Ideographs blocks:
+          https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+
+        Despite the name, these blocks do NOT hold all Japanese and Korean
+        characters: modern Korean Hangul and Japanese Hiragana/Katakana live in
+        other blocks. Those alphabets are used to write space-separated words,
+        so they get no special treatment and are handled like other languages.
+        """
+        cjk_ranges = (
+            (0x4E00, 0x9FFF),
+            (0x3400, 0x4DBF),
+            (0x20000, 0x2A6DF),
+            (0x2A700, 0x2B73F),
+            (0x2B740, 0x2B81F),
+            (0x2B820, 0x2CEAF),
+            (0xF900, 0xFAFF),
+            (0x2F800, 0x2FA1F),
+        )
+        return any(low <= cp <= high for low, high in cjk_ranges)
+
+    def _clean_text(self, text):
+        """Remove invalid characters from `text` and normalize its whitespace.
+
+        NUL, U+FFFD (the replacement character) and control characters are
+        dropped; every whitespace character is replaced by a plain space.
+        """
+        kept = []
+        for char in text:
+            if ord(char) in (0, 0xfffd) or _is_control(char):
+                continue
+            # Whatever _is_whitespace accepts collapses to a single plain space.
+            kept.append(" " if _is_whitespace(char) else char)
+        return "".join(kept)
+
+
+class WordpieceTokenizer(object):
+    """Greedy longest-match-first WordPiece tokenizer over a fixed vocabulary."""
+
+    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
+        """Configure the tokenizer.
+
+        Args:
+          vocab: container of known word pieces, queried with `in`.
+          unk_token: piece emitted for a token that cannot be split.
+          max_input_chars_per_word: tokens longer than this become `unk_token`.
+        """
+        self.vocab = vocab
+        self.unk_token = unk_token
+        self.max_input_chars_per_word = max_input_chars_per_word
+
+    def tokenize(self, text):
+        """Split `text` into word pieces found in the vocabulary.
+
+        Each whitespace-separated token is consumed left to right, always taking
+        the longest vocabulary entry that matches at the current position;
+        pieces that do not start the token are looked up with a "##" prefix.
+
+        For example:
+          input = "unaffable"
+          output = ["un", "##aff", "##able"]
+
+        Args:
+          text: A single token or whitespace separated tokens. This should have
+            already been passed through `BasicTokenizer`.
+
+        Returns:
+          A list of wordpiece tokens. A token longer than
+          `max_input_chars_per_word`, or one that cannot be fully covered by
+          vocabulary entries, contributes the single `unk_token` instead.
+        """
+        output_tokens = []
+        for token in whitespace_tokenize(convert_to_unicode(text)):
+            # Overlong tokens are not searched at all.
+            if len(token) > self.max_input_chars_per_word:
+                output_tokens.append(self.unk_token)
+                continue
+
+            pieces = []
+            start = 0
+            while start < len(token):
+                # Try the longest candidate first and shrink it from the right.
+                for end in range(len(token), start, -1):
+                    candidate = token[start:end]
+                    if start > 0:
+                        candidate = "##" + candidate
+                    if candidate in self.vocab:
+                        pieces.append(candidate)
+                        start = end
+                        break
+                else:
+                    # Nothing matches at this position: the whole token is unknown.
+                    pieces = [self.unk_token]
+                    break
+            output_tokens.extend(pieces)
+        return output_tokens
+
+
+def _is_whitespace(char):
+    """Return True if `char` is treated as whitespace.
+
+    Space, tab, newline and carriage return count (the last three are formally
+    control characters), as does any character of Unicode category "Zs".
+    """
+    if char in (" ", "\t", "\n", "\r"):
+        return True
+    # "Zs" is the Unicode space-separator category.
+    return unicodedata.category(char) == "Zs"
+
+
+def _is_control(char):
+    """Return True if `char` belongs to a Unicode category starting with "C".
+
+    Tab, newline and carriage return are excluded: they are formally control
+    characters but this module counts them as whitespace.
+    """
+    if char in ("\t", "\n", "\r"):
+        return False
+    # Any category whose name begins with "C" qualifies, e.g. "Cc" or "Cf".
+    return unicodedata.category(char).startswith("C")
+
+
+def _is_punctuation(char):
+    """Return True if `char` is treated as punctuation.
+
+    Every ASCII character from "!" to "~" that is neither a letter nor a digit
+    counts, including ones such as "^", "$" and "`" that lie outside the Unicode
+    punctuation classes; beyond that, any character of a "P*" category counts.
+    """
+    cp = ord(char)
+
+    # ASCII runs: ! to /, : to @, [ to ` and { to ~.
+    ascii_runs = ((33, 47), (58, 64), (91, 96), (123, 126))
+    if any(low <= cp <= high for low, high in ascii_runs):
+        return True
+    return unicodedata.category(char).startswith("P")
diff --git a/example/nlp_to_mindrecord/CLUERNER2020/vocab.txt b/example/nlp_to_mindrecord/CLUERNER2020/vocab.txt
new file mode 100644
index 0000000000..ca4f978103
--- /dev/null
+++ b/example/nlp_to_mindrecord/CLUERNER2020/vocab.txt
@@ -0,0 +1,21128 @@
+[PAD]
+[unused1]
+[unused2]
+[unused3]
+[unused4]
+[unused5]
+[unused6]
+[unused7]
+[unused8]
+[unused9]
+[unused10]
+[unused11]
+[unused12]
+[unused13]
+[unused14]
+[unused15]
+[unused16]
+[unused17]
+[unused18]
+[unused19]
+[unused20]
+[unused21]
+[unused22]
+[unused23]
+[unused24]
+[unused25]
+[unused26]
+[unused27]
+[unused28]
+[unused29]
+[unused30]
+[unused31]
+[unused32]
+[unused33]
+[unused34]
+[unused35]
+[unused36]
+[unused37]
+[unused38]
+[unused39]
+[unused40]
+[unused41]
+[unused42]
+[unused43]
+[unused44]
+[unused45]
+[unused46]
+[unused47]
+[unused48]
+[unused49]
+[unused50]
+[unused51]
+[unused52]
+[unused53]
+[unused54]
+[unused55]
+[unused56]
+[unused57]
+[unused58]
+[unused59]
+[unused60]
+[unused61]
+[unused62]
+[unused63]
+[unused64]
+[unused65]
+[unused66]
+[unused67]
+[unused68]
+[unused69]
+[unused70]
+[unused71]
+[unused72]
+[unused73]
+[unused74]
+[unused75]
+[unused76]
+[unused77]
+[unused78]
+[unused79]
+[unused80]
+[unused81]
+[unused82]
+[unused83]
+[unused84]
+[unused85]
+[unused86]
+[unused87]
+[unused88]
+[unused89]
+[unused90]
+[unused91]
+[unused92]
+[unused93]
+[unused94]
+[unused95]
+[unused96]
+[unused97]
+[unused98]
+[unused99]
+[UNK]
+[CLS]
+[SEP]
+[MASK]
+<S>
+<T>
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+£
+¤
+¥
+§
+©
+«
+®
+°
+±
+²
+³
+µ
+·
+¹
+º
+»
+¼
+×
+ß
+æ
+÷
+ø
+đ
+ŋ
+ɔ
+ə
+ɡ
+ʰ
+ˇ
+ˈ
+ˊ
+ˋ
+ˍ
+ː
+˙
+˚
+ˢ
+α
+β
+γ
+δ
+ε
+η
+θ
+ι
+κ
+λ
+μ
+ν
+ο
+π
+ρ
+ς
+σ
+τ
+υ
+φ
+χ
+ψ
+ω
+а
+б
+в
+г
+д
+е
+ж
+з
+и
+к
+л
+м
+н
+о
+п
+р
+с
+т
+у
+ф
+х
+ц
+ч
+ш
+ы
+ь
+я
+і
+ا
+ب
+ة
+ت
+د
+ر
+س
+ع
+ل
+م
+ن
+ه
+و
+ي
+۩
+ก
+ง
+น
+ม
+ย
+ร
+อ
+า
+เ
+๑
+་
+ღ
+ᄀ
+ᄁ
+ᄂ
+ᄃ
+ᄅ
+ᄆ
+ᄇ
+ᄈ
+ᄉ
+ᄋ
+ᄌ
+ᄎ
+ᄏ
+ᄐ
+ᄑ
+ᄒ
+ᅡ
+ᅢ
+ᅣ
+ᅥ
+ᅦ
+ᅧ
+ᅨ
+ᅩ
+ᅪ
+ᅬ
+ᅭ
+ᅮ
+ᅯ
+ᅲ
+ᅳ
+ᅴ
+ᅵ
+ᆨ
+ᆫ
+ᆯ
+ᆷ
+ᆸ
+ᆺ
+ᆻ
+ᆼ
+ᗜ
+ᵃ
+ᵉ
+ᵍ
+ᵏ
+ᵐ
+ᵒ
+ᵘ
+‖
+„
+†
+•
+‥
+‧
+ 
+‰
+′
+″
+‹
+›
+※
+‿
+⁄
+ⁱ
+⁺
+ⁿ
+₁
+₂
+₃
+₄
+€
+℃
+№
+™
+ⅰ
+ⅱ
+ⅲ
+ⅳ
+ⅴ
+←
+↑
+→
+↓
+↔
+↗
+↘
+⇒
+∀
+−
+∕
+∙
+√
+∞
+∟
+∠
+∣
+∥
+∩
+∮
+∶
+∼
+∽
+≈
+≒
+≡
+≤
+≥
+≦
+≧
+≪
+≫
+⊙
+⋅
+⋈
+⋯
+⌒
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+⑴
+⑵
+⑶
+⑷
+⑸
+⒈
+⒉
+⒊
+⒋
+ⓒ
+ⓔ
+ⓘ
+─
+━
+│
+┃
+┅
+┆
+┊
+┌
+└
+├
+┣
+═
+║
+╚
+╞
+╠
+╭
+╮
+╯
+╰
+╱
+╳
+▂
+▃
+▅
+▇
+█
+▉
+▋
+▌
+▍
+▎
+■
+□
+▪
+▫
+▬
+▲
+△
+▶
+►
+▼
+▽
+◆
+◇
+○
+◎
+●
+◕
+◠
+◢
+◤
+☀
+★
+☆
+☕
+☞
+☺
+☼
+♀
+♂
+♠
+♡
+♣
+♥
+♦
+♪
+♫
+♬
+✈
+✔
+✕
+✖
+✦
+✨
+✪
+✰
+✿
+❀
+❤
+➜
+➤
+⦿
+、
+。
+〃
+々
+〇
+〈
+〉
+《
+》
+「
+」
+『
+』
+【
+】
+〓
+〔
+〕
+〖
+〗
+〜
+〝
+〞
+ぁ
+あ
+ぃ
+い
+う
+ぇ
+え
+お
+か
+き
+く
+け
+こ
+さ
+し
+す
+せ
+そ
+た
+ち
+っ
+つ
+て
+と
+な
+に
+ぬ
+ね
+の
+は
+ひ
+ふ
+へ
+ほ
+ま
+み
+む
+め
+も
+ゃ
+や
+ゅ
+ゆ
+ょ
+よ
+ら
+り
+る
+れ
+ろ
+わ
+を
+ん
+゜
+ゝ
+ァ
+ア
+ィ
+イ
+ゥ
+ウ
+ェ
+エ
+ォ
+オ
+カ
+キ
+ク
+ケ
+コ
+サ
+シ
+ス
+セ
+ソ
+タ
+チ
+ッ
+ツ
+テ
+ト
+ナ
+ニ
+ヌ
+ネ
+ノ
+ハ
+ヒ
+フ
+ヘ
+ホ
+マ
+ミ
+ム
+メ
+モ
+ャ
+ヤ
+ュ
+ユ
+ョ
+ヨ
+ラ
+リ
+ル
+レ
+ロ
+ワ
+ヲ
+ン
+ヶ
+・
+ー
+ヽ
+ㄅ
+ㄆ
+ㄇ
+ㄉ
+ㄋ
+ㄌ
+ㄍ
+ㄎ
+ㄏ
+ㄒ
+ㄚ
+ㄛ
+ㄞ
+ㄟ
+ㄢ
+ㄤ
+ㄥ
+ㄧ
+ㄨ
+ㆍ
+㈦
+㊣
+㎡
+㗎
+一
+丁
+七
+万
+丈
+三
+上
+下
+不
+与
+丐
+丑
+专
+且
+丕
+世
+丘
+丙
+业
+丛
+东
+丝
+丞
+丟
+両
+丢
+两
+严
+並
+丧
+丨
+个
+丫
+中
+丰
+串
+临
+丶
+丸
+丹
+为
+主
+丼
+丽
+举
+丿
+乂
+乃
+久
+么
+义
+之
+乌
+乍
+乎
+乏
+乐
+乒
+乓
+乔
+乖
+乗
+乘
+乙
+乜
+九
+乞
+也
+习
+乡
+书
+乩
+买
+乱
+乳
+乾
+亀
+亂
+了
+予
+争
+事
+二
+于
+亏
+云
+互
+五
+井
+亘
+亙
+亚
+些
+亜
+亞
+亟
+亡
+亢
+交
+亥
+亦
+产
+亨
+亩
+享
+京
+亭
+亮
+亲
+亳
+亵
+人
+亿
+什
+仁
+仃
+仄
+仅
+仆
+仇
+今
+介
+仍
+从
+仏
+仑
+仓
+仔
+仕
+他
+仗
+付
+仙
+仝
+仞
+仟
+代
+令
+以
+仨
+仪
+们
+仮
+仰
+仲
+件
+价
+任
+份
+仿
+企
+伉
+伊
+伍
+伎
+伏
+伐
+休
+伕
+众
+优
+伙
+会
+伝
+伞
+伟
+传
+伢
+伤
+伦
+伪
+伫
+伯
+估
+伴
+伶
+伸
+伺
+似
+伽
+佃
+但
+佇
+佈
+位
+低
+住
+佐
+佑
+体
+佔
+何
+佗
+佘
+余
+佚
+佛
+作
+佝
+佞
+佟
+你
+佢
+佣
+佤
+佥
+佩
+佬
+佯
+佰
+佳
+併
+佶
+佻
+佼
+使
+侃
+侄
+來
+侈
+例
+侍
+侏
+侑
+侖
+侗
+供
+依
+侠
+価
+侣
+侥
+侦
+侧
+侨
+侬
+侮
+侯
+侵
+侶
+侷
+便
+係
+促
+俄
+俊
+俎
+俏
+俐
+俑
+俗
+俘
+俚
+保
+俞
+俟
+俠
+信
+俨
+俩
+俪
+俬
+俭
+修
+俯
+俱
+俳
+俸
+俺
+俾
+倆
+倉
+個
+倌
+倍
+倏
+們
+倒
+倔
+倖
+倘
+候
+倚
+倜
+借
+倡
+値
+倦
+倩
+倪
+倫
+倬
+倭
+倶
+债
+值
+倾
+偃
+假
+偈
+偉
+偌
+偎
+偏
+偕
+做
+停
+健
+側
+偵
+偶
+偷
+偻
+偽
+偿
+傀
+傅
+傍
+傑
+傘
+備
+傚
+傢
+傣
+傥
+储
+傩
+催
+傭
+傲
+傳
+債
+傷
+傻
+傾
+僅
+働
+像
+僑
+僕
+僖
+僚
+僥
+僧
+僭
+僮
+僱
+僵
+價
+僻
+儀
+儂
+億
+儆
+儉
+儋
+儒
+儕
+儘
+償
+儡
+優
+儲
+儷
+儼
+儿
+兀
+允
+元
+兄
+充
+兆
+兇
+先
+光
+克
+兌
+免
+児
+兑
+兒
+兔
+兖
+党
+兜
+兢
+入
+內
+全
+兩
+八
+公
+六
+兮
+兰
+共
+兲
+关
+兴
+兵
+其
+具
+典
+兹
+养
+兼
+兽
+冀
+内
+円
+冇
+冈
+冉
+冊
+册
+再
+冏
+冒
+冕
+冗
+写
+军
+农
+冠
+冢
+冤
+冥
+冨
+冪
+冬
+冯
+冰
+冲
+决
+况
+冶
+冷
+冻
+冼
+冽
+冾
+净
+凄
+准
+凇
+凈
+凉
+凋
+凌
+凍
+减
+凑
+凛
+凜
+凝
+几
+凡
+凤
+処
+凪
+凭
+凯
+凰
+凱
+凳
+凶
+凸
+凹
+出
+击
+函
+凿
+刀
+刁
+刃
+分
+切
+刈
+刊
+刍
+刎
+刑
+划
+列
+刘
+则
+刚
+创
+初
+删
+判
+別
+刨
+利
+刪
+别
+刮
+到
+制
+刷
+券
+刹
+刺
+刻
+刽
+剁
+剂
+剃
+則
+剉
+削
+剋
+剌
+前
+剎
+剐
+剑
+剔
+剖
+剛
+剜
+剝
+剣
+剤
+剥
+剧
+剩
+剪
+副
+割
+創
+剷
+剽
+剿
+劃
+劇
+劈
+劉
+劊
+劍
+劏
+劑
+力
+劝
+办
+功
+加
+务
+劣
+动
+助
+努
+劫
+劭
+励
+劲
+劳
+労
+劵
+効
+劾
+势
+勁
+勃
+勇
+勉
+勋
+勐
+勒
+動
+勖
+勘
+務
+勛
+勝
+勞
+募
+勢
+勤
+勧
+勳
+勵
+勸
+勺
+勻
+勾
+勿
+匀
+包
+匆
+匈
+匍
+匐
+匕
+化
+北
+匙
+匝
+匠
+匡
+匣
+匪
+匮
+匯
+匱
+匹
+区
+医
+匾
+匿
+區
+十
+千
+卅
+升
+午
+卉
+半
+卍
+华
+协
+卑
+卒
+卓
+協
+单
+卖
+南
+単
+博
+卜
+卞
+卟
+占
+卡
+卢
+卤
+卦
+卧
+卫
+卮
+卯
+印
+危
+即
+却
+卵
+卷
+卸
+卻
+卿
+厂
+厄
+厅
+历
+厉
+压
+厌
+厕
+厘
+厚
+厝
+原
+厢
+厥
+厦
+厨
+厩
+厭
+厮
+厲
+厳
+去
+县
+叁
+参
+參
+又
+叉
+及
+友
+双
+反
+収
+发
+叔
+取
+受
+变
+叙
+叛
+叟
+叠
+叡
+叢
+口
+古
+句
+另
+叨
+叩
+只
+叫
+召
+叭
+叮
+可
+台
+叱
+史
+右
+叵
+叶
+号
+司
+叹
+叻
+叼
+叽
+吁
+吃
+各
+吆
+合
+吉
+吊
+吋
+同
+名
+后
+吏
+吐
+向
+吒
+吓
+吕
+吖
+吗
+君
+吝
+吞
+吟
+吠
+吡
+否
+吧
+吨
+吩
+含
+听
+吭
+吮
+启
+吱
+吳
+吴
+吵
+吶
+吸
+吹
+吻
+吼
+吽
+吾
+呀
+呂
+呃
+呆
+呈
+告
+呋
+呎
+呐
+呓
+呕
+呗
+员
+呛
+呜
+呢
+呤
+呦
+周
+呱
+呲
+味
+呵
+呷
+呸
+呻
+呼
+命
+咀
+咁
+咂
+咄
+咆
+咋
+和
+咎
+咏
+咐
+咒
+咔
+咕
+咖
+咗
+咘
+咙
+咚
+咛
+咣
+咤
+咦
+咧
+咨
+咩
+咪
+咫
+咬
+咭
+咯
+咱
+咲
+咳
+咸
+咻
+咽
+咿
+哀
+品
+哂
+哄
+哆
+哇
+哈
+哉
+哋
+哌
+响
+哎
+哏
+哐
+哑
+哒
+哔
+哗
+哟
+員
+哥
+哦
+哧
+哨
+哩
+哪
+哭
+哮
+哲
+哺
+哼
+哽
+唁
+唄
+唆
+唇
+唉
+唏
+唐
+唑
+唔
+唠
+唤
+唧
+唬
+售
+唯
+唰
+唱
+唳
+唷
+唸
+唾
+啃
+啄
+商
+啉
+啊
+問
+啓
+啕
+啖
+啜
+啞
+啟
+啡
+啤
+啥
+啦
+啧
+啪
+啫
+啬
+啮
+啰
+啱
+啲
+啵
+啶
+啷
+啸
+啻
+啼
+啾
+喀
+喂
+喃
+善
+喆
+喇
+喉
+喊
+喋
+喎
+喏
+喔
+喘
+喙
+喚
+喜
+喝
+喟
+喧
+喪
+喫
+喬
+單
+喰
+喱
+喲
+喳
+喵
+営
+喷
+喹
+喺
+喻
+喽
+嗅
+嗆
+嗇
+嗎
+嗑
+嗒
+嗓
+嗔
+嗖
+嗚
+嗜
+嗝
+嗟
+嗡
+嗣
+嗤
+嗦
+嗨
+嗪
+嗬
+嗯
+嗰
+嗲
+嗳
+嗶
+嗷
+嗽
+嘀
+嘅
+嘆
+嘈
+嘉
+嘌
+嘍
+嘎
+嘔
+嘖
+嘗
+嘘
+嘚
+嘛
+嘜
+嘞
+嘟
+嘢
+嘣
+嘤
+嘧
+嘩
+嘭
+嘮
+嘯
+嘰
+嘱
+嘲
+嘴
+嘶
+嘸
+嘹
+嘻
+嘿
+噁
+噌
+噎
+噓
+噔
+噗
+噙
+噜
+噠
+噢
+噤
+器
+噩
+噪
+噬
+噱
+噴
+噶
+噸
+噹
+噻
+噼
+嚀
+嚇
+嚎
+嚏
+嚐
+嚓
+嚕
+嚟
+嚣
+嚥
+嚨
+嚮
+嚴
+嚷
+嚼
+囂
+囉
+囊
+囍
+囑
+囔
+囗
+囚
+四
+囝
+回
+囟
+因
+囡
+团
+団
+囤
+囧
+囪
+囫
+园
+困
+囱
+囲
+図
+围
+囹
+固
+国
+图
+囿
+圃
+圄
+圆
+圈
+國
+圍
+圏
+園
+圓
+圖
+團
+圜
+土
+圣
+圧
+在
+圩
+圭
+地
+圳
+场
+圻
+圾
+址
+坂
+均
+坊
+坍
+坎
+坏
+坐
+坑
+块
+坚
+坛
+坝
+坞
+坟
+坠
+坡
+坤
+坦
+坨
+坪
+坯
+坳
+坵
+坷
+垂
+垃
+垄
+型
+垒
+垚
+垛
+垠
+垢
+垣
+垦
+垩
+垫
+垭
+垮
+垵
+埂
+埃
+埋
+城
+埔
+埕
+埗
+域
+埠
+埤
+埵
+執
+埸
+培
+基
+埼
+堀
+堂
+堃
+堅
+堆
+堇
+堑
+堕
+堙
+堡
+堤
+堪
+堯
+堰
+報
+場
+堵
+堺
+堿
+塊
+塌
+塑
+塔
+塗
+塘
+塚
+塞
+塢
+塩
+填
+塬
+塭
+塵
+塾
+墀
+境
+墅
+墉
+墊
+墒
+墓
+増
+墘
+墙
+墜
+增
+墟
+墨
+墩
+墮
+墳
+墻
+墾
+壁
+壅
+壆
+壇
+壊
+壑
+壓
+壕
+壘
+壞
+壟
+壢
+壤
+壩
+士
+壬
+壮
+壯
+声
+売
+壳
+壶
+壹
+壺
+壽
+处
+备
+変
+复
+夏
+夔
+夕
+外
+夙
+多
+夜
+够
+夠
+夢
+夥
+大
+天
+太
+夫
+夭
+央
+夯
+失
+头
+夷
+夸
+夹
+夺
+夾
+奂
+奄
+奇
+奈
+奉
+奋
+奎
+奏
+奐
+契
+奔
+奕
+奖
+套
+奘
+奚
+奠
+奢
+奥
+奧
+奪
+奬
+奮
+女
+奴
+奶
+奸
+她
+好
+如
+妃
+妄
+妆
+妇
+妈
+妊
+妍
+妒
+妓
+妖
+妘
+妙
+妝
+妞
+妣
+妤
+妥
+妨
+妩
+妪
+妮
+妲
+妳
+妹
+妻
+妾
+姆
+姉
+姊
+始
+姍
+姐
+姑
+姒
+姓
+委
+姗
+姚
+姜
+姝
+姣
+姥
+姦
+姨
+姪
+姫
+姬
+姹
+姻
+姿
+威
+娃
+娄
+娅
+娆
+娇
+娉
+娑
+娓
+娘
+娛
+娜
+娟
+娠
+娣
+娥
+娩
+娱
+娲
+娴
+娶
+娼
+婀
+婁
+婆
+婉
+婊
+婕
+婚
+婢
+婦
+婧
+婪
+婭
+婴
+婵
+婶
+婷
+婺
+婿
+媒
+媚
+媛
+媞
+媧
+媲
+媳
+媽
+媾
+嫁
+嫂
+嫉
+嫌
+嫑
+嫔
+嫖
+嫘
+嫚
+嫡
+嫣
+嫦
+嫩
+嫲
+嫵
+嫻
+嬅
+嬉
+嬌
+嬗
+嬛
+嬢
+嬤
+嬪
+嬰
+嬴
+嬷
+嬸
+嬿
+孀
+孃
+子
+孑
+孔
+孕
+孖
+字
+存
+孙
+孚
+孛
+孜
+孝
+孟
+孢
+季
+孤
+学
+孩
+孪
+孫
+孬
+孰
+孱
+孳
+孵
+學
+孺
+孽
+孿
+宁
+它
+宅
+宇
+守
+安
+宋
+完
+宏
+宓
+宕
+宗
+官
+宙
+定
+宛
+宜
+宝
+实
+実
+宠
+审
+客
+宣
+室
+宥
+宦
+宪
+宫
+宮
+宰
+害
+宴
+宵
+家
+宸
+容
+宽
+宾
+宿
+寂
+寄
+寅
+密
+寇
+富
+寐
+寒
+寓
+寛
+寝
+寞
+察
+寡
+寢
+寥
+實
+寧
+寨
+審
+寫
+寬
+寮
+寰
+寵
+寶
+寸
+对
+寺
+寻
+导
+対
+寿
+封
+専
+射
+将
+將
+專
+尉
+尊
+尋
+對
+導
+小
+少
+尔
+尕
+尖
+尘
+尚
+尝
+尤
+尧
+尬
+就
+尴
+尷
+尸
+尹
+尺
+尻
+尼
+尽
+尾
+尿
+局
+屁
+层
+屄
+居
+屆
+屈
+屉
+届
+屋
+屌
+屍
+屎
+屏
+屐
+屑
+展
+屜
+属
+屠
+屡
+屢
+層
+履
+屬
+屯
+山
+屹
+屿
+岀
+岁
+岂
+岌
+岐
+岑
+岔
+岖
+岗
+岘
+岙
+岚
+岛
+岡
+岩
+岫
+岬
+岭
+岱
+岳
+岷
+岸
+峇
+峋
+峒
+峙
+峡
+峤
+峥
+峦
+峨
+峪
+峭
+峯
+峰
+峴
+島
+峻
+峽
+崁
+崂
+崆
+崇
+崎
+崑
+崔
+崖
+崗
+崙
+崛
+崧
+崩
+崭
+崴
+崽
+嵇
+嵊
+嵋
+嵌
+嵐
+嵘
+嵩
+嵬
+嵯
+嶂
+嶄
+嶇
+嶋
+嶙
+嶺
+嶼
+嶽
+巅
+巍
+巒
+巔
+巖
+川
+州
+巡
+巢
+工
+左
+巧
+巨
+巩
+巫
+差
+己
+已
+巳
+巴
+巷
+巻
+巽
+巾
+巿
+币
+市
+布
+帅
+帆
+师
+希
+帐
+帑
+帕
+帖
+帘
+帚
+帛
+帜
+帝
+帥
+带
+帧
+師
+席
+帮
+帯
+帰
+帳
+帶
+帷
+常
+帼
+帽
+幀
+幂
+幄
+幅
+幌
+幔
+幕
+幟
+幡
+幢
+幣
+幫
+干
+平
+年
+并
+幸
+幹
+幺
+幻
+幼
+幽
+幾
+广
+庁
+広
+庄
+庆
+庇
+床
+序
+庐
+库
+应
+底
+庖
+店
+庙
+庚
+府
+庞
+废
+庠
+度
+座
+庫
+庭
+庵
+庶
+康
+庸
+庹
+庾
+廁
+廂
+廃
+廈
+廉
+廊
+廓
+廖
+廚
+廝
+廟
+廠
+廢
+廣
+廬
+廳
+延
+廷
+建
+廿
+开
+弁
+异
+弃
+弄
+弈
+弊
+弋
+式
+弑
+弒
+弓
+弔
+引
+弗
+弘
+弛
+弟
+张
+弥
+弦
+弧
+弩
+弭
+弯
+弱
+張
+強
+弹
+强
+弼
+弾
+彅
+彆
+彈
+彌
+彎
+归
+当
+录
+彗
+彙
+彝
+形
+彤
+彥
+彦
+彧
+彩
+彪
+彫
+彬
+彭
+彰
+影
+彷
+役
+彻
+彼
+彿
+往
+征
+径
+待
+徇
+很
+徉
+徊
+律
+後
+徐
+徑
+徒
+従
+徕
+得
+徘
+徙
+徜
+從
+徠
+御
+徨
+復
+循
+徬
+微
+徳
+徴
+徵
+德
+徹
+徼
+徽
+心
+必
+忆
+忌
+忍
+忏
+忐
+忑
+忒
+忖
+志
+忘
+忙
+応
+忠
+忡
+忤
+忧
+忪
+快
+忱
+念
+忻
+忽
+忿
+怀
+态
+怂
+怅
+怆
+怎
+怏
+怒
+怔
+怕
+怖
+怙
+怜
+思
+怠
+怡
+急
+怦
+性
+怨
+怪
+怯
+怵
+总
+怼
+恁
+恃
+恆
+恋
+恍
+恐
+恒
+恕
+恙
+恚
+恢
+恣
+恤
+恥
+恨
+恩
+恪
+恫
+恬
+恭
+息
+恰
+恳
+恵
+恶
+恸
+恺
+恻
+恼
+恿
+悄
+悅
+悉
+悌
+悍
+悔
+悖
+悚
+悟
+悠
+患
+悦
+您
+悩
+悪
+悬
+悯
+悱
+悲
+悴
+悵
+悶
+悸
+悻
+悼
+悽
+情
+惆
+惇
+惊
+惋
+惑
+惕
+惘
+惚
+惜
+惟
+惠
+惡
+惦
+惧
+惨
+惩
+惫
+惬
+惭
+惮
+惯
+惰
+惱
+想
+惴
+惶
+惹
+惺
+愁
+愆
+愈
+愉
+愍
+意
+愕
+愚
+愛
+愜
+感
+愣
+愤
+愧
+愫
+愷
+愿
+慄
+慈
+態
+慌
+慎
+慑
+慕
+慘
+慚
+慟
+慢
+慣
+慧
+慨
+慫
+慮
+慰
+慳
+慵
+慶
+慷
+慾
+憂
+憊
+憋
+憎
+憐
+憑
+憔
+憚
+憤
+憧
+憨
+憩
+憫
+憬
+憲
+憶
+憾
+懂
+懇
+懈
+應
+懊
+懋
+懑
+懒
+懦
+懲
+懵
+懶
+懷
+懸
+懺
+懼
+懾
+懿
+戀
+戈
+戊
+戌
+戍
+戎
+戏
+成
+我
+戒
+戕
+或
+战
+戚
+戛
+戟
+戡
+戦
+截
+戬
+戮
+戰
+戲
+戳
+戴
+戶
+户
+戸
+戻
+戾
+房
+所
+扁
+扇
+扈
+扉
+手
+才
+扎
+扑
+扒
+打
+扔
+払
+托
+扛
+扣
+扦
+执
+扩
+扪
+扫
+扬
+扭
+扮
+扯
+扰
+扱
+扳
+扶
+批
+扼
+找
+承
+技
+抄
+抉
+把
+抑
+抒
+抓
+投
+抖
+抗
+折
+抚
+抛
+抜
+択
+抟
+抠
+抡
+抢
+护
+报
+抨
+披
+抬
+抱
+抵
+抹
+押
+抽
+抿
+拂
+拄
+担
+拆
+拇
+拈
+拉
+拋
+拌
+拍
+拎
+拐
+拒
+拓
+拔
+拖
+拗
+拘
+拙
+拚
+招
+拜
+拟
+拡
+拢
+拣
+拥
+拦
+拧
+拨
+择
+括
+拭
+拮
+拯
+拱
+拳
+拴
+拷
+拼
+拽
+拾
+拿
+持
+挂
+指
+挈
+按
+挎
+挑
+挖
+挙
+挚
+挛
+挝
+挞
+挟
+挠
+挡
+挣
+挤
+挥
+挨
+挪
+挫
+振
+挲
+挹
+挺
+挽
+挾
+捂
+捅
+捆
+捉
+捋
+捌
+捍
+捎
+捏
+捐
+捕
+捞
+损
+捡
+换
+捣
+捧
+捨
+捩
+据
+捱
+捲
+捶
+捷
+捺
+捻
+掀
+掂
+掃
+掇
+授
+掉
+掌
+掏
+掐
+排
+掖
+掘
+掙
+掛
+掠
+採
+探
+掣
+接
+控
+推
+掩
+措
+掬
+掰
+掲
+掳
+掴
+掷
+掸
+掺
+揀
+揃
+揄
+揆
+揉
+揍
+描
+提
+插
+揖
+揚
+換
+握
+揣
+揩
+揪
+揭
+揮
+援
+揶
+揸
+揹
+揽
+搀
+搁
+搂
+搅
+損
+搏
+搐
+搓
+搔
+搖
+搗
+搜
+搞
+搡
+搪
+搬
+搭
+搵
+搶
+携
+搽
+摀
+摁
+摄
+摆
+摇
+摈
+摊
+摒
+摔
+摘
+摞
+摟
+摧
+摩
+摯
+摳
+摸
+摹
+摺
+摻
+撂
+撃
+撅
+撇
+撈
+撐
+撑
+撒
+撓
+撕
+撚
+撞
+撤
+撥
+撩
+撫
+撬
+播
+撮
+撰
+撲
+撵
+撷
+撸
+撻
+撼
+撿
+擀
+擁
+擂
+擄
+擅
+擇
+擊
+擋
+操
+擎
+擒
+擔
+擘
+據
+擞
+擠
+擡
+擢
+擦
+擬
+擰
+擱
+擲
+擴
+擷
+擺
+擼
+擾
+攀
+攏
+攒
+攔
+攘
+攙
+攜
+攝
+攞
+攢
+攣
+攤
+攥
+攪
+攫
+攬
+支
+收
+攸
+改
+攻
+放
+政
+故
+效
+敌
+敍
+敎
+敏
+救
+敕
+敖
+敗
+敘
+教
+敛
+敝
+敞
+敢
+散
+敦
+敬
+数
+敲
+整
+敵
+敷
+數
+斂
+斃
+文
+斋
+斌
+斎
+斐
+斑
+斓
+斗
+料
+斛
+斜
+斟
+斡
+斤
+斥
+斧
+斩
+斫
+斬
+断
+斯
+新
+斷
+方
+於
+施
+旁
+旃
+旅
+旋
+旌
+旎
+族
+旖
+旗
+无
+既
+日
+旦
+旧
+旨
+早
+旬
+旭
+旮
+旱
+时
+旷
+旺
+旻
+昀
+昂
+昆
+昇
+昉
+昊
+昌
+明
+昏
+易
+昔
+昕
+昙
+星
+映
+春
+昧
+昨
+昭
+是
+昱
+昴
+昵
+昶
+昼
+显
+晁
+時
+晃
+晉
+晋
+晌
+晏
+晒
+晓
+晔
+晕
+晖
+晗
+晚
+晝
+晞
+晟
+晤
+晦
+晨
+晩
+普
+景
+晰
+晴
+晶
+晷
+智
+晾
+暂
+暄
+暇
+暈
+暉
+暌
+暐
+暑
+暖
+暗
+暝
+暢
+暧
+暨
+暫
+暮
+暱
+暴
+暸
+暹
+曄
+曆
+曇
+曉
+曖
+曙
+曜
+曝
+曠
+曦
+曬
+曰
+曲
+曳
+更
+書
+曹
+曼
+曾
+替
+最
+會
+月
+有
+朋
+服
+朐
+朔
+朕
+朗
+望
+朝
+期
+朦
+朧
+木
+未
+末
+本
+札
+朮
+术
+朱
+朴
+朵
+机
+朽
+杀
+杂
+权
+杆
+杈
+杉
+李
+杏
+材
+村
+杓
+杖
+杜
+杞
+束
+杠
+条
+来
+杨
+杭
+杯
+杰
+東
+杳
+杵
+杷
+杼
+松
+板
+极
+构
+枇
+枉
+枋
+析
+枕
+林
+枚
+果
+枝
+枢
+枣
+枪
+枫
+枭
+枯
+枰
+枱
+枳
+架
+枷
+枸
+柄
+柏
+某
+柑
+柒
+染
+柔
+柘
+柚
+柜
+柞
+柠
+柢
+查
+柩
+柬
+柯
+柱
+柳
+柴
+柵
+査
+柿
+栀
+栃
+栄
+栅
+标
+栈
+栉
+栋
+栎
+栏
+树
+栓
+栖
+栗
+校
+栩
+株
+样
+核
+根
+格
+栽
+栾
+桀
+桁
+桂
+桃
+桅
+框
+案
+桉
+桌
+桎
+桐
+桑
+桓
+桔
+桜
+桠
+桡
+桢
+档
+桥
+桦
+桧
+桨
+桩
+桶
+桿
+梁
+梅
+梆
+梏
+梓
+梗
+條
+梟
+梢
+梦
+梧
+梨
+梭
+梯
+械
+梳
+梵
+梶
+检
+棂
+棄
+棉
+棋
+棍
+棒
+棕
+棗
+棘
+棚
+棟
+棠
+棣
+棧
+森
+棱
+棲
+棵
+棹
+棺
+椁
+椅
+椋
+植
+椎
+椒
+検
+椪
+椭
+椰
+椹
+椽
+椿
+楂
+楊
+楓
+楔
+楚
+楝
+楞
+楠
+楣
+楨
+楫
+業
+楮
+極
+楷
+楸
+楹
+楼
+楽
+概
+榄
+榆
+榈
+榉
+榔
+榕
+榖
+榛
+榜
+榨
+榫
+榭
+榮
+榱
+榴
+榷
+榻
+槁
+槃
+構
+槌
+槍
+槎
+槐
+槓
+様
+槛
+槟
+槤
+槭
+槲
+槳
+槻
+槽
+槿
+樁
+樂
+樊
+樑
+樓
+標
+樞
+樟
+模
+樣
+権
+横
+樫
+樯
+樱
+樵
+樸
+樹
+樺
+樽
+樾
+橄
+橇
+橋
+橐
+橘
+橙
+機
+橡
+橢
+橫
+橱
+橹
+橼
+檀
+檄
+檎
+檐
+檔
+檗
+檜
+檢
+檬
+檯
+檳
+檸
+檻
+櫃
+櫚
+櫛
+櫥
+櫸
+櫻
+欄
+權
+欒
+欖
+欠
+次
+欢
+欣
+欧
+欲
+欸
+欺
+欽
+款
+歆
+歇
+歉
+歌
+歎
+歐
+歓
+歙
+歛
+歡
+止
+正
+此
+步
+武
+歧
+歩
+歪
+歯
+歲
+歳
+歴
+歷
+歸
+歹
+死
+歼
+殁
+殃
+殆
+殇
+殉
+殊
+残
+殒
+殓
+殖
+殘
+殞
+殡
+殤
+殭
+殯
+殲
+殴
+段
+殷
+殺
+殼
+殿
+毀
+毁
+毂
+毅
+毆
+毋
+母
+毎
+每
+毒
+毓
+比
+毕
+毗
+毘
+毙
+毛
+毡
+毫
+毯
+毽
+氈
+氏
+氐
+民
+氓
+气
+氖
+気
+氙
+氛
+氟
+氡
+氢
+氣
+氤
+氦
+氧
+氨
+氪
+氫
+氮
+氯
+氰
+氲
+水
+氷
+永
+氹
+氾
+汀
+汁
+求
+汆
+汇
+汉
+汎
+汐
+汕
+汗
+汙
+汛
+汝
+汞
+江
+池
+污
+汤
+汨
+汩
+汪
+汰
+汲
+汴
+汶
+汹
+決
+汽
+汾
+沁
+沂
+沃
+沅
+沈
+沉
+沌
+沏
+沐
+沒
+沓
+沖
+沙
+沛
+沟
+没
+沢
+沣
+沥
+沦
+沧
+沪
+沫
+沭
+沮
+沱
+河
+沸
+油
+治
+沼
+沽
+沾
+沿
+況
+泄
+泉
+泊
+泌
+泓
+法
+泗
+泛
+泞
+泠
+泡
+波
+泣
+泥
+注
+泪
+泫
+泮
+泯
+泰
+泱
+泳
+泵
+泷
+泸
+泻
+泼
+泽
+泾
+洁
+洄
+洋
+洒
+洗
+洙
+洛
+洞
+津
+洩
+洪
+洮
+洱
+洲
+洵
+洶
+洸
+洹
+活
+洼
+洽
+派
+流
+浃
+浄
+浅
+浆
+浇
+浊
+测
+济
+浏
+浑
+浒
+浓
+浔
+浙
+浚
+浜
+浣
+浦
+浩
+浪
+浬
+浮
+浯
+浴
+海
+浸
+涂
+涅
+涇
+消
+涉
+涌
+涎
+涓
+涔
+涕
+涙
+涛
+涝
+涞
+涟
+涠
+涡
+涣
+涤
+润
+涧
+涨
+涩
+涪
+涮
+涯
+液
+涵
+涸
+涼
+涿
+淀
+淄
+淅
+淆
+淇
+淋
+淌
+淑
+淒
+淖
+淘
+淙
+淚
+淞
+淡
+淤
+淦
+淨
+淩
+淪
+淫
+淬
+淮
+深
+淳
+淵
+混
+淹
+淺
+添
+淼
+清
+済
+渉
+渊
+渋
+渍
+渎
+渐
+渔
+渗
+渙
+渚
+減
+渝
+渠
+渡
+渣
+渤
+渥
+渦
+温
+測
+渭
+港
+渲
+渴
+游
+渺
+渾
+湃
+湄
+湊
+湍
+湖
+湘
+湛
+湟
+湧
+湫
+湮
+湯
+湳
+湾
+湿
+満
+溃
+溅
+溉
+溏
+源
+準
+溜
+溝
+溟
+溢
+溥
+溧
+溪
+溫
+溯
+溱
+溴
+溶
+溺
+溼
+滁
+滂
+滄
+滅
+滇
+滋
+滌
+滑
+滓
+滔
+滕
+滙
+滚
+滝
+滞
+滟
+满
+滢
+滤
+滥
+滦
+滨
+滩
+滬
+滯
+滲
+滴
+滷
+滸
+滾
+滿
+漁
+漂
+漆
+漉
+漏
+漓
+演
+漕
+漠
+漢
+漣
+漩
+漪
+漫
+漬
+漯
+漱
+漲
+漳
+漸
+漾
+漿
+潆
+潇
+潋
+潍
+潑
+潔
+潘
+潛
+潜
+潞
+潟
+潢
+潤
+潦
+潧
+潭
+潮
+潰
+潴
+潸
+潺
+潼
+澀
+澄
+澆
+澈
+澍
+澎
+澗
+澜
+澡
+澤
+澧
+澱
+澳
+澹
+激
+濁
+濂
+濃
+濑
+濒
+濕
+濘
+濛
+濟
+濠
+濡
+濤
+濫
+濬
+濮
+濯
+濱
+濺
+濾
+瀅
+瀆
+瀉
+瀋
+瀏
+瀑
+瀕
+瀘
+瀚
+瀛
+瀝
+瀞
+瀟
+瀧
+瀨
+瀬
+瀰
+瀾
+灌
+灏
+灑
+灘
+灝
+灞
+灣
+火
+灬
+灭
+灯
+灰
+灵
+灶
+灸
+灼
+災
+灾
+灿
+炀
+炁
+炅
+炉
+炊
+炎
+炒
+炔
+炕
+炖
+炙
+炜
+炫
+炬
+炭
+炮
+炯
+炳
+炷
+炸
+点
+為
+炼
+炽
+烁
+烂
+烃
+烈
+烊
+烏
+烘
+烙
+烛
+烟
+烤
+烦
+烧
+烨
+烩
+烫
+烬
+热
+烯
+烷
+烹
+烽
+焉
+焊
+焕
+焖
+焗
+焘
+焙
+焚
+焜
+無
+焦
+焯
+焰
+焱
+然
+焼
+煅
+煉
+煊
+煌
+煎
+煒
+煖
+煙
+煜
+煞
+煤
+煥
+煦
+照
+煨
+煩
+煮
+煲
+煸
+煽
+熄
+熊
+熏
+熒
+熔
+熙
+熟
+熠
+熨
+熬
+熱
+熵
+熹
+熾
+燁
+燃
+燄
+燈
+燉
+燊
+燎
+燒
+燔
+燕
+燙
+燜
+營
+燥
+燦
+燧
+燭
+燮
+燴
+燻
+燼
+燿
+爆
+爍
+爐
+爛
+爪
+爬
+爭
+爰
+爱
+爲
+爵
+父
+爷
+爸
+爹
+爺
+爻
+爽
+爾
+牆
+片
+版
+牌
+牍
+牒
+牙
+牛
+牝
+牟
+牠
+牡
+牢
+牦
+牧
+物
+牯
+牲
+牴
+牵
+特
+牺
+牽
+犀
+犁
+犄
+犊
+犍
+犒
+犢
+犧
+犬
+犯
+状
+犷
+犸
+犹
+狀
+狂
+狄
+狈
+狎
+狐
+狒
+狗
+狙
+狞
+狠
+狡
+狩
+独
+狭
+狮
+狰
+狱
+狸
+狹
+狼
+狽
+猎
+猕
+猖
+猗
+猙
+猛
+猜
+猝
+猥
+猩
+猪
+猫
+猬
+献
+猴
+猶
+猷
+猾
+猿
+獄
+獅
+獎
+獐
+獒
+獗
+獠
+獣
+獨
+獭
+獰
+獲
+獵
+獷
+獸
+獺
+獻
+獼
+獾
+玄
+率
+玉
+王
+玑
+玖
+玛
+玟
+玠
+玥
+玩
+玫
+玮
+环
+现
+玲
+玳
+玷
+玺
+玻
+珀
+珂
+珅
+珈
+珉
+珊
+珍
+珏
+珐
+珑
+珙
+珞
+珠
+珣
+珥
+珩
+珪
+班
+珮
+珲
+珺
+現
+球
+琅
+理
+琇
+琉
+琊
+琍
+琏
+琐
+琛
+琢
+琥
+琦
+琨
+琪
+琬
+琮
+琰
+琲
+琳
+琴
+琵
+琶
+琺
+琼
+瑀
+瑁
+瑄
+瑋
+瑕
+瑗
+瑙
+瑚
+瑛
+瑜
+瑞
+瑟
+瑠
+瑣
+瑤
+瑩
+瑪
+瑯
+瑰
+瑶
+瑾
+璀
+璁
+璃
+璇
+璉
+璋
+璎
+璐
+璜
+璞
+璟
+璧
+璨
+環
+璽
+璿
+瓊
+瓏
+瓒
+瓜
+瓢
+瓣
+瓤
+瓦
+瓮
+瓯
+瓴
+瓶
+瓷
+甄
+甌
+甕
+甘
+甙
+甚
+甜
+生
+產
+産
+甥
+甦
+用
+甩
+甫
+甬
+甭
+甯
+田
+由
+甲
+申
+电
+男
+甸
+町
+画
+甾
+畀
+畅
+界
+畏
+畑
+畔
+留
+畜
+畝
+畢
+略
+畦
+番
+畫
+異
+畲
+畳
+畴
+當
+畸
+畹
+畿
+疆
+疇
+疊
+疏
+疑
+疔
+疖
+疗
+疙
+疚
+疝
+疟
+疡
+疣
+疤
+疥
+疫
+疮
+疯
+疱
+疲
+疳
+疵
+疸
+疹
+疼
+疽
+疾
+痂
+病
+症
+痈
+痉
+痊
+痍
+痒
+痔
+痕
+痘
+痙
+痛
+痞
+痠
+痢
+痣
+痤
+痧
+痨
+痪
+痫
+痰
+痱
+痴
+痹
+痺
+痼
+痿
+瘀
+瘁
+瘋
+瘍
+瘓
+瘘
+瘙
+瘟
+瘠
+瘡
+瘢
+瘤
+瘦
+瘧
+瘩
+瘪
+瘫
+瘴
+瘸
+瘾
+療
+癇
+癌
+癒
+癖
+癜
+癞
+癡
+癢
+癣
+癥
+癫
+癬
+癮
+癱
+癲
+癸
+発
+登
+發
+白
+百
+皂
+的
+皆
+皇
+皈
+皋
+皎
+皑
+皓
+皖
+皙
+皚
+皮
+皰
+皱
+皴
+皺
+皿
+盂
+盃
+盅
+盆
+盈
+益
+盎
+盏
+盐
+监
+盒
+盔
+盖
+盗
+盘
+盛
+盜
+盞
+盟
+盡
+監
+盤
+盥
+盧
+盪
+目
+盯
+盱
+盲
+直
+相
+盹
+盼
+盾
+省
+眈
+眉
+看
+県
+眙
+眞
+真
+眠
+眦
+眨
+眩
+眯
+眶
+眷
+眸
+眺
+眼
+眾
+着
+睁
+睇
+睏
+睐
+睑
+睛
+睜
+睞
+睡
+睢
+督
+睥
+睦
+睨
+睪
+睫
+睬
+睹
+睽
+睾
+睿
+瞄
+瞅
+瞇
+瞋
+瞌
+瞎
+瞑
+瞒
+瞓
+瞞
+瞟
+瞠
+瞥
+瞧
+瞩
+瞪
+瞬
+瞭
+瞰
+瞳
+瞻
+瞼
+瞿
+矇
+矍
+矗
+矚
+矛
+矜
+矢
+矣
+知
+矩
+矫
+短
+矮
+矯
+石
+矶
+矽
+矾
+矿
+码
+砂
+砌
+砍
+砒
+研
+砖
+砗
+砚
+砝
+砣
+砥
+砧
+砭
+砰
+砲
+破
+砷
+砸
+砺
+砼
+砾
+础
+硅
+硐
+硒
+硕
+硝
+硫
+硬
+确
+硯
+硼
+碁
+碇
+碉
+碌
+碍
+碎
+碑
+碓
+碗
+碘
+碚
+碛
+碟
+碣
+碧
+碩
+碰
+碱
+碳
+碴
+確
+碼
+碾
+磁
+磅
+磊
+磋
+磐
+磕
+磚
+磡
+磨
+磬
+磯
+磲
+磷
+磺
+礁
+礎
+礙
+礡
+礦
+礪
+礫
+礴
+示
+礼
+社
+祀
+祁
+祂
+祇
+祈
+祉
+祎
+祐
+祕
+祖
+祗
+祚
+祛
+祜
+祝
+神
+祟
+祠
+祢
+祥
+票
+祭
+祯
+祷
+祸
+祺
+祿
+禀
+禁
+禄
+禅
+禍
+禎
+福
+禛
+禦
+禧
+禪
+禮
+禱
+禹
+禺
+离
+禽
+禾
+禿
+秀
+私
+秃
+秆
+秉
+秋
+种
+科
+秒
+秘
+租
+秣
+秤
+秦
+秧
+秩
+秭
+积
+称
+秸
+移
+秽
+稀
+稅
+程
+稍
+税
+稔
+稗
+稚
+稜
+稞
+稟
+稠
+稣
+種
+稱
+稲
+稳
+稷
+稹
+稻
+稼
+稽
+稿
+穀
+穂
+穆
+穌
+積
+穎
+穗
+穢
+穩
+穫
+穴
+究
+穷
+穹
+空
+穿
+突
+窃
+窄
+窈
+窍
+窑
+窒
+窓
+窕
+窖
+窗
+窘
+窜
+窝
+窟
+窠
+窥
+窦
+窨
+窩
+窪
+窮
+窯
+窺
+窿
+竄
+竅
+竇
+竊
+立
+竖
+站
+竜
+竞
+竟
+章
+竣
+童
+竭
+端
+競
+竹
+竺
+竽
+竿
+笃
+笆
+笈
+笋
+笏
+笑
+笔
+笙
+笛
+笞
+笠
+符
+笨
+第
+笹
+笺
+笼
+筆
+等
+筊
+筋
+筍
+筏
+筐
+筑
+筒
+答
+策
+筛
+筝
+筠
+筱
+筲
+筵
+筷
+筹
+签
+简
+箇
+箋
+箍
+箏
+箐
+箔
+箕
+算
+箝
+管
+箩
+箫
+箭
+箱
+箴
+箸
+節
+篁
+範
+篆
+篇
+築
+篑
+篓
+篙
+篝
+篠
+篡
+篤
+篩
+篪
+篮
+篱
+篷
+簇
+簌
+簍
+簡
+簦
+簧
+簪
+簫
+簷
+簸
+簽
+簾
+簿
+籁
+籃
+籌
+籍
+籐
+籟
+籠
+籤
+籬
+籮
+籲
+米
+类
+籼
+籽
+粄
+粉
+粑
+粒
+粕
+粗
+粘
+粟
+粤
+粥
+粧
+粪
+粮
+粱
+粲
+粳
+粵
+粹
+粼
+粽
+精
+粿
+糅
+糊
+糍
+糕
+糖
+糗
+糙
+糜
+糞
+糟
+糠
+糧
+糬
+糯
+糰
+糸
+系
+糾
+紀
+紂
+約
+紅
+紉
+紊
+紋
+納
+紐
+紓
+純
+紗
+紘
+紙
+級
+紛
+紜
+素
+紡
+索
+紧
+紫
+紮
+累
+細
+紳
+紹
+紺
+終
+絃
+組
+絆
+経
+結
+絕
+絞
+絡
+絢
+給
+絨
+絮
+統
+絲
+絳
+絵
+絶
+絹
+綁
+綏
+綑
+經
+継
+続
+綜
+綠
+綢
+綦
+綫
+綬
+維
+綱
+網
+綴
+綵
+綸
+綺
+綻
+綽
+綾
+綿
+緊
+緋
+総
+緑
+緒
+緘
+線
+緝
+緞
+締
+緣
+編
+緩
+緬
+緯
+練
+緹
+緻
+縁
+縄
+縈
+縛
+縝
+縣
+縫
+縮
+縱
+縴
+縷
+總
+績
+繁
+繃
+繆
+繇
+繋
+織
+繕
+繚
+繞
+繡
+繩
+繪
+繫
+繭
+繳
+繹
+繼
+繽
+纂
+續
+纍
+纏
+纓
+纔
+纖
+纜
+纠
+红
+纣
+纤
+约
+级
+纨
+纪
+纫
+纬
+纭
+纯
+纰
+纱
+纲
+纳
+纵
+纶
+纷
+纸
+纹
+纺
+纽
+纾
+线
+绀
+练
+组
+绅
+细
+织
+终
+绊
+绍
+绎
+经
+绑
+绒
+结
+绔
+绕
+绘
+给
+绚
+绛
+络
+绝
+绞
+统
+绡
+绢
+绣
+绥
+绦
+继
+绩
+绪
+绫
+续
+绮
+绯
+绰
+绳
+维
+绵
+绶
+绷
+绸
+绻
+综
+绽
+绾
+绿
+缀
+缄
+缅
+缆
+缇
+缈
+缉
+缎
+缓
+缔
+缕
+编
+缘
+缙
+缚
+缜
+缝
+缠
+缢
+缤
+缥
+缨
+缩
+缪
+缭
+缮
+缰
+缱
+缴
+缸
+缺
+缽
+罂
+罄
+罌
+罐
+网
+罔
+罕
+罗
+罚
+罡
+罢
+罩
+罪
+置
+罰
+署
+罵
+罷
+罹
+羁
+羅
+羈
+羊
+羌
+美
+羔
+羚
+羞
+羟
+羡
+羣
+群
+羥
+羧
+羨
+義
+羯
+羲
+羸
+羹
+羽
+羿
+翁
+翅
+翊
+翌
+翎
+習
+翔
+翘
+翟
+翠
+翡
+翦
+翩
+翰
+翱
+翳
+翹
+翻
+翼
+耀
+老
+考
+耄
+者
+耆
+耋
+而
+耍
+耐
+耒
+耕
+耗
+耘
+耙
+耦
+耨
+耳
+耶
+耷
+耸
+耻
+耽
+耿
+聂
+聆
+聊
+聋
+职
+聒
+联
+聖
+聘
+聚
+聞
+聪
+聯
+聰
+聲
+聳
+聴
+聶
+職
+聽
+聾
+聿
+肃
+肄
+肅
+肆
+肇
+肉
+肋
+肌
+肏
+肓
+肖
+肘
+肚
+肛
+肝
+肠
+股
+肢
+肤
+肥
+肩
+肪
+肮
+肯
+肱
+育
+肴
+肺
+肽
+肾
+肿
+胀
+胁
+胃
+胄
+胆
+背
+胍
+胎
+胖
+胚
+胛
+胜
+胝
+胞
+胡
+胤
+胥
+胧
+胫
+胭
+胯
+胰
+胱
+胳
+胴
+胶
+胸
+胺
+能
+脂
+脅
+脆
+脇
+脈
+脉
+脊
+脍
+脏
+脐
+脑
+脓
+脖
+脘
+脚
+脛
+脣
+脩
+脫
+脯
+脱
+脲
+脳
+脸
+脹
+脾
+腆
+腈
+腊
+腋
+腌
+腎
+腐
+腑
+腓
+腔
+腕
+腥
+腦
+腩
+腫
+腭
+腮
+腰
+腱
+腳
+腴
+腸
+腹
+腺
+腻
+腼
+腾
+腿
+膀
+膈
+膊
+膏
+膑
+膘
+膚
+膛
+膜
+膝
+膠
+膦
+膨
+膩
+膳
+膺
+膻
+膽
+膾
+膿
+臀
+臂
+臃
+臆
+臉
+臊
+臍
+臓
+臘
+臟
+臣
+臥
+臧
+臨
+自
+臬
+臭
+至
+致
+臺
+臻
+臼
+臾
+舀
+舂
+舅
+舆
+與
+興
+舉
+舊
+舌
+舍
+舎
+舐
+舒
+舔
+舖
+舗
+舛
+舜
+舞
+舟
+航
+舫
+般
+舰
+舱
+舵
+舶
+舷
+舸
+船
+舺
+舾
+艇
+艋
+艘
+艙
+艦
+艮
+良
+艰
+艱
+色
+艳
+艷
+艹
+艺
+艾
+节
+芃
+芈
+芊
+芋
+芍
+芎
+芒
+芙
+芜
+芝
+芡
+芥
+芦
+芩
+芪
+芫
+芬
+芭
+芮
+芯
+花
+芳
+芷
+芸
+芹
+芻
+芽
+芾
+苁
+苄
+苇
+苋
+苍
+苏
+苑
+苒
+苓
+苔
+苕
+苗
+苛
+苜
+苞
+苟
+苡
+苣
+若
+苦
+苫
+苯
+英
+苷
+苹
+苻
+茁
+茂
+范
+茄
+茅
+茉
+茎
+茏
+茗
+茜
+茧
+茨
+茫
+茬
+茭
+茯
+茱
+茲
+茴
+茵
+茶
+茸
+茹
+茼
+荀
+荃
+荆
+草
+荊
+荏
+荐
+荒
+荔
+荖
+荘
+荚
+荞
+荟
+荠
+荡
+荣
+荤
+荥
+荧
+荨
+荪
+荫
+药
+荳
+荷
+荸
+荻
+荼
+荽
+莅
+莆
+莉
+莊
+莎
+莒
+莓
+莖
+莘
+莞
+莠
+莢
+莧
+莪
+莫
+莱
+莲
+莴
+获
+莹
+莺
+莽
+莿
+菀
+菁
+菅
+菇
+菈
+菊
+菌
+菏
+菓
+菖
+菘
+菜
+菟
+菠
+菡
+菩
+華
+菱
+菲
+菸
+菽
+萁
+萃
+萄
+萊
+萋
+萌
+萍
+萎
+萘
+萝
+萤
+营
+萦
+萧
+萨
+萩
+萬
+萱
+萵
+萸
+萼
+落
+葆
+葉
+著
+葚
+葛
+葡
+董
+葦
+葩
+葫
+葬
+葭
+葯
+葱
+葳
+葵
+葷
+葺
+蒂
+蒋
+蒐
+蒔
+蒙
+蒜
+蒞
+蒟
+蒡
+蒨
+蒲
+蒸
+蒹
+蒻
+蒼
+蒿
+蓁
+蓄
+蓆
+蓉
+蓋
+蓑
+蓓
+蓖
+蓝
+蓟
+蓦
+蓬
+蓮
+蓼
+蓿
+蔑
+蔓
+蔔
+蔗
+蔘
+蔚
+蔡
+蔣
+蔥
+蔫
+蔬
+蔭
+蔵
+蔷
+蔺
+蔻
+蔼
+蔽
+蕁
+蕃
+蕈
+蕉
+蕊
+蕎
+蕙
+蕤
+蕨
+蕩
+蕪
+蕭
+蕲
+蕴
+蕻
+蕾
+薄
+薅
+薇
+薈
+薊
+薏
+薑
+薔
+薙
+薛
+薦
+薨
+薩
+薪
+薬
+薯
+薰
+薹
+藉
+藍
+藏
+藐
+藓
+藕
+藜
+藝
+藤
+藥
+藩
+藹
+藻
+藿
+蘆
+蘇
+蘊
+蘋
+蘑
+蘚
+蘭
+蘸
+蘼
+蘿
+虎
+虏
+虐
+虑
+虔
+處
+虚
+虛
+虜
+虞
+號
+虢
+虧
+虫
+虬
+虱
+虹
+虻
+虽
+虾
+蚀
+蚁
+蚂
+蚊
+蚌
+蚓
+蚕
+蚜
+蚝
+蚣
+蚤
+蚩
+蚪
+蚯
+蚱
+蚵
+蛀
+蛆
+蛇
+蛊
+蛋
+蛎
+蛐
+蛔
+蛙
+蛛
+蛟
+蛤
+蛭
+蛮
+蛰
+蛳
+蛹
+蛻
+蛾
+蜀
+蜂
+蜃
+蜆
+蜇
+蜈
+蜊
+蜍
+蜒
+蜓
+蜕
+蜗
+蜘
+蜚
+蜜
+蜡
+蜢
+蜥
+蜱
+蜴
+蜷
+蜻
+蜿
+蝇
+蝈
+蝉
+蝌
+蝎
+蝕
+蝗
+蝙
+蝟
+蝠
+蝦
+蝨
+蝴
+蝶
+蝸
+蝼
+螂
+螃
+融
+螞
+螢
+螨
+螯
+螳
+螺
+蟀
+蟄
+蟆
+蟋
+蟎
+蟑
+蟒
+蟠
+蟬
+蟲
+蟹
+蟻
+蟾
+蠅
+蠍
+蠔
+蠕
+蠛
+蠟
+蠡
+蠢
+蠣
+蠱
+蠶
+蠹
+蠻
+血
+衄
+衅
+衆
+行
+衍
+術
+衔
+街
+衙
+衛
+衝
+衞
+衡
+衢
+衣
+补
+表
+衩
+衫
+衬
+衮
+衰
+衲
+衷
+衹
+衾
+衿
+袁
+袂
+袄
+袅
+袈
+袋
+袍
+袒
+袖
+袜
+袞
+袤
+袪
+被
+袭
+袱
+裁
+裂
+装
+裆
+裊
+裏
+裔
+裕
+裘
+裙
+補
+裝
+裟
+裡
+裤
+裨
+裱
+裳
+裴
+裸
+裹
+製
+裾
+褂
+複
+褐
+褒
+褓
+褔
+褚
+褥
+褪
+褫
+褲
+褶
+褻
+襁
+襄
+襟
+襠
+襪
+襬
+襯
+襲
+西
+要
+覃
+覆
+覇
+見
+規
+覓
+視
+覚
+覦
+覧
+親
+覬
+観
+覷
+覺
+覽
+觀
+见
+观
+规
+觅
+视
+览
+觉
+觊
+觎
+觐
+觑
+角
+觞
+解
+觥
+触
+觸
+言
+訂
+計
+訊
+討
+訓
+訕
+訖
+託
+記
+訛
+訝
+訟
+訣
+訥
+訪
+設
+許
+訳
+訴
+訶
+診
+註
+証
+詆
+詐
+詔
+評
+詛
+詞
+詠
+詡
+詢
+詣
+試
+詩
+詫
+詬
+詭
+詮
+詰
+話
+該
+詳
+詹
+詼
+誅
+誇
+誉
+誌
+認
+誓
+誕
+誘
+語
+誠
+誡
+誣
+誤
+誥
+誦
+誨
+說
+説
+読
+誰
+課
+誹
+誼
+調
+諄
+談
+請
+諏
+諒
+論
+諗
+諜
+諡
+諦
+諧
+諫
+諭
+諮
+諱
+諳
+諷
+諸
+諺
+諾
+謀
+謁
+謂
+謄
+謊
+謎
+謐
+謔
+謗
+謙
+講
+謝
+謠
+謨
+謬
+謹
+謾
+譁
+證
+譎
+譏
+識
+譙
+譚
+譜
+警
+譬
+譯
+議
+譲
+譴
+護
+譽
+讀
+變
+讓
+讚
+讞
+计
+订
+认
+讥
+讧
+讨
+让
+讪
+讫
+训
+议
+讯
+记
+讲
+讳
+讴
+讶
+讷
+许
+讹
+论
+讼
+讽
+设
+访
+诀
+证
+诃
+评
+诅
+识
+诈
+诉
+诊
+诋
+词
+诏
+译
+试
+诗
+诘
+诙
+诚
+诛
+话
+诞
+诟
+诠
+诡
+询
+诣
+诤
+该
+详
+诧
+诩
+诫
+诬
+语
+误
+诰
+诱
+诲
+说
+诵
+诶
+请
+诸
+诺
+读
+诽
+课
+诿
+谀
+谁
+调
+谄
+谅
+谆
+谈
+谊
+谋
+谌
+谍
+谎
+谏
+谐
+谑
+谒
+谓
+谔
+谕
+谗
+谘
+谙
+谚
+谛
+谜
+谟
+谢
+谣
+谤
+谥
+谦
+谧
+谨
+谩
+谪
+谬
+谭
+谯
+谱
+谲
+谴
+谶
+谷
+豁
+豆
+豇
+豈
+豉
+豊
+豌
+豎
+豐
+豔
+豚
+象
+豢
+豪
+豫
+豬
+豹
+豺
+貂
+貅
+貌
+貓
+貔
+貘
+貝
+貞
+負
+財
+貢
+貧
+貨
+販
+貪
+貫
+責
+貯
+貰
+貳
+貴
+貶
+買
+貸
+費
+貼
+貽
+貿
+賀
+賁
+賂
+賃
+賄
+資
+賈
+賊
+賑
+賓
+賜
+賞
+賠
+賡
+賢
+賣
+賤
+賦
+質
+賬
+賭
+賴
+賺
+購
+賽
+贅
+贈
+贊
+贍
+贏
+贓
+贖
+贛
+贝
+贞
+负
+贡
+财
+责
+贤
+败
+账
+货
+质
+贩
+贪
+贫
+贬
+购
+贮
+贯
+贰
+贱
+贲
+贴
+贵
+贷
+贸
+费
+贺
+贻
+贼
+贾
+贿
+赁
+赂
+赃
+资
+赅
+赈
+赊
+赋
+赌
+赎
+赏
+赐
+赓
+赔
+赖
+赘
+赚
+赛
+赝
+赞
+赠
+赡
+赢
+赣
+赤
+赦
+赧
+赫
+赭
+走
+赳
+赴
+赵
+赶
+起
+趁
+超
+越
+趋
+趕
+趙
+趟
+趣
+趨
+足
+趴
+趵
+趸
+趺
+趾
+跃
+跄
+跆
+跋
+跌
+跎
+跑
+跖
+跚
+跛
+距
+跟
+跡
+跤
+跨
+跩
+跪
+路
+跳
+践
+跷
+跹
+跺
+跻
+踉
+踊
+踌
+踏
+踐
+踝
+踞
+踟
+踢
+踩
+踪
+踮
+踱
+踴
+踵
+踹
+蹂
+蹄
+蹇
+蹈
+蹉
+蹊
+蹋
+蹑
+蹒
+蹙
+蹟
+蹣
+蹤
+蹦
+蹩
+蹬
+蹭
+蹲
+蹴
+蹶
+蹺
+蹼
+蹿
+躁
+躇
+躉
+躊
+躋
+躍
+躏
+躪
+身
+躬
+躯
+躲
+躺
+軀
+車
+軋
+軌
+軍
+軒
+軟
+転
+軸
+軼
+軽
+軾
+較
+載
+輒
+輓
+輔
+輕
+輛
+輝
+輟
+輩
+輪
+輯
+輸
+輻
+輾
+輿
+轄
+轅
+轆
+轉
+轍
+轎
+轟
+车
+轧
+轨
+轩
+转
+轭
+轮
+软
+轰
+轲
+轴
+轶
+轻
+轼
+载
+轿
+较
+辄
+辅
+辆
+辇
+辈
+辉
+辊
+辍
+辐
+辑
+输
+辕
+辖
+辗
+辘
+辙
+辛
+辜
+辞
+辟
+辣
+辦
+辨
+辩
+辫
+辭
+辮
+辯
+辰
+辱
+農
+边
+辺
+辻
+込
+辽
+达
+迁
+迂
+迄
+迅
+过
+迈
+迎
+运
+近
+返
+还
+这
+进
+远
+违
+连
+迟
+迢
+迤
+迥
+迦
+迩
+迪
+迫
+迭
+述
+迴
+迷
+迸
+迹
+迺
+追
+退
+送
+适
+逃
+逅
+逆
+选
+逊
+逍
+透
+逐
+递
+途
+逕
+逗
+這
+通
+逛
+逝
+逞
+速
+造
+逢
+連
+逮
+週
+進
+逵
+逶
+逸
+逻
+逼
+逾
+遁
+遂
+遅
+遇
+遊
+運
+遍
+過
+遏
+遐
+遑
+遒
+道
+達
+違
+遗
+遙
+遛
+遜
+遞
+遠
+遢
+遣
+遥
+遨
+適
+遭
+遮
+遲
+遴
+遵
+遶
+遷
+選
+遺
+遼
+遽
+避
+邀
+邁
+邂
+邃
+還
+邇
+邈
+邊
+邋
+邏
+邑
+邓
+邕
+邛
+邝
+邢
+那
+邦
+邨
+邪
+邬
+邮
+邯
+邰
+邱
+邳
+邵
+邸
+邹
+邺
+邻
+郁
+郅
+郊
+郎
+郑
+郜
+郝
+郡
+郢
+郤
+郦
+郧
+部
+郫
+郭
+郴
+郵
+郷
+郸
+都
+鄂
+鄉
+鄒
+鄔
+鄙
+鄞
+鄢
+鄧
+鄭
+鄰
+鄱
+鄲
+鄺
+酉
+酊
+酋
+酌
+配
+酐
+酒
+酗
+酚
+酝
+酢
+酣
+酥
+酩
+酪
+酬
+酮
+酯
+酰
+酱
+酵
+酶
+酷
+酸
+酿
+醃
+醇
+醉
+醋
+醍
+醐
+醒
+醚
+醛
+醜
+醞
+醣
+醪
+醫
+醬
+醮
+醯
+醴
+醺
+釀
+釁
+采
+釉
+释
+釋
+里
+重
+野
+量
+釐
+金
+釗
+釘
+釜
+針
+釣
+釦
+釧
+釵
+鈀
+鈉
+鈍
+鈎
+鈔
+鈕
+鈞
+鈣
+鈦
+鈪
+鈴
+鈺
+鈾
+鉀
+鉄
+鉅
+鉉
+鉑
+鉗
+鉚
+鉛
+鉤
+鉴
+鉻
+銀
+銃
+銅
+銑
+銓
+銖
+銘
+銜
+銬
+銭
+銮
+銳
+銷
+銹
+鋁
+鋅
+鋒
+鋤
+鋪
+鋰
+鋸
+鋼
+錄
+錐
+錘
+錚
+錠
+錢
+錦
+錨
+錫
+錮
+錯
+録
+錳
+錶
+鍊
+鍋
+鍍
+鍛
+鍥
+鍰
+鍵
+鍺
+鍾
+鎂
+鎊
+鎌
+鎏
+鎔
+鎖
+鎗
+鎚
+鎧
+鎬
+鎮
+鎳
+鏈
+鏖
+鏗
+鏘
+鏞
+鏟
+鏡
+鏢
+鏤
+鏽
+鐘
+鐮
+鐲
+鐳
+鐵
+鐸
+鐺
+鑄
+鑊
+鑑
+鑒
+鑣
+鑫
+鑰
+鑲
+鑼
+鑽
+鑾
+鑿
+针
+钉
+钊
+钎
+钏
+钒
+钓
+钗
+钙
+钛
+钜
+钝
+钞
+钟
+钠
+钡
+钢
+钣
+钤
+钥
+钦
+钧
+钨
+钩
+钮
+钯
+钰
+钱
+钳
+钴
+钵
+钺
+钻
+钼
+钾
+钿
+铀
+铁
+铂
+铃
+铄
+铅
+铆
+铉
+铎
+铐
+铛
+铜
+铝
+铠
+铡
+铢
+铣
+铤
+铨
+铩
+铬
+铭
+铮
+铰
+铲
+铵
+银
+铸
+铺
+链
+铿
+销
+锁
+锂
+锄
+锅
+锆
+锈
+锉
+锋
+锌
+锏
+锐
+锑
+错
+锚
+锟
+锡
+锢
+锣
+锤
+锥
+锦
+锭
+键
+锯
+锰
+锲
+锵
+锹
+锺
+锻
+镀
+镁
+镂
+镇
+镉
+镌
+镍
+镐
+镑
+镕
+镖
+镗
+镛
+镜
+镣
+镭
+镯
+镰
+镳
+镶
+長
+长
+門
+閃
+閉
+開
+閎
+閏
+閑
+閒
+間
+閔
+閘
+閡
+関
+閣
+閥
+閨
+閩
+閱
+閲
+閹
+閻
+閾
+闆
+闇
+闊
+闌
+闍
+闔
+闕
+闖
+闘
+關
+闡
+闢
+门
+闪
+闫
+闭
+问
+闯
+闰
+闲
+间
+闵
+闷
+闸
+闹
+闺
+闻
+闽
+闾
+阀
+阁
+阂
+阅
+阆
+阇
+阈
+阉
+阎
+阐
+阑
+阔
+阕
+阖
+阙
+阚
+阜
+队
+阡
+阪
+阮
+阱
+防
+阳
+阴
+阵
+阶
+阻
+阿
+陀
+陂
+附
+际
+陆
+陇
+陈
+陋
+陌
+降
+限
+陕
+陛
+陝
+陞
+陟
+陡
+院
+陣
+除
+陨
+险
+陪
+陰
+陲
+陳
+陵
+陶
+陷
+陸
+険
+陽
+隅
+隆
+隈
+隊
+隋
+隍
+階
+随
+隐
+隔
+隕
+隘
+隙
+際
+障
+隠
+隣
+隧
+隨
+險
+隱
+隴
+隶
+隸
+隻
+隼
+隽
+难
+雀
+雁
+雄
+雅
+集
+雇
+雉
+雋
+雌
+雍
+雎
+雏
+雑
+雒
+雕
+雖
+雙
+雛
+雜
+雞
+離
+難
+雨
+雪
+雯
+雰
+雲
+雳
+零
+雷
+雹
+電
+雾
+需
+霁
+霄
+霆
+震
+霈
+霉
+霊
+霍
+霎
+霏
+霑
+霓
+霖
+霜
+霞
+霧
+霭
+霰
+露
+霸
+霹
+霽
+霾
+靂
+靄
+靈
+青
+靓
+靖
+静
+靚
+靛
+靜
+非
+靠
+靡
+面
+靥
+靦
+革
+靳
+靴
+靶
+靼
+鞅
+鞋
+鞍
+鞏
+鞑
+鞘
+鞠
+鞣
+鞦
+鞭
+韆
+韋
+韌
+韓
+韜
+韦
+韧
+韩
+韬
+韭
+音
+韵
+韶
+韻
+響
+頁
+頂
+頃
+項
+順
+須
+頌
+預
+頑
+頒
+頓
+頗
+領
+頜
+頡
+頤
+頫
+頭
+頰
+頷
+頸
+頹
+頻
+頼
+顆
+題
+額
+顎
+顏
+顔
+願
+顛
+類
+顧
+顫
+顯
+顱
+顴
+页
+顶
+顷
+项
+顺
+须
+顼
+顽
+顾
+顿
+颁
+颂
+预
+颅
+领
+颇
+颈
+颉
+颊
+颌
+颍
+颐
+频
+颓
+颔
+颖
+颗
+题
+颚
+颛
+颜
+额
+颞
+颠
+颡
+颢
+颤
+颦
+颧
+風
+颯
+颱
+颳
+颶
+颼
+飄
+飆
+风
+飒
+飓
+飕
+飘
+飙
+飚
+飛
+飞
+食
+飢
+飨
+飩
+飪
+飯
+飲
+飼
+飽
+飾
+餃
+餅
+餉
+養
+餌
+餐
+餒
+餓
+餘
+餚
+餛
+餞
+餡
+館
+餮
+餵
+餾
+饅
+饈
+饋
+饌
+饍
+饑
+饒
+饕
+饗
+饞
+饥
+饨
+饪
+饬
+饭
+饮
+饯
+饰
+饱
+饲
+饴
+饵
+饶
+饷
+饺
+饼
+饽
+饿
+馀
+馁
+馄
+馅
+馆
+馈
+馋
+馍
+馏
+馒
+馔
+首
+馗
+香
+馥
+馨
+馬
+馭
+馮
+馳
+馴
+駁
+駄
+駅
+駆
+駐
+駒
+駕
+駛
+駝
+駭
+駱
+駿
+騁
+騎
+騏
+験
+騙
+騨
+騰
+騷
+驀
+驅
+驊
+驍
+驒
+驕
+驗
+驚
+驛
+驟
+驢
+驥
+马
+驭
+驮
+驯
+驰
+驱
+驳
+驴
+驶
+驷
+驸
+驹
+驻
+驼
+驾
+驿
+骁
+骂
+骄
+骅
+骆
+骇
+骈
+骊
+骋
+验
+骏
+骐
+骑
+骗
+骚
+骛
+骜
+骞
+骠
+骡
+骤
+骥
+骧
+骨
+骯
+骰
+骶
+骷
+骸
+骼
+髂
+髅
+髋
+髏
+髒
+髓
+體
+髖
+高
+髦
+髪
+髮
+髯
+髻
+鬃
+鬆
+鬍
+鬓
+鬚
+鬟
+鬢
+鬣
+鬥
+鬧
+鬱
+鬼
+魁
+魂
+魄
+魅
+魇
+魍
+魏
+魔
+魘
+魚
+魯
+魷
+鮑
+鮨
+鮪
+鮭
+鮮
+鯉
+鯊
+鯖
+鯛
+鯨
+鯰
+鯽
+鰍
+鰓
+鰭
+鰲
+鰻
+鰾
+鱈
+鱉
+鱔
+鱗
+鱷
+鱸
+鱼
+鱿
+鲁
+鲈
+鲍
+鲑
+鲛
+鲜
+鲟
+鲢
+鲤
+鲨
+鲫
+鲱
+鲲
+鲶
+鲷
+鲸
+鳃
+鳄
+鳅
+鳌
+鳍
+鳕
+鳖
+鳗
+鳝
+鳞
+鳥
+鳩
+鳳
+鳴
+鳶
+鴉
+鴕
+鴛
+鴦
+鴨
+鴻
+鴿
+鵑
+鵜
+鵝
+鵡
+鵬
+鵰
+鵲
+鶘
+鶩
+鶯
+鶴
+鷗
+鷲
+鷹
+鷺
+鸚
+鸞
+鸟
+鸠
+鸡
+鸢
+鸣
+鸥
+鸦
+鸨
+鸪
+鸭
+鸯
+鸳
+鸵
+鸽
+鸾
+鸿
+鹂
+鹃
+鹄
+鹅
+鹈
+鹉
+鹊
+鹌
+鹏
+鹑
+鹕
+鹘
+鹜
+鹞
+鹤
+鹦
+鹧
+鹫
+鹭
+鹰
+鹳
+鹵
+鹹
+鹼
+鹽
+鹿
+麂
+麋
+麒
+麓
+麗
+麝
+麟
+麥
+麦
+麩
+麴
+麵
+麸
+麺
+麻
+麼
+麽
+麾
+黃
+黄
+黍
+黎
+黏
+黑
+黒
+黔
+默
+黛
+黜
+黝
+點
+黠
+黨
+黯
+黴
+鼋
+鼎
+鼐
+鼓
+鼠
+鼬
+鼹
+鼻
+鼾
+齁
+齊
+齋
+齐
+齒
+齡
+齢
+齣
+齦
+齿
+龄
+龅
+龈
+龊
+龋
+龌
+龍
+龐
+龔
+龕
+龙
+龚
+龛
+龜
+龟
+︰
+︱
+︶
+︿
+﹁
+﹂
+﹍
+﹏
+﹐
+﹑
+﹒
+﹔
+﹕
+﹖
+﹗
+﹙
+﹚
+﹝
+﹞
+﹡
+﹣
+！
+＂
+＃
+＄
+％
+＆
+＇
+（
+）
+＊
+＋
+，
+－
+．
+／
+０
+１
+２
+３
+４
+５
+６
+７
+８
+９
+：
+；
+＜
+＝
+＞
+？
+＠
+［
+＼
+］
+＾
+＿
+｀
+ａ
+ｂ
+ｃ
+ｄ
+ｅ
+ｆ
+ｇ
+ｈ
+ｉ
+ｊ
+ｋ
+ｌ
+ｍ
+ｎ
+ｏ
+ｐ
+ｑ
+ｒ
+ｓ
+ｔ
+ｕ
+ｖ
+ｗ
+ｘ
+ｙ
+ｚ
+｛
+｜
+｝
+～
+｡
+｢
+｣
+､
+･
+ｯ
+ｰ
+ｲ
+ｸ
+ｼ
+ｽ
+ﾄ
+ﾉ
+ﾌ
+ﾗ
+ﾙ
+ﾝ
+ﾞ
+ﾟ
+￣
+￥
+👍
+🔥
+😂
+😎
+...
+yam
+10
+2017
+12
+11
+2016
+20
+30
+15
+06
+lofter
+##s
+2015
+by
+16
+14
+18
+13
+24
+17
+2014
+21
+##0
+22
+19
+25
+23
+com
+100
+00
+05
+2013
+##a
+03
+09
+08
+28
+##2
+50
+01
+04
+##1
+27
+02
+2012
+##3
+26
+##e
+07
+##8
+##5
+##6
+##4
+##9
+##7
+29
+2011
+40
+##t
+2010
+##o
+##d
+##i
+2009
+##n
+app
+www
+the
+##m
+31
+##c
+##l
+##y
+##r
+##g
+2008
+60
+http
+200
+qq
+##p
+80
+##f
+google
+pixnet
+90
+cookies
+tripadvisor
+500
+##er
+##k
+35
+##h
+facebook
+2007
+2000
+70
+##b
+of
+##x
+##u
+45
+300
+iphone
+32
+1000
+2006
+48
+ip
+36
+in
+38
+3d
+##w
+##ing
+55
+ctrip
+##on
+##v
+33
+##の
+to
+34
+400
+id
+2005
+it
+37
+windows
+llc
+top
+99
+42
+39
+000
+led
+at
+##an
+41
+51
+52
+46
+49
+43
+53
+44
+##z
+android
+58
+and
+59
+2004
+56
+vr
+##か
+5000
+2003
+47
+blogthis
+twitter
+54
+##le
+150
+ok
+2018
+57
+75
+cn
+no
+ios
+##in
+##mm
+##00
+800
+on
+te
+3000
+65
+2001
+360
+95
+ig
+lv
+120
+##ng
+##を
+##us
+##に
+pc
+てす
+──
+600
+##te
+85
+2002
+88
+##ed
+html
+ncc
+wifi
+email
+64
+blog
+is
+##10
+##て
+mail
+online
+##al
+dvd
+##ic
+studio
+##は
+##℃
+##ia
+##と
+line
+vip
+72
+##q
+98
+##ce
+##en
+for
+##is
+##ra
+##es
+##j
+usb
+net
+cp
+1999
+asia
+4g
+##cm
+diy
+new
+3c
+##お
+ta
+66
+language
+vs
+apple
+tw
+86
+web
+##ne
+ipad
+62
+you
+##re
+101
+68
+##tion
+ps
+de
+bt
+pony
+atm
+##2017
+1998
+67
+##ch
+ceo
+##or
+go
+##na
+av
+pro
+cafe
+96
+pinterest
+97
+63
+pixstyleme3c
+##ta
+more
+said
+##2016
+1997
+mp3
+700
+##ll
+nba
+jun
+##20
+92
+tv
+1995
+pm
+61
+76
+nbsp
+250
+##ie
+linux
+##ma
+cd
+110
+hd
+##17
+78
+##ion
+77
+6000
+am
+##th
+##st
+94
+##se
+##et
+69
+180
+gdp
+my
+105
+81
+abc
+89
+flash
+79
+one
+93
+1990
+1996
+##ck
+gps
+##も
+##ly
+web885
+106
+2020
+91
+##ge
+4000
+1500
+xd
+boss
+isbn
+1994
+org
+##ry
+me
+love
+##11
+0fork
+73
+##12
+3g
+##ter
+##ar
+71
+82
+##la
+hotel
+130
+1970
+pk
+83
+87
+140
+ie
+##os
+##30
+##el
+74
+##50
+seo
+cpu
+##ml
+p2p
+84
+may
+##る
+sun
+tue
+internet
+cc
+posted
+youtube
+##at
+##ン
+##man
+ii
+##ル
+##15
+abs
+nt
+pdf
+yahoo
+ago
+1980
+##it
+news
+mac
+104
+##てす
+##me
+##り
+java
+1992
+spa
+##de
+##nt
+hk
+all
+plus
+la
+1993
+##mb
+##16
+##ve
+west
+##da
+160
+air
+##い
+##ps
+から
+##to
+1989
+logo
+htc
+php
+https
+fi
+momo
+##son
+sat
+##ke
+##80
+ebd
+suv
+wi
+day
+apk
+##88
+##um
+mv
+galaxy
+wiki
+or
+brake
+##ス
+1200
+する
+this
+1991
+mon
+##こ
+❤2017
+po
+##ない
+javascript
+life
+home
+june
+##ss
+system
+900
+##ー
+##０
+pp
+1988
+world
+fb
+4k
+br
+##as
+ic
+ai
+leonardo
+safari
+##60
+live
+free
+xx
+wed
+win7
+kiehl
+##co
+lg
+o2o
+##go
+us
+235
+1949
+mm
+しい
+vfm
+kanye
+##90
+##2015
+##id
+jr
+##ey
+123
+rss
+##sa
+##ro
+##am
+##no
+thu
+fri
+350
+##sh
+##ki
+103
+comments
+name
+##のて
+##pe
+##ine
+max
+1987
+8000
+uber
+##mi
+##ton
+wordpress
+office
+1986
+1985
+##ment
+107
+bd
+win10
+##ld
+##li
+gmail
+bb
+dior
+##rs
+##ri
+##rd
+##ます
+up
+cad
+##®
+dr
+して
+read
+##21
+をお
+##io
+##99
+url
+1984
+pvc
+paypal
+show
+policy
+##40
+##ty
+##18
+with
+##★
+##01
+txt
+102
+##ba
+dna
+from
+post
+mini
+ar
+taiwan
+john
+##ga
+privacy
+agoda
+##13
+##ny
+word
+##24
+##22
+##by
+##ur
+##hz
+1982
+##ang
+265
+cookie
+netscape
+108
+##ka
+##～
+##ad
+house
+share
+note
+ibm
+code
+hello
+nike
+sim
+survey
+##016
+1979
+1950
+wikia
+##32
+##017
+5g
+cbc
+##tor
+##kg
+1983
+##rt
+##14
+campaign
+store
+2500
+os
+##ct
+##ts
+##°
+170
+api
+##ns
+365
+excel
+##な
+##ao
+##ら
+##し
+～～
+##nd
+university
+163
+には
+518
+##70
+##ya
+##il
+##25
+pierre
+ipo
+0020
+897
+##23
+hotels
+##ian
+のお
+125
+years
+6606
+##ers
+##26
+high
+##day
+time
+##ay
+bug
+##line
+##く
+##す
+##be
+xp
+talk2yam
+yamservice
+10000
+coco
+##dy
+sony
+##ies
+1978
+microsoft
+david
+people
+##ha
+1960
+instagram
+intel
+その
+##ot
+iso
+1981
+##va
+115
+##mo
+##land
+xxx
+man
+co
+ltxsw
+##ation
+baby
+220
+##pa
+##ol
+1945
+7000
+tag
+450
+##ue
+msn
+##31
+oppo
+##ト
+##ca
+control
+##om
+st
+chrome
+##ure
+##ん
+be
+##き
+lol
+##19
+した
+##bo
+240
+lady
+##100
+##way
+##から
+4600
+##ko
+##do
+##un
+4s
+corporation
+168
+##ni
+herme
+##28
+ｃｐ
+978
+##up
+##06
+ui
+##ds
+ppt
+admin
+three
+します
+bbc
+re
+128
+##48
+ca
+##015
+##35
+hp
+##ee
+tpp
+##た
+##ive
+××
+root
+##cc
+##ました
+##ble
+##ity
+adobe
+park
+114
+et
+oled
+city
+##ex
+##ler
+##ap
+china
+##book
+20000
+view
+##ice
+global
+##km
+your
+hong
+##mg
+out
+##ms
+ng
+ebay
+##29
+menu
+ubuntu
+##cy
+rom
+##view
+open
+ktv
+do
+server
+##lo
+if
+english
+##ね
+##５
+##oo
+1600
+##02
+step1
+kong
+club
+135
+july
+inc
+1976
+mr
+hi
+##net
+touch
+##ls
+##ii
+michael
+lcd
+##05
+##33
+phone
+james
+step2
+1300
+ios9
+##box
+dc
+##２
+##ley
+samsung
+111
+280
+pokemon
+css
+##ent
+##les
+いいえ
+##１
+s8
+atom
+play
+bmw
+##said
+sa
+etf
+ctrl
+♥yoyo♥
+##55
+2025
+##2014
+##66
+adidas
+amazon
+1958
+##ber
+##ner
+visa
+##77
+##der
+1800
+connectivity
+##hi
+firefox
+109
+118
+hr
+so
+style
+mark
+pop
+ol
+skip
+1975
+as
+##27
+##ir
+##61
+190
+mba
+##う
+##ai
+le
+##ver
+1900
+cafe2017
+lte
+super
+113
+129
+##ron
+amd
+like
+##☆
+are
+##ster
+we
+##sk
+paul
+data
+international
+##ft
+longchamp
+ssd
+good
+##ート
+##ti
+reply
+##my
+↓↓↓
+apr
+star
+##ker
+source
+136
+js
+112
+get
+force
+photo
+##one
+126
+##2013
+##ow
+link
+bbs
+1972
+goods
+##lin
+python
+119
+##ip
+game
+##ics
+##ません
+blue
+##●
+520
+##45
+page
+itunes
+##03
+1955
+260
+1968
+gt
+gif
+618
+##ff
+##47
+group
+くたさい
+about
+bar
+ganji
+##nce
+music
+lee
+not
+1977
+1971
+1973
+##per
+an
+faq
+comment
+##って
+days
+##ock
+116
+##bs
+1974
+1969
+v1
+player
+1956
+xbox
+sql
+fm
+f1
+139
+##ah
+210
+##lv
+##mp
+##000
+melody
+1957
+##３
+550
+17life
+199
+1966
+xml
+market
+##au
+##71
+999
+##04
+what
+gl
+##95
+##age
+tips
+##68
+book
+##ting
+mysql
+can
+1959
+230
+##ung
+wonderland
+watch
+10℃
+##ction
+9000
+mar
+mobile
+1946
+1962
+article
+##db
+part
+▲top
+party
+って
+1967
+1964
+1948
+##07
+##ore
+##op
+この
+dj
+##78
+##38
+010
+main
+225
+1965
+##ong
+art
+320
+ad
+134
+020
+##73
+117
+pm2
+japan
+228
+##08
+ts
+1963
+##ica
+der
+sm
+##36
+2019
+##wa
+ct
+##７
+##や
+##64
+1937
+homemesh
+search
+##85
+##れは
+##tv
+##di
+macbook
+##９
+##くたさい
+service
+##♥
+type
+った
+750
+##ier
+##si
+##75
+##います
+##ok
+best
+##ット
+goris
+lock
+##った
+cf
+3m
+big
+##ut
+ftp
+carol
+##vi
+１０
+1961
+happy
+sd
+##ac
+122
+anti
+pe
+cnn
+iii
+1920
+138
+##ラ
+1940
+esp
+jan
+tags
+##98
+##51
+august
+vol
+##86
+154
+##™
+##fs
+##れ
+##sion
+design
+ac
+##ム
+press
+jordan
+ppp
+that
+key
+check
+##６
+##tt
+##㎡
+1080p
+##lt
+power
+##42
+1952
+##bc
+vivi
+##ック
+he
+133
+121
+jpg
+##rry
+201
+175
+3500
+1947
+nb
+##ted
+##rn
+しています
+1954
+usd
+##t00
+master
+##ンク
+001
+model
+##58
+al
+##09
+1953
+##34
+ram
+goo
+ても
+##ui
+127
+1930
+red
+##ary
+rpg
+item
+##pm
+##41
+270
+##za
+project
+##2012
+hot
+td
+blogabstract
+##ger
+##62
+650
+##44
+gr2
+##します
+##ｍ
+black
+electronic
+nfc
+year
+asus
+また
+html5
+cindy
+##hd
+m3
+132
+esc
+##od
+booking
+##53
+fed
+tvb
+##81
+##ina
+mit
+165
+##いる
+chan
+192
+distribution
+next
+になる
+peter
+bios
+steam
+cm
+1941
+にも
+pk10
+##ix
+##65
+##91
+dec
+nasa
+##ana
+icecat
+00z
+b1
+will
+##46
+li
+se
+##ji
+##み
+##ard
+oct
+##ain
+jp
+##ze
+##bi
+cio
+##56
+smart
+h5
+##39
+##port
+curve
+vpn
+##nm
+##dia
+utc
+##あり
+12345678910
+##52
+rmvb
+chanel
+a4
+miss
+##and
+##im
+media
+who
+##63
+she
+girl
+5s
+124
+vera
+##して
+class
+vivo
+king
+##フ
+##ei
+national
+ab
+1951
+5cm
+888
+145
+ipod
+ap
+1100
+5mm
+211
+ms
+2756
+##69
+mp4
+msci
+##po
+##89
+131
+mg
+index
+380
+##bit
+##out
+##zz
+##97
+##67
+158
+apec
+##８
+photoshop
+opec
+￥799
+ては
+##96
+##tes
+##ast
+2g
+○○
+##ール
+￥2899
+##ling
+##よ
+##ory
+1938
+##ical
+kitty
+content
+##43
+step3
+##cn
+win8
+155
+vc
+1400
+iphone7
+robert
+##した
+tcl
+137
+beauty
+##87
+en
+dollars
+##ys
+##oc
+step
+pay
+yy
+a1
+##2011
+##lly
+##ks
+##♪
+1939
+188
+download
+1944
+sep
+exe
+ph
+います
+school
+gb
+center
+pr
+street
+##board
+uv
+##37
+##lan
+winrar
+##que
+##ua
+##com
+1942
+1936
+480
+gpu
+##４
+ettoday
+fu
+tom
+##54
+##ren
+##via
+149
+##72
+b2b
+144
+##79
+##tch
+rose
+arm
+mb
+##49
+##ial
+##nn
+nvidia
+step4
+mvp
+00㎡
+york
+156
+##イ
+how
+cpi
+591
+2765
+gov
+kg
+joe
+##xx
+mandy
+pa
+##ser
+copyright
+fashion
+1935
+don
+##け
+ecu
+##ist
+##art
+erp
+wap
+have
+##lm
+talk
+##ek
+##ning
+##if
+ch
+##ite
+video
+1943
+cs
+san
+iot
+look
+##84
+##2010
+##ku
+october
+##ux
+trump
+##hs
+##ide
+box
+141
+first
+##ins
+april
+##ight
+##83
+185
+angel
+protected
+aa
+151
+162
+x1
+m2
+##fe
+##×
+##ho
+size
+143
+min
+ofo
+fun
+gomaji
+ex
+hdmi
+food
+dns
+march
+chris
+kevin
+##のか
+##lla
+##pp
+##ec
+ag
+ems
+6s
+720p
+##rm
+##ham
+off
+##92
+asp
+team
+fandom
+ed
+299
+▌♥
+##ell
+info
+されています
+##82
+sina
+4066
+161
+##able
+##ctor
+330
+399
+315
+dll
+rights
+ltd
+idc
+jul
+3kg
+1927
+142
+ma
+surface
+##76
+##ク
+～～～
+304
+mall
+eps
+146
+green
+##59
+map
+space
+donald
+v2
+sodu
+##light
+1931
+148
+1700
+まて
+310
+reserved
+htm
+##han
+##57
+2d
+178
+mod
+##ise
+##tions
+152
+ti
+##shi
+doc
+1933
+icp
+055
+wang
+##ram
+shopping
+aug
+##pi
+##well
+now
+wam
+b2
+からお
+##hu
+236
+1928
+##gb
+266
+f2
+##93
+153
+mix
+##ef
+##uan
+bwl
+##plus
+##res
+core
+##ess
+tea
+5℃
+hktvmall
+nhk
+##ate
+list
+##ese
+301
+feb
+4m
+inn
+ての
+nov
+159
+12345
+daniel
+##ci
+pass
+##bet
+##nk
+coffee
+202
+ssl
+airbnb
+##ute
+fbi
+woshipm
+skype
+ea
+cg
+sp
+##fc
+##www
+yes
+edge
+alt
+007
+##94
+fpga
+##ght
+##gs
+iso9001
+さい
+##ile
+##wood
+##uo
+image
+lin
+icon
+american
+##em
+1932
+set
+says
+##king
+##tive
+blogger
+##74
+なと
+256
+147
+##ox
+##zy
+##red
+##ium
+##lf
+nokia
+claire
+##リ
+##ding
+november
+lohas
+##500
+##tic
+##マ
+##cs
+##ある
+##che
+##ire
+##gy
+##ult
+db
+january
+win
+##カ
+166
+road
+ptt
+##ま
+##つ
+198
+##fa
+##mer
+anna
+pchome
+はい
+udn
+ef
+420
+##time
+##tte
+2030
+##ア
+g20
+white
+かかります
+1929
+308
+garden
+eleven
+di
+##おります
+chen
+309b
+777
+172
+young
+cosplay
+ちてない
+4500
+bat
+##123
+##tra
+##ては
+kindle
+npc
+steve
+etc
+##ern
+##｜
+call
+xperia
+ces
+travel
+sk
+s7
+##ous
+1934
+##int
+みいたたけます
+183
+edu
+file
+cho
+qr
+##car
+##our
+186
+##ant
+##ｄ
+eric
+1914
+rends
+##jo
+##する
+mastercard
+##2000
+kb
+##min
+290
+##ino
+vista
+##ris
+##ud
+jack
+2400
+##set
+169
+pos
+1912
+##her
+##ou
+taipei
+しく
+205
+beta
+##ませんか
+232
+##fi
+express
+255
+body
+##ill
+aphojoy
+user
+december
+meiki
+##ick
+tweet
+richard
+##av
+##ᆫ
+iphone6
+##dd
+ちてすか
+views
+##mark
+321
+pd
+##００
+times
+##▲
+level
+##ash
+10g
+point
+5l
+##ome
+208
+koreanmall
+##ak
+george
+q2
+206
+wma
+tcp
+##200
+スタッフ
+full
+mlb
+##lle
+##watch
+tm
+run
+179
+911
+smith
+business
+##und
+1919
+color
+##tal
+222
+171
+##less
+moon
+4399
+##rl
+update
+pcb
+shop
+499
+157
+little
+なし
+end
+##mhz
+van
+dsp
+easy
+660
+##house
+##key
+history
+##ｏ
+oh
+##001
+##hy
+##web
+oem
+let
+was
+##2009
+##gg
+review
+##wan
+182
+##°c
+203
+uc
+title
+##val
+united
+233
+2021
+##ons
+doi
+trivago
+overdope
+sbs
+##ance
+##ち
+grand
+special
+573032185
+imf
+216
+wx17house
+##so
+##ーム
+audi
+##he
+london
+william
+##rp
+##ake
+science
+beach
+cfa
+amp
+ps4
+880
+##800
+##link
+##hp
+crm
+ferragamo
+bell
+make
+##eng
+195
+under
+zh
+photos
+2300
+##style
+##ント
+via
+176
+da
+##gi
+company
+i7
+##ray
+thomas
+370
+ufo
+i5
+##max
+plc
+ben
+back
+research
+8g
+173
+mike
+##pc
+##ッフ
+september
+189
+##ace
+vps
+february
+167
+pantos
+wp
+lisa
+1921
+★★
+jquery
+night
+long
+offer
+##berg
+##news
+1911
+##いて
+ray
+fks
+wto
+せます
+over
+164
+340
+##all
+##rus
+1924
+##888
+##works
+blogtitle
+loftpermalink
+##→
+187
+martin
+test
+ling
+km
+##め
+15000
+fda
+v3
+##ja
+##ロ
+ｗedding
+かある
+outlet
+family
+##ea
+をこ
+##top
+story
+##ness
+salvatore
+##lu
+204
+swift
+215
+room
+している
+oracle
+##ul
+1925
+sam
+b2c
+week
+pi
+rock
+##のは
+##ａ
+##けと
+##ean
+##300
+##gle
+cctv
+after
+chinese
+##back
+powered
+x2
+##tan
+1918
+##nes
+##イン
+canon
+only
+181
+##zi
+##las
+say
+##oe
+184
+##sd
+221
+##bot
+##world
+##zo
+sky
+made
+top100
+just
+1926
+pmi
+802
+234
+gap
+##vr
+177
+les
+174
+▲topoct
+ball
+vogue
+vi
+ing
+ofweek
+cos
+##list
+##ort
+▲topmay
+##なら
+##lon
+として
+last
+##tc
+##of
+##bus
+##gen
+real
+eva
+##コ
+a3
+nas
+##lie
+##ria
+##coin
+##bt
+▲topapr
+his
+212
+cat
+nata
+vive
+health
+⋯⋯
+drive
+sir
+▲topmar
+du
+cup
+##カー
+##ook
+##よう
+##sy
+alex
+msg
+tour
+しました
+3ce
+##word
+193
+ebooks
+r8
+block
+318
+##より
+2200
+nice
+pvp
+207
+months
+1905
+rewards
+##ther
+1917
+0800
+##xi
+##チ
+##sc
+micro
+850
+gg
+blogfp
+op
+1922
+daily
+m1
+264
+true
+##bb
+ml
+##tar
+##のお
+##ky
+anthony
+196
+253
+##yo
+state
+218
+##ara
+##aa
+##rc
+##tz
+##ston
+より
+gear
+##eo
+##ade
+ge
+see
+1923
+##win
+##ura
+ss
+heart
+##den
+##ita
+down
+##sm
+el
+png
+2100
+610
+rakuten
+whatsapp
+bay
+dream
+add
+##use
+680
+311
+pad
+gucci
+mpv
+##ode
+##fo
+island
+▲topjun
+##▼
+223
+jason
+214
+chicago
+##❤
+しの
+##hone
+io
+##れる
+##ことか
+sogo
+be2
+##ology
+990
+cloud
+vcd
+##con
+2～3
+##ford
+##joy
+##kb
+##こさいます
+##rade
+but
+##ach
+docker
+##ful
+rfid
+ul
+##ase
+hit
+ford
+##star
+580
+##○
+１１
+a2
+sdk
+reading
+edited
+##are
+cmos
+##mc
+238
+siri
+light
+##ella
+##ため
+bloomberg
+##read
+pizza
+##ison
+jimmy
+##vm
+college
+node
+journal
+ba
+18k
+##play
+245
+##cer
+２０
+magic
+##yu
+191
+jump
+288
+tt
+##ings
+asr
+##lia
+3200
+step5
+network
+##cd
+mc
+いします
+1234
+pixstyleme
+273
+##600
+2800
+money
+★★★★★
+1280
+１２
+430
+bl
+みの
+act
+##tus
+tokyo
+##rial
+##life
+emba
+##ae
+saas
+tcs
+##rk
+##wang
+summer
+##sp
+ko
+##ving
+390
+premium
+##その
+netflix
+##ヒ
+uk
+mt
+##lton
+right
+frank
+two
+209
+える
+##ple
+##cal
+021
+##んな
+##sen
+##ville
+hold
+nexus
+dd
+##ius
+てお
+##mah
+##なく
+tila
+zero
+820
+ce
+##tin
+resort
+##ws
+charles
+old
+p10
+5d
+report
+##360
+##ru
+##には
+bus
+vans
+lt
+##est
+pv
+##レ
+links
+rebecca
+##ツ
+##dm
+azure
+##365
+きな
+limited
+bit
+4gb
+##mon
+1910
+moto
+##eam
+213
+1913
+var
+eos
+なとの
+226
+blogspot
+された
+699
+e3
+dos
+dm
+fc
+##ments
+##ik
+##kw
+boy
+##bin
+##ata
+960
+er
+##せ
+219
+##vin
+##tu
+##ula
+194
+##∥
+station
+##ろ
+##ature
+835
+files
+zara
+hdr
+top10
+nature
+950
+magazine
+s6
+marriott
+##シ
+avira
+case
+##っと
+tab
+##ran
+tony
+##home
+oculus
+im
+##ral
+jean
+saint
+cry
+307
+rosie
+##force
+##ini
+ice
+##bert
+のある
+##nder
+##mber
+pet
+2600
+##◆
+plurk
+▲topdec
+##sis
+00kg
+▲topnov
+720
+##ence
+tim
+##ω
+##nc
+##ても
+##name
+log
+ips
+great
+ikea
+malaysia
+unix
+##イト
+3600
+##ncy
+##nie
+12000
+akb48
+##ye
+##oid
+404
+##chi
+##いた
+oa
+xuehai
+##1000
+##orm
+##rf
+275
+さん
+##ware
+##リー
+980
+ho
+##pro
+text
+##era
+560
+bob
+227
+##ub
+##2008
+8891
+scp
+avi
+##zen
+2022
+mi
+wu
+museum
+qvod
+apache
+lake
+jcb
+▲topaug
+★★★
+ni
+##hr
+hill
+302
+ne
+weibo
+490
+ruby
+##ーシ
+##ヶ
+##row
+4d
+▲topjul
+iv
+##ish
+github
+306
+mate
+312
+##スト
+##lot
+##ane
+andrew
+のハイト
+##tina
+t1
+rf
+ed2k
+##vel
+##900
+way
+final
+りの
+ns
+5a
+705
+197
+##メ
+sweet
+bytes
+##ene
+▲topjan
+231
+##cker
+##2007
+##px
+100g
+topapp
+229
+helpapp
+rs
+low
+14k
+g4g
+care
+630
+ldquo
+あり
+##fork
+leave
+rm
+edition
+##gan
+##zon
+##qq
+▲topsep
+##google
+##ism
+gold
+224
+explorer
+##zer
+toyota
+category
+select
+visual
+##labels
+restaurant
+##md
+posts
+s1
+##ico
+もっと
+angelababy
+123456
+217
+sports
+s3
+mbc
+1915
+してくたさい
+shell
+x86
+candy
+##new
+kbs
+face
+xl
+470
+##here
+4a
+swissinfo
+v8
+▲topfeb
+dram
+##ual
+##vice
+3a
+##wer
+sport
+q1
+ios10
+public
+int
+card
+##ｃ
+ep
+au
+rt
+##れた
+1080
+bill
+##mll
+kim
+３０
+460
+wan
+##uk
+##ミ
+x3
+298
+0t
+scott
+##ming
+239
+e5
+##3d
+h7n9
+worldcat
+brown
+##あります
+##vo
+##led
+##580
+##ax
+249
+410
+##ert
+paris
+##～6
+polo
+925
+##lr
+599
+##ナ
+capital
+##hing
+bank
+cv
+1g
+##chat
+##ｓ
+##たい
+adc
+##ule
+2m
+##ｅ
+digital
+hotmail
+268
+##pad
+870
+bbq
+quot
+##ring
+before
+wali
+##まて
+mcu
+2k
+2b
+という
+costco
+316
+north
+333
+switch
+##city
+##ｐ
+philips
+##mann
+management
+panasonic
+##cl
+##vd
+##ping
+##rge
+alice
+##lk
+##ましょう
+css3
+##ney
+vision
+alpha
+##ular
+##400
+##tter
+lz
+にお
+##ありません
+mode
+gre
+1916
+pci
+##tm
+237
+1～2
+##yan
+##そ
+について
+##let
+##キ
+work
+war
+coach
+ah
+mary
+##ᅵ
+huang
+##pt
+a8
+pt
+follow
+##berry
+1895
+##ew
+a5
+ghost
+##ション
+##wn
+##og
+south
+##code
+girls
+##rid
+action
+villa
+git
+r11
+table
+games
+##cket
+error
+##anonymoussaid
+##ag
+here
+##ame
+##gc
+qa
+##■
+##lis
+gmp
+##gin
+vmalife
+##cher
+yu
+wedding
+##tis
+demo
+dragon
+530
+soho
+social
+bye
+##rant
+river
+orz
+acer
+325
+##↑
+##ース
+##ats
+261
+del
+##ven
+440
+ups
+##ように
+##ター
+305
+value
+macd
+yougou
+##dn
+661
+##ano
+ll
+##urt
+##rent
+continue
+script
+##wen
+##ect
+paper
+263
+319
+shift
+##chel
+##フト
+##cat
+258
+x5
+fox
+243
+##さん
+car
+aaa
+##blog
+loading
+##yn
+##tp
+kuso
+799
+si
+sns
+イカせるテンマ
+ヒンクテンマ3
+rmb
+vdc
+forest
+central
+prime
+help
+ultra
+##rmb
+##ような
+241
+square
+688
+##しい
+のないフロクに
+##field
+##reen
+##ors
+##ju
+c1
+start
+510
+##air
+##map
+cdn
+##wo
+cba
+stephen
+m8
+100km
+##get
+opera
+##base
+##ood
+vsa
+com™
+##aw
+##ail
+251
+なのて
+count
+t2
+##ᅡ
+##een
+2700
+hop
+##gp
+vsc
+tree
+##eg
+##ose
+816
+285
+##ories
+##shop
+alphago
+v4
+1909
+simon
+##ᆼ
+fluke62max
+zip
+スホンサー
+##sta
+louis
+cr
+bas
+##～10
+bc
+##yer
+hadoop
+##ube
+##wi
+1906
+0755
+hola
+##low
+place
+centre
+5v
+d3
+##fer
+252
+##750
+##media
+281
+540
+0l
+exchange
+262
+series
+##ハー
+##san
+eb
+##bank
+##ｋ
+q3
+##nge
+##mail
+take
+##lp
+259
+1888
+client
+east
+cache
+event
+vincent
+##ールを
+きを
+##nse
+sui
+855
+adchoice
+##и
+##stry
+##なたの
+246
+##zone
+ga
+apps
+sea
+##ab
+248
+cisco
+##タ
+##rner
+kymco
+##care
+dha
+##pu
+##yi
+minkoff
+royal
+p1
+への
+annie
+269
+collection
+kpi
+playstation
+257
+になります
+866
+bh
+##bar
+queen
+505
+radio
+1904
+andy
+armani
+##xy
+manager
+iherb
+##ery
+##share
+spring
+raid
+johnson
+1908
+##ob
+volvo
+hall
+##ball
+v6
+our
+taylor
+##hk
+bi
+242
+##cp
+kate
+bo
+water
+technology
+##rie
+サイトは
+277
+##ona
+##sl
+hpv
+303
+gtx
+hip
+rdquo
+jayz
+stone
+##lex
+##rum
+namespace
+##やり
+620
+##ale
+##atic
+des
+##erson
+##ql
+##ves
+##type
+enter
+##この
+##てきます
+d2
+##168
+##mix
+##bian
+との
+a9
+jj
+ky
+##lc
+access
+movie
+##hc
+リストに
+tower
+##ration
+##mit
+ます
+##nch
+ua
+tel
+prefix
+##o2
+1907
+##point
+1901
+ott
+～10
+##http
+##ury
+baidu
+##ink
+member
+##logy
+bigbang
+nownews
+##js
+##shot
+##tb
+##こと
+247
+eba
+##tics
+##lus
+ける
+v5
+spark
+##ama
+there
+##ions
+god
+##lls
+##down
+hiv
+##ress
+burberry
+day2
+##kv
+◆◆
+jeff
+related
+film
+edit
+joseph
+283
+##ark
+cx
+32gb
+order
+g9
+30000
+##ans
+##tty
+s5
+##bee
+かあります
+thread
+xr
+buy
+sh
+005
+land
+spotify
+mx
+##ari
+276
+##verse
+×email
+sf
+why
+##ことて
+244
+7headlines
+nego
+sunny
+dom
+exo
+401
+666
+positioning
+fit
+rgb
+##tton
+278
+kiss
+alexa
+adam
+lp
+みリストを
+##ｇ
+mp
+##ties
+##llow
+amy
+##du
+np
+002
+institute
+271
+##rth
+##lar
+2345
+590
+##des
+sidebar
+１５
+imax
+site
+##cky
+##kit
+##ime
+##009
+season
+323
+##fun
+##ンター
+##ひ
+gogoro
+a7
+pu
+lily
+fire
+twd600
+##ッセーシを
+いて
+##vis
+30ml
+##cture
+##をお
+information
+##オ
+close
+friday
+##くれる
+yi
+nick
+てすか
+##tta
+##tel
+6500
+##lock
+cbd
+economy
+254
+かお
+267
+tinker
+double
+375
+8gb
+voice
+##app
+oops
+channel
+today
+985
+##right
+raw
+xyz
+##＋
+jim
+edm
+##cent
+7500
+supreme
+814
+ds
+##its
+##asia
+dropbox
+##てすか
+##tti
+books
+272
+100ml
+##tle
+##ller
+##ken
+##more
+##boy
+sex
+309
+##dom
+t3
+##ider
+##なります
+##unch
+1903
+810
+feel
+5500
+##かった
+##put
+により
+s2
+mo
+##gh
+men
+ka
+amoled
+div
+##tr
+##n1
+port
+howard
+##tags
+ken
+dnf
+##nus
+adsense
+##а
+ide
+##へ
+buff
+thunder
+##town
+##ique
+has
+##body
+auto
+pin
+##erry
+tee
+てした
+295
+number
+##the
+##013
+object
+psp
+cool
+udnbkk
+16gb
+##mic
+miui
+##tro
+most
+r2
+##alk
+##nity
+1880
+±0
+##いました
+428
+s4
+law
+version
+##oa
+n1
+sgs
+docomo
+##tf
+##ack
+henry
+fc2
+##ded
+##sco
+##014
+##rite
+286
+0mm
+linkedin
+##ada
+##now
+wii
+##ndy
+ucbug
+##◎
+sputniknews
+legalminer
+##ika
+##xp
+2gb
+##bu
+q10
+oo
+b6
+come
+##rman
+cheese
+ming
+maker
+##gm
+nikon
+##fig
+ppi
+kelly
+##ります
+jchere
+てきます
+ted
+md
+003
+fgo
+tech
+##tto
+dan
+soc
+##gl
+##len
+hair
+earth
+640
+521
+img
+##pper
+##a1
+##てきる
+##ロク
+acca
+##ition
+##ference
+suite
+##ig
+outlook
+##mond
+##cation
+398
+##pr
+279
+101vip
+358
+##999
+282
+64gb
+3800
+345
+airport
+##over
+284
+##おり
+jones
+##ith
+lab
+##su
+##いるのて
+co2
+town
+piece
+##llo
+no1
+vmware
+24h
+##qi
+focus
+reader
+##admin
+##ora
+tb
+false
+##log
+1898
+know
+lan
+838
+##ces
+f4
+##ume
+motel
+stop
+##oper
+na
+flickr
+netcomponents
+##af
+##─
+pose
+williams
+local
+##ound
+##cg
+##site
+##iko
+いお
+274
+5m
+gsm
+con
+##ath
+1902
+friends
+##hip
+cell
+317
+##rey
+780
+cream
+##cks
+012
+##dp
+facebooktwitterpinterestgoogle
+sso
+324
+shtml
+song
+swiss
+##mw
+##キンク
+lumia
+xdd
+string
+tiffany
+522
+marc
+られた
+insee
+russell
+sc
+dell
+##ations
+ｏｋ
+camera
+289
+##vs
+##flow
+##late
+classic
+287
+##nter
+stay
+g1
+mtv
+512
+##ever
+##lab
+##nger
+qe
+sata
+ryan
+d1
+50ml
+cms
+##cing
+su
+292
+3300
+editor
+296
+##nap
+security
+sunday
+association
+##ens
+##700
+##bra
+acg
+##かり
+sofascore
+とは
+mkv
+##ign
+jonathan
+gary
+build
+labels
+##oto
+tesla
+moba
+qi
+gohappy
+general
+ajax
+1024
+##かる
+サイト
+society
+##test
+##urs
+wps
+fedora
+##ich
+mozilla
+328
+##480
+##dr
+usa
+urn
+##lina
+##ｒ
+grace
+##die
+##try
+##ader
+1250
+##なり
+elle
+570
+##chen
+##ᆯ
+price
+##ten
+uhz
+##ough
+eq
+##hen
+states
+push
+session
+balance
+wow
+506
+##cus
+##py
+when
+##ward
+##ep
+34e
+wong
+library
+prada
+##サイト
+##cle
+running
+##ree
+313
+ck
+date
+q4
+##ctive
+##ool
+##＞
+mk
+##ira
+##163
+388
+die
+secret
+rq
+dota
+buffet
+は１ヶ
+e6
+##ez
+pan
+368
+ha
+##card
+##cha
+2a
+##さ
+alan
+day3
+eye
+f3
+##end
+france
+keep
+adi
+rna
+tvbs
+##ala
+solo
+nova
+##え
+##tail
+##ょう
+support
+##ries
+##なる
+##ved
+base
+copy
+iis
+fps
+##ways
+hero
+hgih
+profile
+fish
+mu
+ssh
+entertainment
+chang
+##wd
+click
+cake
+##ond
+pre
+##tom
+kic
+pixel
+##ov
+##fl
+product
+6a
+##pd
+dear
+##gate
+es
+yumi
+audio
+##²
+##sky
+echo
+bin
+where
+##ture
+329
+##ape
+find
+sap
+isis
+##なと
+nand
+##101
+##load
+##ream
+band
+a6
+525
+never
+##post
+festival
+50cm
+##we
+555
+guide
+314
+zenfone
+##ike
+335
+gd
+forum
+jessica
+strong
+alexander
+##ould
+software
+allen
+##ious
+program
+360°
+else
+lohasthree
+##gar
+することかてきます
+please
+##れます
+rc
+##ggle
+##ric
+bim
+50000
+##own
+eclipse
+355
+brian
+3ds
+##side
+061
+361
+##other
+##ける
+##tech
+##ator
+485
+engine
+##ged
+##ｔ
+plaza
+##fit
+cia
+ngo
+westbrook
+shi
+tbs
+50mm
+##みませんか
+sci
+291
+reuters
+##ily
+contextlink
+##hn
+af
+##cil
+bridge
+very
+##cel
+1890
+cambridge
+##ize
+15g
+##aid
+##data
+790
+frm
+##head
+award
+butler
+##sun
+meta
+##mar
+america
+ps3
+puma
+pmid
+##すか
+lc
+670
+kitchen
+##lic
+オーフン5
+きなしソフトサーヒス
+そして
+day1
+future
+★★★★
+##text
+##page
+##rris
+pm1
+##ket
+fans
+##っています
+1001
+christian
+bot
+kids
+trackback
+##hai
+c3
+display
+##hl
+n2
+1896
+idea
+さんも
+##sent
+airmail
+##ug
+##men
+pwm
+けます
+028
+##lution
+369
+852
+awards
+schemas
+354
+asics
+wikipedia
+font
+##tional
+##vy
+c2
+293
+##れている
+##dget
+##ein
+っている
+contact
+pepper
+スキル
+339
+##～5
+294
+##uel
+##ument
+730
+##hang
+みてす
+q5
+##sue
+rain
+##ndi
+wei
+swatch
+##cept
+わせ
+331
+popular
+##ste
+##tag
+p2
+501
+trc
+1899
+##west
+##live
+justin
+honda
+ping
+messenger
+##rap
+v9
+543
+##とは
+unity
+appqq
+はすへて
+025
+leo
+##tone
+##テ
+##ass
+uniqlo
+##010
+502
+her
+jane
+memory
+moneydj
+##tical
+human
+12306
+していると
+##m2
+coc
+miacare
+##mn
+tmt
+##core
+vim
+kk
+##may
+fan
+target
+use
+too
+338
+435
+2050
+867
+737
+fast
+##2c
+services
+##ope
+omega
+energy
+##わ
+pinkoi
+1a
+##なから
+##rain
+jackson
+##ement
+##シャンルの
+374
+366
+そんな
+p9
+rd
+##ᆨ
+1111
+##tier
+##vic
+zone
+##│
+385
+690
+dl
+isofix
+cpa
+m4
+322
+kimi
+めて
+davis
+##lay
+lulu
+##uck
+050
+weeks
+qs
+##hop
+920
+##ｎ
+ae
+##ear
+～5
+eia
+405
+##fly
+korea
+jpeg
+boost
+##ship
+small
+##リア
+1860
+eur
+297
+425
+valley
+##iel
+simple
+##ude
+rn
+k2
+##ena
+されます
+non
+patrick
+しているから
+##ナー
+feed
+5757
+30g
+process
+well
+qqmei
+##thing
+they
+aws
+lu
+pink
+##ters
+##kin
+または
+board
+##vertisement
+wine
+##ien
+unicode
+##dge
+r1
+359
+##tant
+いを
+##twitter
+##3c
+cool1
+される
+##れて
+##ｌ
+isp
+##012
+standard
+45㎡2
+402
+##150
+matt
+##fu
+326
+##iner
+googlemsn
+pixnetfacebookyahoo
+##ラン
+x7
+886
+##uce
+メーカー
+sao
+##ev
+##きました
+##file
+9678
+403
+xddd
+shirt
+6l
+##rio
+##hat
+3mm
+givenchy
+ya
+bang
+##lio
+monday
+crystal
+ロクイン
+##abc
+336
+head
+890
+ubuntuforumwikilinuxpastechat
+##vc
+##～20
+##rity
+cnc
+7866
+ipv6
+null
+1897
+##ost
+yang
+imsean
+tiger
+##fet
+##ンス
+352
+##＝
+dji
+327
+ji
+maria
+##come
+##んて
+foundation
+3100
+##beth
+##なった
+1m
+601
+active
+##aft
+##don
+3p
+sr
+349
+emma
+##khz
+living
+415
+353
+1889
+341
+709
+457
+sas
+x6
+##face
+pptv
+x4
+##mate
+han
+sophie
+##jing
+337
+fifa
+##mand
+other
+sale
+inwedding
+##gn
+てきちゃいます
+##mmy
+##pmlast
+bad
+nana
+nbc
+してみてくたさいね
+なとはお
+##wu
+##かあります
+##あ
+note7
+single
+##340
+せからこ
+してくたさい♪この
+しにはとんとんワークケートを
+するとあなたにもっとマッチした
+ならワークケートへ
+もみつかっちゃうかも
+ワークケートの
+##bel
+window
+##dio
+##ht
+union
+age
+382
+１４
+##ivity
+##ｙ
+コメント
+domain
+neo
+##isa
+##lter
+5k
+f5
+steven
+##cts
+powerpoint
+tft
+self
+g2
+ft
+##テル
+zol
+##act
+mwc
+381
+343
+もう
+nbapop
+408
+てある
+eds
+ace
+##room
+previous
+author
+tomtom
+il
+##ets
+hu
+financial
+☆☆☆
+っています
+bp
+5t
+chi
+1gb
+##hg
+fairmont
+cross
+008
+gay
+h2
+function
+##けて
+356
+also
+1b
+625
+##ータ
+##raph
+1894
+3～5
+##ils
+i3
+334
+avenue
+##host
+による
+##bon
+##tsu
+message
+navigation
+50g
+fintech
+h6
+##ことを
+8cm
+##ject
+##vas
+##firm
+credit
+##wf
+xxxx
+form
+##nor
+##space
+huawei
+plan
+json
+sbl
+##dc
+machine
+921
+392
+wish
+##120
+##sol
+windows7
+edward
+##ために
+development
+washington
+##nsis
+lo
+818
+##sio
+##ym
+##bor
+planet
+##～8
+##wt
+ieee
+gpa
+##めて
+camp
+ann
+gm
+##tw
+##oka
+connect
+##rss
+##work
+##atus
+wall
+chicken
+soul
+2mm
+##times
+fa
+##ather
+##cord
+009
+##eep
+hitachi
+gui
+harry
+##pan
+e1
+disney
+##press
+##ーション
+wind
+386
+frigidaire
+##tl
+liu
+hsu
+332
+basic
+von
+ev
+いた
+てきる
+スホンサーサイト
+learning
+##ull
+expedia
+archives
+change
+##wei
+santa
+cut
+ins
+6gb
+turbo
+brand
+cf1
+508
+004
+return
+747
+##rip
+h1
+##nis
+##をこ
+128gb
+##にお
+3t
+application
+しており
+emc
+rx
+##oon
+384
+quick
+412
+15058
+wilson
+wing
+chapter
+##bug
+beyond
+##cms
+##dar
+##oh
+zoom
+e2
+trip
+sb
+##nba
+rcep
+342
+aspx
+ci
+080
+gc
+gnu
+める
+##count
+advanced
+dance
+dv
+##url
+##ging
+367
+8591
+am09
+shadow
+battle
+346
+##ｉ
+##cia
+##という
+emily
+##のてす
+##tation
+host
+ff
+techorz
+sars
+##mini
+##mporary
+##ering
+nc
+4200
+798
+##next
+cma
+##mbps
+##gas
+##ift
+##dot
+##ィ
+455
+##～17
+amana
+##りの
+426
+##ros
+ir
+00㎡1
+##eet
+##ible
+##↓
+710
+ˋ▽ˊ
+##aka
+dcs
+iq
+##ｖ
+l1
+##lor
+maggie
+##011
+##iu
+588
+##～1
+830
+##gt
+1tb
+articles
+create
+##burg
+##iki
+database
+fantasy
+##rex
+##cam
+dlc
+dean
+##you
+hard
+path
+gaming
+victoria
+maps
+cb
+##lee
+##itor
+overchicstoretvhome
+systems
+##xt
+416
+p3
+sarah
+760
+##nan
+407
+486
+x9
+install
+second
+626
+##ann
+##ph
+##rcle
+##nic
+860
+##nar
+ec
+##とう
+768
+metro
+chocolate
+##rian
+～4
+##table
+##しています
+skin
+##sn
+395
+mountain
+##0mm
+inparadise
+6m
+7x24
+ib
+4800
+##jia
+eeworld
+creative
+g5
+g3
+357
+parker
+ecfa
+village
+からの
+18000
+sylvia
+サーヒス
+hbl
+##ques
+##onsored
+##x2
+##きます
+##v4
+##tein
+ie6
+383
+##stack
+389
+ver
+##ads
+##baby
+sound
+bbe
+##110
+##lone
+##uid
+ads
+022
+gundam
+351
+thinkpad
+006
+scrum
+match
+##ave
+mems
+##470
+##oy
+##なりました
+##talk
+glass
+lamigo
+span
+##eme
+job
+##a5
+jay
+wade
+kde
+498
+##lace
+ocean
+tvg
+##covery
+##r3
+##ners
+##rea
+junior
+think
+##aine
+cover
+##ision
+##sia
+↓↓
+##bow
+msi
+413
+458
+406
+##love
+711
+801
+soft
+z2
+##pl
+456
+1840
+mobil
+mind
+##uy
+427
+nginx
+##oi
+めた
+##rr
+6221
+##mple
+##sson
+##ーシてす
+371
+##nts
+91tv
+comhd
+crv3000
+##uard
+1868
+397
+deep
+lost
+field
+gallery
+##bia
+rate
+spf
+redis
+traction
+930
+icloud
+011
+なら
+fe
+jose
+372
+##tory
+into
+sohu
+fx
+899
+379
+kicstart2
+##hia
+すく
+##～3
+##sit
+ra
+２４
+##walk
+##xure
+500g
+##pact
+pacific
+xa
+natural
+carlo
+##250
+##walker
+1850
+##can
+cto
+gigi
+516
+##サー
+pen
+##hoo
+ob
+matlab
+##ｂ
+##yy
+13913459
+##iti
+mango
+##bbs
+sense
+c5
+oxford
+##ニア
+walker
+jennifer
+##ola
+course
+##bre
+701
+##pus
+##rder
+lucky
+075
+##ぁ
+ivy
+なお
+##nia
+sotheby
+side
+##ugh
+joy
+##orage
+##ush
+##bat
+##dt
+364
+r9
+##2d
+##gio
+511
+country
+wear
+##lax
+##～7
+##moon
+393
+seven
+study
+411
+348
+lonzo
+8k
+##ェ
+evolution
+##イフ
+##kk
+gs
+kd
+##レス
+arduino
+344
+b12
+##lux
+arpg
+##rdon
+cook
+##x5
+dark
+five
+##als
+##ida
+とても
+sign
+362
+##ちの
+something
+20mm
+##nda
+387
+##posted
+fresh
+tf
+1870
+422
+cam
+##mine
+##skip
+##form
+##ssion
+education
+394
+##tee
+dyson
+stage
+##jie
+want
+##night
+epson
+pack
+あります
+##ppy
+テリヘル
+##█
+wd
+##eh
+##rence
+left
+##lvin
+golden
+mhz
+discovery
+##trix
+##n2
+loft
+##uch
+##dra
+##sse
+speed
+～1
+1mdb
+sorry
+welcome
+##urn
+wave
+gaga
+##lmer
+teddy
+##160
+トラックハック
+せよ
+611
+##f2016
+378
+rp
+##sha
+rar
+##あなたに
+##きた
+840
+holiday
+##ュー
+373
+074
+##vg
+##nos
+##rail
+gartner
+gi
+6p
+##dium
+kit
+488
+b3
+eco
+##ろう
+20g
+sean
+##stone
+autocad
+nu
+##np
+f16
+write
+029
+m5
+##ias
+images
+atp
+##dk
+fsm
+504
+1350
+ve
+52kb
+##xxx
+##のに
+##cake
+414
+unit
+lim
+ru
+1v
+##ification
+published
+angela
+16g
+analytics
+ak
+##ｑ
+##nel
+gmt
+##icon
+again
+##₂
+##bby
+ios11
+445
+かこさいます
+waze
+いてす
+##ハ
+9985
+##ust
+##ティー
+framework
+##007
+iptv
+delete
+52sykb
+cl
+wwdc
+027
+30cm
+##fw
+##ての
+1389
+##xon
+brandt
+##ses
+##dragon
+tc
+vetements
+anne
+monte
+modern
+official
+##へて
+##ere
+##nne
+##oud
+もちろん
+５０
+etnews
+##a2
+##graphy
+421
+863
+##ちゃん
+444
+##rtex
+##てお
+l2
+##gma
+mount
+ccd
+たと
+archive
+morning
+tan
+ddos
+e7
+##ホ
+day4
+##ウ
+gis
+453
+its
+495
+factory
+bruce
+pg
+##ito
+ってくたさい
+guest
+cdma
+##lling
+536
+n3
+しかし
+3～4
+mega
+eyes
+ro
+１３
+women
+dac
+church
+##jun
+singapore
+##facebook
+6991
+starbucks
+##tos
+##stin
+##shine
+zen
+##mu
+tina
+20℃
+1893
+##たけて
+503
+465
+request
+##gence
+qt
+##っ
+1886
+347
+363
+q7
+##zzi
+diary
+##tore
+409
+##ead
+468
+cst
+##osa
+canada
+agent
+va
+##jiang
+##ちは
+##ーク
+##lam
+sg
+##nix
+##sday
+##よって
+g6
+##master
+bing
+##zl
+charlie
+１６
+8mm
+nb40
+##ーン
+thai
+##ルフ
+ln284ct
+##itz
+##2f
+bonnie
+##food
+##lent
+originals
+##stro
+##lts
+418
+∟∣
+##bscribe
+children
+ntd
+yesstyle
+##かも
+hmv
+##tment
+d5
+2cm
+arts
+sms
+##pn
+##я
+##いい
+topios9
+539
+lifestyle
+virtual
+##ague
+xz
+##deo
+muji
+024
+unt
+##nnis
+##ᅩ
+faq1
+1884
+396
+##ette
+fly
+64㎡
+はしめまして
+441
+curry
+##pop
+のこ
+release
+##←
+##◆◆
+##cast
+073
+ありな
+500ml
+##ews
+5c
+##stle
+ios7
+##ima
+787
+dog
+lenovo
+##r4
+roger
+013
+cbs
+vornado
+100m
+417
+##desk
+##クok
+##ald
+1867
+9595
+2900
+##van
+oil
+##ｘ
+some
+break
+common
+##jy
+##lines
+g7
+twice
+419
+ella
+nano
+belle
+にこ
+##mes
+##self
+##note
+jb
+##ことかてきます
+benz
+##との
+##ova
+451
+save
+##wing
+##ますのて
+kai
+りは
+##hua
+##rect
+rainer
+##unge
+448
+##0m
+adsl
+##かな
+guestname
+##uma
+##kins
+##zu
+tokichoi
+##price
+county
+##med
+##mus
+rmk
+391
+address
+vm
+えて
+openload
+##group
+##hin
+##iginal
+amg
+urban
+##oz
+jobs
+emi
+##public
+beautiful
+##sch
+album
+##dden
+##bell
+jerry
+works
+hostel
+miller
+##drive
+##rmin
+##１０
+376
+boot
+828
+##370
+##fx
+##cm～
+1885
+##nome
+##ctionary
+##oman
+##lish
+##cr
+##hm
+433
+##how
+432
+francis
+xi
+c919
+b5
+evernote
+##uc
+vga
+##3000
+coupe
+##urg
+##cca
+##uality
+019
+6g
+れる
+multi
+##また
+##ett
+em
+hey
+##ani
+##tax
+##rma
+inside
+than
+740
+leonnhurt
+##jin
+ict
+れた
+bird
+notes
+200mm
+くの
+##dical
+##lli
+result
+442
+iu
+ee
+438
+smap
+gopro
+##last
+yin
+pure
+998
+32g
+けた
+5kg
+##dan
+##rame
+mama
+##oot
+bean
+marketing
+##hur
+2l
+bella
+sync
+xuite
+##ground
+515
+discuz
+##getrelax
+##ince
+##bay
+##5s
+cj
+##イス
+gmat
+apt
+##pass
+jing
+##rix
+c4
+rich
+##とても
+niusnews
+##ello
+bag
+770
+##eting
+##mobile
+１８
+culture
+015
+##のてすか
+377
+1020
+area
+##ience
+616
+details
+gp
+universal
+silver
+dit
+はお
+private
+ddd
+u11
+kanshu
+##ified
+fung
+##nny
+dx
+##520
+tai
+475
+023
+##fr
+##lean
+3s
+##pin
+429
+##rin
+25000
+ly
+rick
+##bility
+usb3
+banner
+##baru
+##gion
+metal
+dt
+vdf
+1871
+karl
+qualcomm
+bear
+1010
+oldid
+ian
+jo
+##tors
+population
+##ernel
+1882
+mmorpg
+##mv
+##bike
+603
+##©
+ww
+friend
+##ager
+exhibition
+##del
+##pods
+fpx
+structure
+##free
+##tings
+kl
+##rley
+##copyright
+##mma
+california
+3400
+orange
+yoga
+4l
+canmake
+honey
+##anda
+##コメント
+595
+nikkie
+##ルハイト
+dhl
+publishing
+##mall
+##gnet
+20cm
+513
+##クセス
+##┅
+e88
+970
+##dog
+fishbase
+##!
+##"
+###
+##$
+##%
+##&
+##'
+##(
+##)
+##*
+##+
+##,
+##-
+##.
+##/
+##:
+##;
+##<
+##=
+##>
+##?
+##@
+##[
+##\
+##]
+##^
+##_
+##{
+##|
+##}
+##~
+##£
+##¤
+##¥
+##§
+##«
+##±
+##³
+##µ
+##·
+##¹
+##º
+##»
+##¼
+##ß
+##æ
+##÷
+##ø
+##đ
+##ŋ
+##ɔ
+##ə
+##ɡ
+##ʰ
+##ˇ
+##ˈ
+##ˊ
+##ˋ
+##ˍ
+##ː
+##˙
+##˚
+##ˢ
+##α
+##β
+##γ
+##δ
+##ε
+##η
+##θ
+##ι
+##κ
+##λ
+##μ
+##ν
+##ο
+##π
+##ρ
+##ς
+##σ
+##τ
+##υ
+##φ
+##χ
+##ψ
+##б
+##в
+##г
+##д
+##е
+##ж
+##з
+##к
+##л
+##м
+##н
+##о
+##п
+##р
+##с
+##т
+##у
+##ф
+##х
+##ц
+##ч
+##ш
+##ы
+##ь
+##і
+##ا
+##ب
+##ة
+##ت
+##د
+##ر
+##س
+##ع
+##ل
+##م
+##ن
+##ه
+##و
+##ي
+##۩
+##ก
+##ง
+##น
+##ม
+##ย
+##ร
+##อ
+##า
+##เ
+##๑
+##་
+##ღ
+##ᄀ
+##ᄁ
+##ᄂ
+##ᄃ
+##ᄅ
+##ᄆ
+##ᄇ
+##ᄈ
+##ᄉ
+##ᄋ
+##ᄌ
+##ᄎ
+##ᄏ
+##ᄐ
+##ᄑ
+##ᄒ
+##ᅢ
+##ᅣ
+##ᅥ
+##ᅦ
+##ᅧ
+##ᅨ
+##ᅪ
+##ᅬ
+##ᅭ
+##ᅮ
+##ᅯ
+##ᅲ
+##ᅳ
+##ᅴ
+##ᆷ
+##ᆸ
+##ᆺ
+##ᆻ
+##ᗜ
+##ᵃ
+##ᵉ
+##ᵍ
+##ᵏ
+##ᵐ
+##ᵒ
+##ᵘ
+##‖
+##„
+##†
+##•
+##‥
+##‧
+## 
+##‰
+##′
+##″
+##‹
+##›
+##※
+##‿
+##⁄
+##ⁱ
+##⁺
+##ⁿ
+##₁
+##₃
+##₄
+##€
+##№
+##ⅰ
+##ⅱ
+##ⅲ
+##ⅳ
+##ⅴ
+##↔
+##↗
+##↘
+##⇒
+##∀
+##−
+##∕
+##∙
+##√
+##∞
+##∟
+##∠
+##∣
+##∩
+##∮
+##∶
+##∼
+##∽
+##≈
+##≒
+##≡
+##≤
+##≥
+##≦
+##≧
+##≪
+##≫
+##⊙
+##⋅
+##⋈
+##⋯
+##⌒
+##①
+##②
+##③
+##④
+##⑤
+##⑥
+##⑦
+##⑧
+##⑨
+##⑩
+##⑴
+##⑵
+##⑶
+##⑷
+##⑸
+##⒈
+##⒉
+##⒊
+##⒋
+##ⓒ
+##ⓔ
+##ⓘ
+##━
+##┃
+##┆
+##┊
+##┌
+##└
+##├
+##┣
+##═
+##║
+##╚
+##╞
+##╠
+##╭
+##╮
+##╯
+##╰
+##╱
+##╳
+##▂
+##▃
+##▅
+##▇
+##▉
+##▋
+##▌
+##▍
+##▎
+##□
+##▪
+##▫
+##▬
+##△
+##▶
+##►
+##▽
+##◇
+##◕
+##◠
+##◢
+##◤
+##☀
+##☕
+##☞
+##☺
+##☼
+##♀
+##♂
+##♠
+##♡
+##♣
+##♦
+##♫
+##♬
+##✈
+##✔
+##✕
+##✖
+##✦
+##✨
+##✪
+##✰
+##✿
+##❀
+##➜
+##➤
+##⦿
+##、
+##。
+##〃
+##々
+##〇
+##〈
+##〉
+##《
+##》
+##「
+##」
+##『
+##』
+##【
+##】
+##〓
+##〔
+##〕
+##〖
+##〗
+##〜
+##〝
+##〞
+##ぃ
+##ぇ
+##ぬ
+##ふ
+##ほ
+##む
+##ゃ
+##ゅ
+##ゆ
+##ょ
+##゜
+##ゝ
+##ァ
+##ゥ
+##エ
+##ォ
+##ケ
+##サ
+##セ
+##ソ
+##ッ
+##ニ
+##ヌ
+##ネ
+##ノ
+##ヘ
+##モ
+##ャ
+##ヤ
+##ュ
+##ユ
+##ョ
+##ヨ
+##ワ
+##ヲ
+##・
+##ヽ
+##ㄅ
+##ㄆ
+##ㄇ
+##ㄉ
+##ㄋ
+##ㄌ
+##ㄍ
+##ㄎ
+##ㄏ
+##ㄒ
+##ㄚ
+##ㄛ
+##ㄞ
+##ㄟ
+##ㄢ
+##ㄤ
+##ㄥ
+##ㄧ
+##ㄨ
+##ㆍ
+##㈦
+##㊣
+##㗎
+##一
+##丁
+##七
+##万
+##丈
+##三
+##上
+##下
+##不
+##与
+##丐
+##丑
+##专
+##且
+##丕
+##世
+##丘
+##丙
+##业
+##丛
+##东
+##丝
+##丞
+##丟
+##両
+##丢
+##两
+##严
+##並
+##丧
+##丨
+##个
+##丫
+##中
+##丰
+##串
+##临
+##丶
+##丸
+##丹
+##为
+##主
+##丼
+##丽
+##举
+##丿
+##乂
+##乃
+##久
+##么
+##义
+##之
+##乌
+##乍
+##乎
+##乏
+##乐
+##乒
+##乓
+##乔
+##乖
+##乗
+##乘
+##乙
+##乜
+##九
+##乞
+##也
+##习
+##乡
+##书
+##乩
+##买
+##乱
+##乳
+##乾
+##亀
+##亂
+##了
+##予
+##争
+##事
+##二
+##于
+##亏
+##云
+##互
+##五
+##井
+##亘
+##亙
+##亚
+##些
+##亜
+##亞
+##亟
+##亡
+##亢
+##交
+##亥
+##亦
+##产
+##亨
+##亩
+##享
+##京
+##亭
+##亮
+##亲
+##亳
+##亵
+##人
+##亿
+##什
+##仁
+##仃
+##仄
+##仅
+##仆
+##仇
+##今
+##介
+##仍
+##从
+##仏
+##仑
+##仓
+##仔
+##仕
+##他
+##仗
+##付
+##仙
+##仝
+##仞
+##仟
+##代
+##令
+##以
+##仨
+##仪
+##们
+##仮
+##仰
+##仲
+##件
+##价
+##任
+##份
+##仿
+##企
+##伉
+##伊
+##伍
+##伎
+##伏
+##伐
+##休
+##伕
+##众
+##优
+##伙
+##会
+##伝
+##伞
+##伟
+##传
+##伢
+##伤
+##伦
+##伪
+##伫
+##伯
+##估
+##伴
+##伶
+##伸
+##伺
+##似
+##伽
+##佃
+##但
+##佇
+##佈
+##位
+##低
+##住
+##佐
+##佑
+##体
+##佔
+##何
+##佗
+##佘
+##余
+##佚
+##佛
+##作
+##佝
+##佞
+##佟
+##你
+##佢
+##佣
+##佤
+##佥
+##佩
+##佬
+##佯
+##佰
+##佳
+##併
+##佶
+##佻
+##佼
+##使
+##侃
+##侄
+##來
+##侈
+##例
+##侍
+##侏
+##侑
+##侖
+##侗
+##供
+##依
+##侠
+##価
+##侣
+##侥
+##侦
+##侧
+##侨
+##侬
+##侮
+##侯
+##侵
+##侶
+##侷
+##便
+##係
+##促
+##俄
+##俊
+##俎
+##俏
+##俐
+##俑
+##俗
+##俘
+##俚
+##保
+##俞
+##俟
+##俠
+##信
+##俨
+##俩
+##俪
+##俬
+##俭
+##修
+##俯
+##俱
+##俳
+##俸
+##俺
+##俾
+##倆
+##倉
+##個
+##倌
+##倍
+##倏
+##們
+##倒
+##倔
+##倖
+##倘
+##候
+##倚
+##倜
+##借
+##倡
+##値
+##倦
+##倩
+##倪
+##倫
+##倬
+##倭
+##倶
+##债
+##值
+##倾
+##偃
+##假
+##偈
+##偉
+##偌
+##偎
+##偏
+##偕
+##做
+##停
+##健
+##側
+##偵
+##偶
+##偷
+##偻
+##偽
+##偿
+##傀
+##傅
+##傍
+##傑
+##傘
+##備
+##傚
+##傢
+##傣
+##傥
+##储
+##傩
+##催
+##傭
+##傲
+##傳
+##債
+##傷
+##傻
+##傾
+##僅
+##働
+##像
+##僑
+##僕
+##僖
+##僚
+##僥
+##僧
+##僭
+##僮
+##僱
+##僵
+##價
+##僻
+##儀
+##儂
+##億
+##儆
+##儉
+##儋
+##儒
+##儕
+##儘
+##償
+##儡
+##優
+##儲
+##儷
+##儼
+##儿
+##兀
+##允
+##元
+##兄
+##充
+##兆
+##兇
+##先
+##光
+##克
+##兌
+##免
+##児
+##兑
+##兒
+##兔
+##兖
+##党
+##兜
+##兢
+##入
+##內
+##全
+##兩
+##八
+##公
+##六
+##兮
+##兰
+##共
+##兲
+##关
+##兴
+##兵
+##其
+##具
+##典
+##兹
+##养
+##兼
+##兽
+##冀
+##内
+##円
+##冇
+##冈
+##冉
+##冊
+##册
+##再
+##冏
+##冒
+##冕
+##冗
+##写
+##军
+##农
+##冠
+##冢
+##冤
+##冥
+##冨
+##冪
+##冬
+##冯
+##冰
+##冲
+##决
+##况
+##冶
+##冷
+##冻
+##冼
+##冽
+##冾
+##净
+##凄
+##准
+##凇
+##凈
+##凉
+##凋
+##凌
+##凍
+##减
+##凑
+##凛
+##凜
+##凝
+##几
+##凡
+##凤
+##処
+##凪
+##凭
+##凯
+##凰
+##凱
+##凳
+##凶
+##凸
+##凹
+##出
+##击
+##函
+##凿
+##刀
+##刁
+##刃
+##分
+##切
+##刈
+##刊
+##刍
+##刎
+##刑
+##划
+##列
+##刘
+##则
+##刚
+##创
+##初
+##删
+##判
+##別
+##刨
+##利
+##刪
+##别
+##刮
+##到
+##制
+##刷
+##券
+##刹
+##刺
+##刻
+##刽
+##剁
+##剂
+##剃
+##則
+##剉
+##削
+##剋
+##剌
+##前
+##剎
+##剐
+##剑
+##剔
+##剖
+##剛
+##剜
+##剝
+##剣
+##剤
+##剥
+##剧
+##剩
+##剪
+##副
+##割
+##創
+##剷
+##剽
+##剿
+##劃
+##劇
+##劈
+##劉
+##劊
+##劍
+##劏
+##劑
+##力
+##劝
+##办
+##功
+##加
+##务
+##劣
+##动
+##助
+##努
+##劫
+##劭
+##励
+##劲
+##劳
+##労
+##劵
+##効
+##劾
+##势
+##勁
+##勃
+##勇
+##勉
+##勋
+##勐
+##勒
+##動
+##勖
+##勘
+##務
+##勛
+##勝
+##勞
+##募
+##勢
+##勤
+##勧
+##勳
+##勵
+##勸
+##勺
+##勻
+##勾
+##勿
+##匀
+##包
+##匆
+##匈
+##匍
+##匐
+##匕
+##化
+##北
+##匙
+##匝
+##匠
+##匡
+##匣
+##匪
+##匮
+##匯
+##匱
+##匹
+##区
+##医
+##匾
+##匿
+##區
+##十
+##千
+##卅
+##升
+##午
+##卉
+##半
+##卍
+##华
+##协
+##卑
+##卒
+##卓
+##協
+##单
+##卖
+##南
+##単
+##博
+##卜
+##卞
+##卟
+##占
+##卡
+##卢
+##卤
+##卦
+##卧
+##卫
+##卮
+##卯
+##印
+##危
+##即
+##却
+##卵
+##卷
+##卸
+##卻
+##卿
+##厂
+##厄
+##厅
+##历
+##厉
+##压
+##厌
+##厕
+##厘
+##厚
+##厝
+##原
+##厢
+##厥
+##厦
+##厨
+##厩
+##厭
+##厮
+##厲
+##厳
+##去
+##县
+##叁
+##参
+##參
+##又
+##叉
+##及
+##友
+##双
+##反
+##収
+##发
+##叔
+##取
+##受
+##变
+##叙
+##叛
+##叟
+##叠
+##叡
+##叢
+##口
+##古
+##句
+##另
+##叨
+##叩
+##只
+##叫
+##召
+##叭
+##叮
+##可
+##台
+##叱
+##史
+##右
+##叵
+##叶
+##号
+##司
+##叹
+##叻
+##叼
+##叽
+##吁
+##吃
+##各
+##吆
+##合
+##吉
+##吊
+##吋
+##同
+##名
+##后
+##吏
+##吐
+##向
+##吒
+##吓
+##吕
+##吖
+##吗
+##君
+##吝
+##吞
+##吟
+##吠
+##吡
+##否
+##吧
+##吨
+##吩
+##含
+##听
+##吭
+##吮
+##启
+##吱
+##吳
+##吴
+##吵
+##吶
+##吸
+##吹
+##吻
+##吼
+##吽
+##吾
+##呀
+##呂
+##呃
+##呆
+##呈
+##告
+##呋
+##呎
+##呐
+##呓
+##呕
+##呗
+##员
+##呛
+##呜
+##呢
+##呤
+##呦
+##周
+##呱
+##呲
+##味
+##呵
+##呷
+##呸
+##呻
+##呼
+##命
+##咀
+##咁
+##咂
+##咄
+##咆
+##咋
+##和
+##咎
+##咏
+##咐
+##咒
+##咔
+##咕
+##咖
+##咗
+##咘
+##咙
+##咚
+##咛
+##咣
+##咤
+##咦
+##咧
+##咨
+##咩
+##咪
+##咫
+##咬
+##咭
+##咯
+##咱
+##咲
+##咳
+##咸
+##咻
+##咽
+##咿
+##哀
+##品
+##哂
+##哄
+##哆
+##哇
+##哈
+##哉
+##哋
+##哌
+##响
+##哎
+##哏
+##哐
+##哑
+##哒
+##哔
+##哗
+##哟
+##員
+##哥
+##哦
+##哧
+##哨
+##哩
+##哪
+##哭
+##哮
+##哲
+##哺
+##哼
+##哽
+##唁
+##唄
+##唆
+##唇
+##唉
+##唏
+##唐
+##唑
+##唔
+##唠
+##唤
+##唧
+##唬
+##售
+##唯
+##唰
+##唱
+##唳
+##唷
+##唸
+##唾
+##啃
+##啄
+##商
+##啉
+##啊
+##問
+##啓
+##啕
+##啖
+##啜
+##啞
+##啟
+##啡
+##啤
+##啥
+##啦
+##啧
+##啪
+##啫
+##啬
+##啮
+##啰
+##啱
+##啲
+##啵
+##啶
+##啷
+##啸
+##啻
+##啼
+##啾
+##喀
+##喂
+##喃
+##善
+##喆
+##喇
+##喉
+##喊
+##喋
+##喎
+##喏
+##喔
+##喘
+##喙
+##喚
+##喜
+##喝
+##喟
+##喧
+##喪
+##喫
+##喬
+##單
+##喰
+##喱
+##喲
+##喳
+##喵
+##営
+##喷
+##喹
+##喺
+##喻
+##喽
+##嗅
+##嗆
+##嗇
+##嗎
+##嗑
+##嗒
+##嗓
+##嗔
+##嗖
+##嗚
+##嗜
+##嗝
+##嗟
+##嗡
+##嗣
+##嗤
+##嗦
+##嗨
+##嗪
+##嗬
+##嗯
+##嗰
+##嗲
+##嗳
+##嗶
+##嗷
+##嗽
+##嘀
+##嘅
+##嘆
+##嘈
+##嘉
+##嘌
+##嘍
+##嘎
+##嘔
+##嘖
+##嘗
+##嘘
+##嘚
+##嘛
+##嘜
+##嘞
+##嘟
+##嘢
+##嘣
+##嘤
+##嘧
+##嘩
+##嘭
+##嘮
+##嘯
+##嘰
+##嘱
+##嘲
+##嘴
+##嘶
+##嘸
+##嘹
+##嘻
+##嘿
+##噁
+##噌
+##噎
+##噓
+##噔
+##噗
+##噙
+##噜
+##噠
+##噢
+##噤
+##器
+##噩
+##噪
+##噬
+##噱
+##噴
+##噶
+##噸
+##噹
+##噻
+##噼
+##嚀
+##嚇
+##嚎
+##嚏
+##嚐
+##嚓
+##嚕
+##嚟
+##嚣
+##嚥
+##嚨
+##嚮
+##嚴
+##嚷
+##嚼
+##囂
+##囉
+##囊
+##囍
+##囑
+##囔
+##囗
+##囚
+##四
+##囝
+##回
+##囟
+##因
+##囡
+##团
+##団
+##囤
+##囧
+##囪
+##囫
+##园
+##困
+##囱
+##囲
+##図
+##围
+##囹
+##固
+##国
+##图
+##囿
+##圃
+##圄
+##圆
+##圈
+##國
+##圍
+##圏
+##園
+##圓
+##圖
+##團
+##圜
+##土
+##圣
+##圧
+##在
+##圩
+##圭
+##地
+##圳
+##场
+##圻
+##圾
+##址
+##坂
+##均
+##坊
+##坍
+##坎
+##坏
+##坐
+##坑
+##块
+##坚
+##坛
+##坝
+##坞
+##坟
+##坠
+##坡
+##坤
+##坦
+##坨
+##坪
+##坯
+##坳
+##坵
+##坷
+##垂
+##垃
+##垄
+##型
+##垒
+##垚
+##垛
+##垠
+##垢
+##垣
+##垦
+##垩
+##垫
+##垭
+##垮
+##垵
+##埂
+##埃
+##埋
+##城
+##埔
+##埕
+##埗
+##域
+##埠
+##埤
+##埵
+##執
+##埸
+##培
+##基
+##埼
+##堀
+##堂
+##堃
+##堅
+##堆
+##堇
+##堑
+##堕
+##堙
+##堡
+##堤
+##堪
+##堯
+##堰
+##報
+##場
+##堵
+##堺
+##堿
+##塊
+##塌
+##塑
+##塔
+##塗
+##塘
+##塚
+##塞
+##塢
+##塩
+##填
+##塬
+##塭
+##塵
+##塾
+##墀
+##境
+##墅
+##墉
+##墊
+##墒
+##墓
+##増
+##墘
+##墙
+##墜
+##增
+##墟
+##墨
+##墩
+##墮
+##墳
+##墻
+##墾
+##壁
+##壅
+##壆
+##壇
+##壊
+##壑
+##壓
+##壕
+##壘
+##壞
+##壟
+##壢
+##壤
+##壩
+##士
+##壬
+##壮
+##壯
+##声
+##売
+##壳
+##壶
+##壹
+##壺
+##壽
+##处
+##备
+##変
+##复
+##夏
+##夔
+##夕
+##外
+##夙
+##多
+##夜
+##够
+##夠
+##夢
+##夥
+##大
+##天
+##太
+##夫
+##夭
+##央
+##夯
+##失
+##头
+##夷
+##夸
+##夹
+##夺
+##夾
+##奂
+##奄
+##奇
+##奈
+##奉
+##奋
+##奎
+##奏
+##奐
+##契
+##奔
+##奕
+##奖
+##套
+##奘
+##奚
+##奠
+##奢
+##奥
+##奧
+##奪
+##奬
+##奮
+##女
+##奴
+##奶
+##奸
+##她
+##好
+##如
+##妃
+##妄
+##妆
+##妇
+##妈
+##妊
+##妍
+##妒
+##妓
+##妖
+##妘
+##妙
+##妝
+##妞
+##妣
+##妤
+##妥
+##妨
+##妩
+##妪
+##妮
+##妲
+##妳
+##妹
+##妻
+##妾
+##姆
+##姉
+##姊
+##始
+##姍
+##姐
+##姑
+##姒
+##姓
+##委
+##姗
+##姚
+##姜
+##姝
+##姣
+##姥
+##姦
+##姨
+##姪
+##姫
+##姬
+##姹
+##姻
+##姿
+##威
+##娃
+##娄
+##娅
+##娆
+##娇
+##娉
+##娑
+##娓
+##娘
+##娛
+##娜
+##娟
+##娠
+##娣
+##娥
+##娩
+##娱
+##娲
+##娴
+##娶
+##娼
+##婀
+##婁
+##婆
+##婉
+##婊
+##婕
+##婚
+##婢
+##婦
+##婧
+##婪
+##婭
+##婴
+##婵
+##婶
+##婷
+##婺
+##婿
+##媒
+##媚
+##媛
+##媞
+##媧
+##媲
+##媳
+##媽
+##媾
+##嫁
+##嫂
+##嫉
+##嫌
+##嫑
+##嫔
+##嫖
+##嫘
+##嫚
+##嫡
+##嫣
+##嫦
+##嫩
+##嫲
+##嫵
+##嫻
+##嬅
+##嬉
+##嬌
+##嬗
+##嬛
+##嬢
+##嬤
+##嬪
+##嬰
+##嬴
+##嬷
+##嬸
+##嬿
+##孀
+##孃
+##子
+##孑
+##孔
+##孕
+##孖
+##字
+##存
+##孙
+##孚
+##孛
+##孜
+##孝
+##孟
+##孢
+##季
+##孤
+##学
+##孩
+##孪
+##孫
+##孬
+##孰
+##孱
+##孳
+##孵
+##學
+##孺
+##孽
+##孿
+##宁
+##它
+##宅
+##宇
+##守
+##安
+##宋
+##完
+##宏
+##宓
+##宕
+##宗
+##官
+##宙
+##定
+##宛
+##宜
+##宝
+##实
+##実
+##宠
+##审
+##客
+##宣
+##室
+##宥
+##宦
+##宪
+##宫
+##宮
+##宰
+##害
+##宴
+##宵
+##家
+##宸
+##容
+##宽
+##宾
+##宿
+##寂
+##寄
+##寅
+##密
+##寇
+##富
+##寐
+##寒
+##寓
+##寛
+##寝
+##寞
+##察
+##寡
+##寢
+##寥
+##實
+##寧
+##寨
+##審
+##寫
+##寬
+##寮
+##寰
+##寵
+##寶
+##寸
+##对
+##寺
+##寻
+##导
+##対
+##寿
+##封
+##専
+##射
+##将
+##將
+##專
+##尉
+##尊
+##尋
+##對
+##導
+##小
+##少
+##尔
+##尕
+##尖
+##尘
+##尚
+##尝
+##尤
+##尧
+##尬
+##就
+##尴
+##尷
+##尸
+##尹
+##尺
+##尻
+##尼
+##尽
+##尾
+##尿
+##局
+##屁
+##层
+##屄
+##居
+##屆
+##屈
+##屉
+##届
+##屋
+##屌
+##屍
+##屎
+##屏
+##屐
+##屑
+##展
+##屜
+##属
+##屠
+##屡
+##屢
+##層
+##履
+##屬
+##屯
+##山
+##屹
+##屿
+##岀
+##岁
+##岂
+##岌
+##岐
+##岑
+##岔
+##岖
+##岗
+##岘
+##岙
+##岚
+##岛
+##岡
+##岩
+##岫
+##岬
+##岭
+##岱
+##岳
+##岷
+##岸
+##峇
+##峋
+##峒
+##峙
+##峡
+##峤
+##峥
+##峦
+##峨
+##峪
+##峭
+##峯
+##峰
+##峴
+##島
+##峻
+##峽
+##崁
+##崂
+##崆
+##崇
+##崎
+##崑
+##崔
+##崖
+##崗
+##崙
+##崛
+##崧
+##崩
+##崭
+##崴
+##崽
+##嵇
+##嵊
+##嵋
+##嵌
+##嵐
+##嵘
+##嵩
+##嵬
+##嵯
+##嶂
+##嶄
+##嶇
+##嶋
+##嶙
+##嶺
+##嶼
+##嶽
+##巅
+##巍
+##巒
+##巔
+##巖
+##川
+##州
+##巡
+##巢
+##工
+##左
+##巧
+##巨
+##巩
+##巫
+##差
+##己
+##已
+##巳
+##巴
+##巷
+##巻
+##巽
+##巾
+##巿
+##币
+##市
+##布
+##帅
+##帆
+##师
+##希
+##帐
+##帑
+##帕
+##帖
+##帘
+##帚
+##帛
+##帜
+##帝
+##帥
+##带
+##帧
+##師
+##席
+##帮
+##帯
+##帰
+##帳
+##帶
+##帷
+##常
+##帼
+##帽
+##幀
+##幂
+##幄
+##幅
+##幌
+##幔
+##幕
+##幟
+##幡
+##幢
+##幣
+##幫
+##干
+##平
+##年
+##并
+##幸
+##幹
+##幺
+##幻
+##幼
+##幽
+##幾
+##广
+##庁
+##広
+##庄
+##庆
+##庇
+##床
+##序
+##庐
+##库
+##应
+##底
+##庖
+##店
+##庙
+##庚
+##府
+##庞
+##废
+##庠
+##度
+##座
+##庫
+##庭
+##庵
+##庶
+##康
+##庸
+##庹
+##庾
+##廁
+##廂
+##廃
+##廈
+##廉
+##廊
+##廓
+##廖
+##廚
+##廝
+##廟
+##廠
+##廢
+##廣
+##廬
+##廳
+##延
+##廷
+##建
+##廿
+##开
+##弁
+##异
+##弃
+##弄
+##弈
+##弊
+##弋
+##式
+##弑
+##弒
+##弓
+##弔
+##引
+##弗
+##弘
+##弛
+##弟
+##张
+##弥
+##弦
+##弧
+##弩
+##弭
+##弯
+##弱
+##張
+##強
+##弹
+##强
+##弼
+##弾
+##彅
+##彆
+##彈
+##彌
+##彎
+##归
+##当
+##录
+##彗
+##彙
+##彝
+##形
+##彤
+##彥
+##彦
+##彧
+##彩
+##彪
+##彫
+##彬
+##彭
+##彰
+##影
+##彷
+##役
+##彻
+##彼
+##彿
+##往
+##征
+##径
+##待
+##徇
+##很
+##徉
+##徊
+##律
+##後
+##徐
+##徑
+##徒
+##従
+##徕
+##得
+##徘
+##徙
+##徜
+##從
+##徠
+##御
+##徨
+##復
+##循
+##徬
+##微
+##徳
+##徴
+##徵
+##德
+##徹
+##徼
+##徽
+##心
+##必
+##忆
+##忌
+##忍
+##忏
+##忐
+##忑
+##忒
+##忖
+##志
+##忘
+##忙
+##応
+##忠
+##忡
+##忤
+##忧
+##忪
+##快
+##忱
+##念
+##忻
+##忽
+##忿
+##怀
+##态
+##怂
+##怅
+##怆
+##怎
+##怏
+##怒
+##怔
+##怕
+##怖
+##怙
+##怜
+##思
+##怠
+##怡
+##急
+##怦
+##性
+##怨
+##怪
+##怯
+##怵
+##总
+##怼
+##恁
+##恃
+##恆
+##恋
+##恍
+##恐
+##恒
+##恕
+##恙
+##恚
+##恢
+##恣
+##恤
+##恥
+##恨
+##恩
+##恪
+##恫
+##恬
+##恭
+##息
+##恰
+##恳
+##恵
+##恶
+##恸
+##恺
+##恻
+##恼
+##恿
+##悄
+##悅
+##悉
+##悌
+##悍
+##悔
+##悖
+##悚
+##悟
+##悠
+##患
+##悦
+##您
+##悩
+##悪
+##悬
+##悯
+##悱
+##悲
+##悴
+##悵
+##悶
+##悸
+##悻
+##悼
+##悽
+##情
+##惆
+##惇
+##惊
+##惋
+##惑
+##惕
+##惘
+##惚
+##惜
+##惟
+##惠
+##惡
+##惦
+##惧
+##惨
+##惩
+##惫
+##惬
+##惭
+##惮
+##惯
+##惰
+##惱
+##想
+##惴
+##惶
+##惹
+##惺
+##愁
+##愆
+##愈
+##愉
+##愍
+##意
+##愕
+##愚
+##愛
+##愜
+##感
+##愣
+##愤
+##愧
+##愫
+##愷
+##愿
+##慄
+##慈
+##態
+##慌
+##慎
+##慑
+##慕
+##慘
+##慚
+##慟
+##慢
+##慣
+##慧
+##慨
+##慫
+##慮
+##慰
+##慳
+##慵
+##慶
+##慷
+##慾
+##憂
+##憊
+##憋
+##憎
+##憐
+##憑
+##憔
+##憚
+##憤
+##憧
+##憨
+##憩
+##憫
+##憬
+##憲
+##憶
+##憾
+##懂
+##懇
+##懈
+##應
+##懊
+##懋
+##懑
+##懒
+##懦
+##懲
+##懵
+##懶
+##懷
+##懸
+##懺
+##懼
+##懾
+##懿
+##戀
+##戈
+##戊
+##戌
+##戍
+##戎
+##戏
+##成
+##我
+##戒
+##戕
+##或
+##战
+##戚
+##戛
+##戟
+##戡
+##戦
+##截
+##戬
+##戮
+##戰
+##戲
+##戳
+##戴
+##戶
+##户
+##戸
+##戻
+##戾
+##房
+##所
+##扁
+##扇
+##扈
+##扉
+##手
+##才
+##扎
+##扑
+##扒
+##打
+##扔
+##払
+##托
+##扛
+##扣
+##扦
+##执
+##扩
+##扪
+##扫
+##扬
+##扭
+##扮
+##扯
+##扰
+##扱
+##扳
+##扶
+##批
+##扼
+##找
+##承
+##技
+##抄
+##抉
+##把
+##抑
+##抒
+##抓
+##投
+##抖
+##抗
+##折
+##抚
+##抛
+##抜
+##択
+##抟
+##抠
+##抡
+##抢
+##护
+##报
+##抨
+##披
+##抬
+##抱
+##抵
+##抹
+##押
+##抽
+##抿
+##拂
+##拄
+##担
+##拆
+##拇
+##拈
+##拉
+##拋
+##拌
+##拍
+##拎
+##拐
+##拒
+##拓
+##拔
+##拖
+##拗
+##拘
+##拙
+##拚
+##招
+##拜
+##拟
+##拡
+##拢
+##拣
+##拥
+##拦
+##拧
+##拨
+##择
+##括
+##拭
+##拮
+##拯
+##拱
+##拳
+##拴
+##拷
+##拼
+##拽
+##拾
+##拿
+##持
+##挂
+##指
+##挈
+##按
+##挎
+##挑
+##挖
+##挙
+##挚
+##挛
+##挝
+##挞
+##挟
+##挠
+##挡
+##挣
+##挤
+##挥
+##挨
+##挪
+##挫
+##振
+##挲
+##挹
+##挺
+##挽
+##挾
+##捂
+##捅
+##捆
+##捉
+##捋
+##捌
+##捍
+##捎
+##捏
+##捐
+##捕
+##捞
+##损
+##捡
+##换
+##捣
+##捧
+##捨
+##捩
+##据
+##捱
+##捲
+##捶
+##捷
+##捺
+##捻
+##掀
+##掂
+##掃
+##掇
+##授
+##掉
+##掌
+##掏
+##掐
+##排
+##掖
+##掘
+##掙
+##掛
+##掠
+##採
+##探
+##掣
+##接
+##控
+##推
+##掩
+##措
+##掬
+##掰
+##掲
+##掳
+##掴
+##掷
+##掸
+##掺
+##揀
+##揃
+##揄
+##揆
+##揉
+##揍
+##描
+##提
+##插
+##揖
+##揚
+##換
+##握
+##揣
+##揩
+##揪
+##揭
+##揮
+##援
+##揶
+##揸
+##揹
+##揽
+##搀
+##搁
+##搂
+##搅
+##損
+##搏
+##搐
+##搓
+##搔
+##搖
+##搗
+##搜
+##搞
+##搡
+##搪
+##搬
+##搭
+##搵
+##搶
+##携
+##搽
+##摀
+##摁
+##摄
+##摆
+##摇
+##摈
+##摊
+##摒
+##摔
+##摘
+##摞
+##摟
+##摧
+##摩
+##摯
+##摳
+##摸
+##摹
+##摺
+##摻
+##撂
+##撃
+##撅
+##撇
+##撈
+##撐
+##撑
+##撒
+##撓
+##撕
+##撚
+##撞
+##撤
+##撥
+##撩
+##撫
+##撬
+##播
+##撮
+##撰
+##撲
+##撵
+##撷
+##撸
+##撻
+##撼
+##撿
+##擀
+##擁
+##擂
+##擄
+##擅
+##擇
+##擊
+##擋
+##操
+##擎
+##擒
+##擔
+##擘
+##據
+##擞
+##擠
+##擡
+##擢
+##擦
+##擬
+##擰
+##擱
+##擲
+##擴
+##擷
+##擺
+##擼
+##擾
+##攀
+##攏
+##攒
+##攔
+##攘
+##攙
+##攜
+##攝
+##攞
+##攢
+##攣
+##攤
+##攥
+##攪
+##攫
+##攬
+##支
+##收
+##攸
+##改
+##攻
+##放
+##政
+##故
+##效
+##敌
+##敍
+##敎
+##敏
+##救
+##敕
+##敖
+##敗
+##敘
+##教
+##敛
+##敝
+##敞
+##敢
+##散
+##敦
+##敬
+##数
+##敲
+##整
+##敵
+##敷
+##數
+##斂
+##斃
+##文
+##斋
+##斌
+##斎
+##斐
+##斑
+##斓
+##斗
+##料
+##斛
+##斜
+##斟
+##斡
+##斤
+##斥
+##斧
+##斩
+##斫
+##斬
+##断
+##斯
+##新
+##斷
+##方
+##於
+##施
+##旁
+##旃
+##旅
+##旋
+##旌
+##旎
+##族
+##旖
+##旗
+##无
+##既
+##日
+##旦
+##旧
+##旨
+##早
+##旬
+##旭
+##旮
+##旱
+##时
+##旷
+##旺
+##旻
+##昀
+##昂
+##昆
+##昇
+##昉
+##昊
+##昌
+##明
+##昏
+##易
+##昔
+##昕
+##昙
+##星
+##映
+##春
+##昧
+##昨
+##昭
+##是
+##昱
+##昴
+##昵
+##昶
+##昼
+##显
+##晁
+##時
+##晃
+##晉
+##晋
+##晌
+##晏
+##晒
+##晓
+##晔
+##晕
+##晖
+##晗
+##晚
+##晝
+##晞
+##晟
+##晤
+##晦
+##晨
+##晩
+##普
+##景
+##晰
+##晴
+##晶
+##晷
+##智
+##晾
+##暂
+##暄
+##暇
+##暈
+##暉
+##暌
+##暐
+##暑
+##暖
+##暗
+##暝
+##暢
+##暧
+##暨
+##暫
+##暮
+##暱
+##暴
+##暸
+##暹
+##曄
+##曆
+##曇
+##曉
+##曖
+##曙
+##曜
+##曝
+##曠
+##曦
+##曬
+##曰
+##曲
+##曳
+##更
+##書
+##曹
+##曼
+##曾
+##替
+##最
+##會
+##月
+##有
+##朋
+##服
+##朐
+##朔
+##朕
+##朗
+##望
+##朝
+##期
+##朦
+##朧
+##木
+##未
+##末
+##本
+##札
+##朮
+##术
+##朱
+##朴
+##朵
+##机
+##朽
+##杀
+##杂
+##权
+##杆
+##杈
+##杉
+##李
+##杏
+##材
+##村
+##杓
+##杖
+##杜
+##杞
+##束
+##杠
+##条
+##来
+##杨
+##杭
+##杯
+##杰
+##東
+##杳
+##杵
+##杷
+##杼
+##松
+##板
+##极
+##构
+##枇
+##枉
+##枋
+##析
+##枕
+##林
+##枚
+##果
+##枝
+##枢
+##枣
+##枪
+##枫
+##枭
+##枯
+##枰
+##枱
+##枳
+##架
+##枷
+##枸
+##柄
+##柏
+##某
+##柑
+##柒
+##染
+##柔
+##柘
+##柚
+##柜
+##柞
+##柠
+##柢
+##查
+##柩
+##柬
+##柯
+##柱
+##柳
+##柴
+##柵
+##査
+##柿
+##栀
+##栃
+##栄
+##栅
+##标
+##栈
+##栉
+##栋
+##栎
+##栏
+##树
+##栓
+##栖
+##栗
+##校
+##栩
+##株
+##样
+##核
+##根
+##格
+##栽
+##栾
+##桀
+##桁
+##桂
+##桃
+##桅
+##框
+##案
+##桉
+##桌
+##桎
+##桐
+##桑
+##桓
+##桔
+##桜
+##桠
+##桡
+##桢
+##档
+##桥
+##桦
+##桧
+##桨
+##桩
+##桶
+##桿
+##梁
+##梅
+##梆
+##梏
+##梓
+##梗
+##條
+##梟
+##梢
+##梦
+##梧
+##梨
+##梭
+##梯
+##械
+##梳
+##梵
+##梶
+##检
+##棂
+##棄
+##棉
+##棋
+##棍
+##棒
+##棕
+##棗
+##棘
+##棚
+##棟
+##棠
+##棣
+##棧
+##森
+##棱
+##棲
+##棵
+##棹
+##棺
+##椁
+##椅
+##椋
+##植
+##椎
+##椒
+##検
+##椪
+##椭
+##椰
+##椹
+##椽
+##椿
+##楂
+##楊
+##楓
+##楔
+##楚
+##楝
+##楞
+##楠
+##楣
+##楨
+##楫
+##業
+##楮
+##極
+##楷
+##楸
+##楹
+##楼
+##楽
+##概
+##榄
+##榆
+##榈
+##榉
+##榔
+##榕
+##榖
+##榛
+##榜
+##榨
+##榫
+##榭
+##榮
+##榱
+##榴
+##榷
+##榻
+##槁
+##槃
+##構
+##槌
+##槍
+##槎
+##槐
+##槓
+##様
+##槛
+##槟
+##槤
+##槭
+##槲
+##槳
+##槻
+##槽
+##槿
+##樁
+##樂
+##樊
+##樑
+##樓
+##標
+##樞
+##樟
+##模
+##樣
+##権
+##横
+##樫
+##樯
+##樱
+##樵
+##樸
+##樹
+##樺
+##樽
+##樾
+##橄
+##橇
+##橋
+##橐
+##橘
+##橙
+##機
+##橡
+##橢
+##橫
+##橱
+##橹
+##橼
+##檀
+##檄
+##檎
+##檐
+##檔
+##檗
+##檜
+##檢
+##檬
+##檯
+##檳
+##檸
+##檻
+##櫃
+##櫚
+##櫛
+##櫥
+##櫸
+##櫻
+##欄
+##權
+##欒
+##欖
+##欠
+##次
+##欢
+##欣
+##欧
+##欲
+##欸
+##欺
+##欽
+##款
+##歆
+##歇
+##歉
+##歌
+##歎
+##歐
+##歓
+##歙
+##歛
+##歡
+##止
+##正
+##此
+##步
+##武
+##歧
+##歩
+##歪
+##歯
+##歲
+##歳
+##歴
+##歷
+##歸
+##歹
+##死
+##歼
+##殁
+##殃
+##殆
+##殇
+##殉
+##殊
+##残
+##殒
+##殓
+##殖
+##殘
+##殞
+##殡
+##殤
+##殭
+##殯
+##殲
+##殴
+##段
+##殷
+##殺
+##殼
+##殿
+##毀
+##毁
+##毂
+##毅
+##毆
+##毋
+##母
+##毎
+##每
+##毒
+##毓
+##比
+##毕
+##毗
+##毘
+##毙
+##毛
+##毡
+##毫
+##毯
+##毽
+##氈
+##氏
+##氐
+##民
+##氓
+##气
+##氖
+##気
+##氙
+##氛
+##氟
+##氡
+##氢
+##氣
+##氤
+##氦
+##氧
+##氨
+##氪
+##氫
+##氮
+##氯
+##氰
+##氲
+##水
+##氷
+##永
+##氹
+##氾
+##汀
+##汁
+##求
+##汆
+##汇
+##汉
+##汎
+##汐
+##汕
+##汗
+##汙
+##汛
+##汝
+##汞
+##江
+##池
+##污
+##汤
+##汨
+##汩
+##汪
+##汰
+##汲
+##汴
+##汶
+##汹
+##決
+##汽
+##汾
+##沁
+##沂
+##沃
+##沅
+##沈
+##沉
+##沌
+##沏
+##沐
+##沒
+##沓
+##沖
+##沙
+##沛
+##沟
+##没
+##沢
+##沣
+##沥
+##沦
+##沧
+##沪
+##沫
+##沭
+##沮
+##沱
+##河
+##沸
+##油
+##治
+##沼
+##沽
+##沾
+##沿
+##況
+##泄
+##泉
+##泊
+##泌
+##泓
+##法
+##泗
+##泛
+##泞
+##泠
+##泡
+##波
+##泣
+##泥
+##注
+##泪
+##泫
+##泮
+##泯
+##泰
+##泱
+##泳
+##泵
+##泷
+##泸
+##泻
+##泼
+##泽
+##泾
+##洁
+##洄
+##洋
+##洒
+##洗
+##洙
+##洛
+##洞
+##津
+##洩
+##洪
+##洮
+##洱
+##洲
+##洵
+##洶
+##洸
+##洹
+##活
+##洼
+##洽
+##派
+##流
+##浃
+##浄
+##浅
+##浆
+##浇
+##浊
+##测
+##济
+##浏
+##浑
+##浒
+##浓
+##浔
+##浙
+##浚
+##浜
+##浣
+##浦
+##浩
+##浪
+##浬
+##浮
+##浯
+##浴
+##海
+##浸
+##涂
+##涅
+##涇
+##消
+##涉
+##涌
+##涎
+##涓
+##涔
+##涕
+##涙
+##涛
+##涝
+##涞
+##涟
+##涠
+##涡
+##涣
+##涤
+##润
+##涧
+##涨
+##涩
+##涪
+##涮
+##涯
+##液
+##涵
+##涸
+##涼
+##涿
+##淀
+##淄
+##淅
+##淆
+##淇
+##淋
+##淌
+##淑
+##淒
+##淖
+##淘
+##淙
+##淚
+##淞
+##淡
+##淤
+##淦
+##淨
+##淩
+##淪
+##淫
+##淬
+##淮
+##深
+##淳
+##淵
+##混
+##淹
+##淺
+##添
+##淼
+##清
+##済
+##渉
+##渊
+##渋
+##渍
+##渎
+##渐
+##渔
+##渗
+##渙
+##渚
+##減
+##渝
+##渠
+##渡
+##渣
+##渤
+##渥
+##渦
+##温
+##測
+##渭
+##港
+##渲
+##渴
+##游
+##渺
+##渾
+##湃
+##湄
+##湊
+##湍
+##湖
+##湘
+##湛
+##湟
+##湧
+##湫
+##湮
+##湯
+##湳
+##湾
+##湿
+##満
+##溃
+##溅
+##溉
+##溏
+##源
+##準
+##溜
+##溝
+##溟
+##溢
+##溥
+##溧
+##溪
+##溫
+##溯
+##溱
+##溴
+##溶
+##溺
+##溼
+##滁
+##滂
+##滄
+##滅
+##滇
+##滋
+##滌
+##滑
+##滓
+##滔
+##滕
+##滙
+##滚
+##滝
+##滞
+##滟
+##满
+##滢
+##滤
+##滥
+##滦
+##滨
+##滩
+##滬
+##滯
+##滲
+##滴
+##滷
+##滸
+##滾
+##滿
+##漁
+##漂
+##漆
+##漉
+##漏
+##漓
+##演
+##漕
+##漠
+##漢
+##漣
+##漩
+##漪
+##漫
+##漬
+##漯
+##漱
+##漲
+##漳
+##漸
+##漾
+##漿
+##潆
+##潇
+##潋
+##潍
+##潑
+##潔
+##潘
+##潛
+##潜
+##潞
+##潟
+##潢
+##潤
+##潦
+##潧
+##潭
+##潮
+##潰
+##潴
+##潸
+##潺
+##潼
+##澀
+##澄
+##澆
+##澈
+##澍
+##澎
+##澗
+##澜
+##澡
+##澤
+##澧
+##澱
+##澳
+##澹
+##激
+##濁
+##濂
+##濃
+##濑
+##濒
+##濕
+##濘
+##濛
+##濟
+##濠
+##濡
+##濤
+##濫
+##濬
+##濮
+##濯
+##濱
+##濺
+##濾
+##瀅
+##瀆
+##瀉
+##瀋
+##瀏
+##瀑
+##瀕
+##瀘
+##瀚
+##瀛
+##瀝
+##瀞
+##瀟
+##瀧
+##瀨
+##瀬
+##瀰
+##瀾
+##灌
+##灏
+##灑
+##灘
+##灝
+##灞
+##灣
+##火
+##灬
+##灭
+##灯
+##灰
+##灵
+##灶
+##灸
+##灼
+##災
+##灾
+##灿
+##炀
+##炁
+##炅
+##炉
+##炊
+##炎
+##炒
+##炔
+##炕
+##炖
+##炙
+##炜
+##炫
+##炬
+##炭
+##炮
+##炯
+##炳
+##炷
+##炸
+##点
+##為
+##炼
+##炽
+##烁
+##烂
+##烃
+##烈
+##烊
+##烏
+##烘
+##烙
+##烛
+##烟
+##烤
+##烦
+##烧
+##烨
+##烩
+##烫
+##烬
+##热
+##烯
+##烷
+##烹
+##烽
+##焉
+##焊
+##焕
+##焖
+##焗
+##焘
+##焙
+##焚
+##焜
+##無
+##焦
+##焯
+##焰
+##焱
+##然
+##焼
+##煅
+##煉
+##煊
+##煌
+##煎
+##煒
+##煖
+##煙
+##煜
+##煞
+##煤
+##煥
+##煦
+##照
+##煨
+##煩
+##煮
+##煲
+##煸
+##煽
+##熄
+##熊
+##熏
+##熒
+##熔
+##熙
+##熟
+##熠
+##熨
+##熬
+##熱
+##熵
+##熹
+##熾
+##燁
+##燃
+##燄
+##燈
+##燉
+##燊
+##燎
+##燒
+##燔
+##燕
+##燙
+##燜
+##營
+##燥
+##燦
+##燧
+##燭
+##燮
+##燴
+##燻
+##燼
+##燿
+##爆
+##爍
+##爐
+##爛
+##爪
+##爬
+##爭
+##爰
+##爱
+##爲
+##爵
+##父
+##爷
+##爸
+##爹
+##爺
+##爻
+##爽
+##爾
+##牆
+##片
+##版
+##牌
+##牍
+##牒
+##牙
+##牛
+##牝
+##牟
+##牠
+##牡
+##牢
+##牦
+##牧
+##物
+##牯
+##牲
+##牴
+##牵
+##特
+##牺
+##牽
+##犀
+##犁
+##犄
+##犊
+##犍
+##犒
+##犢
+##犧
+##犬
+##犯
+##状
+##犷
+##犸
+##犹
+##狀
+##狂
+##狄
+##狈
+##狎
+##狐
+##狒
+##狗
+##狙
+##狞
+##狠
+##狡
+##狩
+##独
+##狭
+##狮
+##狰
+##狱
+##狸
+##狹
+##狼
+##狽
+##猎
+##猕
+##猖
+##猗
+##猙
+##猛
+##猜
+##猝
+##猥
+##猩
+##猪
+##猫
+##猬
+##献
+##猴
+##猶
+##猷
+##猾
+##猿
+##獄
+##獅
+##獎
+##獐
+##獒
+##獗
+##獠
+##獣
+##獨
+##獭
+##獰
+##獲
+##獵
+##獷
+##獸
+##獺
+##獻
+##獼
+##獾
+##玄
+##率
+##玉
+##王
+##玑
+##玖
+##玛
+##玟
+##玠
+##玥
+##玩
+##玫
+##玮
+##环
+##现
+##玲
+##玳
+##玷
+##玺
+##玻
+##珀
+##珂
+##珅
+##珈
+##珉
+##珊
+##珍
+##珏
+##珐
+##珑
+##珙
+##珞
+##珠
+##珣
+##珥
+##珩
+##珪
+##班
+##珮
+##珲
+##珺
+##現
+##球
+##琅
+##理
+##琇
+##琉
+##琊
+##琍
+##琏
+##琐
+##琛
+##琢
+##琥
+##琦
+##琨
+##琪
+##琬
+##琮
+##琰
+##琲
+##琳
+##琴
+##琵
+##琶
+##琺
+##琼
+##瑀
+##瑁
+##瑄
+##瑋
+##瑕
+##瑗
+##瑙
+##瑚
+##瑛
+##瑜
+##瑞
+##瑟
+##瑠
+##瑣
+##瑤
+##瑩
+##瑪
+##瑯
+##瑰
+##瑶
+##瑾
+##璀
+##璁
+##璃
+##璇
+##璉
+##璋
+##璎
+##璐
+##璜
+##璞
+##璟
+##璧
+##璨
+##環
+##璽
+##璿
+##瓊
+##瓏
+##瓒
+##瓜
+##瓢
+##瓣
+##瓤
+##瓦
+##瓮
+##瓯
+##瓴
+##瓶
+##瓷
+##甄
+##甌
+##甕
+##甘
+##甙
+##甚
+##甜
+##生
+##產
+##産
+##甥
+##甦
+##用
+##甩
+##甫
+##甬
+##甭
+##甯
+##田
+##由
+##甲
+##申
+##电
+##男
+##甸
+##町
+##画
+##甾
+##畀
+##畅
+##界
+##畏
+##畑
+##畔
+##留
+##畜
+##畝
+##畢
+##略
+##畦
+##番
+##畫
+##異
+##畲
+##畳
+##畴
+##當
+##畸
+##畹
+##畿
+##疆
+##疇
+##疊
+##疏
+##疑
+##疔
+##疖
+##疗
+##疙
+##疚
+##疝
+##疟
+##疡
+##疣
+##疤
+##疥
+##疫
+##疮
+##疯
+##疱
+##疲
+##疳
+##疵
+##疸
+##疹
+##疼
+##疽
+##疾
+##痂
+##病
+##症
+##痈
+##痉
+##痊
+##痍
+##痒
+##痔
+##痕
+##痘
+##痙
+##痛
+##痞
+##痠
+##痢
+##痣
+##痤
+##痧
+##痨
+##痪
+##痫
+##痰
+##痱
+##痴
+##痹
+##痺
+##痼
+##痿
+##瘀
+##瘁
+##瘋
+##瘍
+##瘓
+##瘘
+##瘙
+##瘟
+##瘠
+##瘡
+##瘢
+##瘤
+##瘦
+##瘧
+##瘩
+##瘪
+##瘫
+##瘴
+##瘸
+##瘾
+##療
+##癇
+##癌
+##癒
+##癖
+##癜
+##癞
+##癡
+##癢
+##癣
+##癥
+##癫
+##癬
+##癮
+##癱
+##癲
+##癸
+##発
+##登
+##發
+##白
+##百
+##皂
+##的
+##皆
+##皇
+##皈
+##皋
+##皎
+##皑
+##皓
+##皖
+##皙
+##皚
+##皮
+##皰
+##皱
+##皴
+##皺
+##皿
+##盂
+##盃
+##盅
+##盆
+##盈
+##益
+##盎
+##盏
+##盐
+##监
+##盒
+##盔
+##盖
+##盗
+##盘
+##盛
+##盜
+##盞
+##盟
+##盡
+##監
+##盤
+##盥
+##盧
+##盪
+##目
+##盯
+##盱
+##盲
+##直
+##相
+##盹
+##盼
+##盾
+##省
+##眈
+##眉
+##看
+##県
+##眙
+##眞
+##真
+##眠
+##眦
+##眨
+##眩
+##眯
+##眶
+##眷
+##眸
+##眺
+##眼
+##眾
+##着
+##睁
+##睇
+##睏
+##睐
+##睑
+##睛
+##睜
+##睞
+##睡
+##睢
+##督
+##睥
+##睦
+##睨
+##睪
+##睫
+##睬
+##睹
+##睽
+##睾
+##睿
+##瞄
+##瞅
+##瞇
+##瞋
+##瞌
+##瞎
+##瞑
+##瞒
+##瞓
+##瞞
+##瞟
+##瞠
+##瞥
+##瞧
+##瞩
+##瞪
+##瞬
+##瞭
+##瞰
+##瞳
+##瞻
+##瞼
+##瞿
+##矇
+##矍
+##矗
+##矚
+##矛
+##矜
+##矢
+##矣
+##知
+##矩
+##矫
+##短
+##矮
+##矯
+##石
+##矶
+##矽
+##矾
+##矿
+##码
+##砂
+##砌
+##砍
+##砒
+##研
+##砖
+##砗
+##砚
+##砝
+##砣
+##砥
+##砧
+##砭
+##砰
+##砲
+##破
+##砷
+##砸
+##砺
+##砼
+##砾
+##础
+##硅
+##硐
+##硒
+##硕
+##硝
+##硫
+##硬
+##确
+##硯
+##硼
+##碁
+##碇
+##碉
+##碌
+##碍
+##碎
+##碑
+##碓
+##碗
+##碘
+##碚
+##碛
+##碟
+##碣
+##碧
+##碩
+##碰
+##碱
+##碳
+##碴
+##確
+##碼
+##碾
+##磁
+##磅
+##磊
+##磋
+##磐
+##磕
+##磚
+##磡
+##磨
+##磬
+##磯
+##磲
+##磷
+##磺
+##礁
+##礎
+##礙
+##礡
+##礦
+##礪
+##礫
+##礴
+##示
+##礼
+##社
+##祀
+##祁
+##祂
+##祇
+##祈
+##祉
+##祎
+##祐
+##祕
+##祖
+##祗
+##祚
+##祛
+##祜
+##祝
+##神
+##祟
+##祠
+##祢
+##祥
+##票
+##祭
+##祯
+##祷
+##祸
+##祺
+##祿
+##禀
+##禁
+##禄
+##禅
+##禍
+##禎
+##福
+##禛
+##禦
+##禧
+##禪
+##禮
+##禱
+##禹
+##禺
+##离
+##禽
+##禾
+##禿
+##秀
+##私
+##秃
+##秆
+##秉
+##秋
+##种
+##科
+##秒
+##秘
+##租
+##秣
+##秤
+##秦
+##秧
+##秩
+##秭
+##积
+##称
+##秸
+##移
+##秽
+##稀
+##稅
+##程
+##稍
+##税
+##稔
+##稗
+##稚
+##稜
+##稞
+##稟
+##稠
+##稣
+##種
+##稱
+##稲
+##稳
+##稷
+##稹
+##稻
+##稼
+##稽
+##稿
+##穀
+##穂
+##穆
+##穌
+##積
+##穎
+##穗
+##穢
+##穩
+##穫
+##穴
+##究
+##穷
+##穹
+##空
+##穿
+##突
+##窃
+##窄
+##窈
+##窍
+##窑
+##窒
+##窓
+##窕
+##窖
+##窗
+##窘
+##窜
+##窝
+##窟
+##窠
+##窥
+##窦
+##窨
+##窩
+##窪
+##窮
+##窯
+##窺
+##窿
+##竄
+##竅
+##竇
+##竊
+##立
+##竖
+##站
+##竜
+##竞
+##竟
+##章
+##竣
+##童
+##竭
+##端
+##競
+##竹
+##竺
+##竽
+##竿
+##笃
+##笆
+##笈
+##笋
+##笏
+##笑
+##笔
+##笙
+##笛
+##笞
+##笠
+##符
+##笨
+##第
+##笹
+##笺
+##笼
+##筆
+##等
+##筊
+##筋
+##筍
+##筏
+##筐
+##筑
+##筒
+##答
+##策
+##筛
+##筝
+##筠
+##筱
+##筲
+##筵
+##筷
+##筹
+##签
+##简
+##箇
+##箋
+##箍
+##箏
+##箐
+##箔
+##箕
+##算
+##箝
+##管
+##箩
+##箫
+##箭
+##箱
+##箴
+##箸
+##節
+##篁
+##範
+##篆
+##篇
+##築
+##篑
+##篓
+##篙
+##篝
+##篠
+##篡
+##篤
+##篩
+##篪
+##篮
+##篱
+##篷
+##簇
+##簌
+##簍
+##簡
+##簦
+##簧
+##簪
+##簫
+##簷
+##簸
+##簽
+##簾
+##簿
+##籁
+##籃
+##籌
+##籍
+##籐
+##籟
+##籠
+##籤
+##籬
+##籮
+##籲
+##米
+##类
+##籼
+##籽
+##粄
+##粉
+##粑
+##粒
+##粕
+##粗
+##粘
+##粟
+##粤
+##粥
+##粧
+##粪
+##粮
+##粱
+##粲
+##粳
+##粵
+##粹
+##粼
+##粽
+##精
+##粿
+##糅
+##糊
+##糍
+##糕
+##糖
+##糗
+##糙
+##糜
+##糞
+##糟
+##糠
+##糧
+##糬
+##糯
+##糰
+##糸
+##系
+##糾
+##紀
+##紂
+##約
+##紅
+##紉
+##紊
+##紋
+##納
+##紐
+##紓
+##純
+##紗
+##紘
+##紙
+##級
+##紛
+##紜
+##素
+##紡
+##索
+##紧
+##紫
+##紮
+##累
+##細
+##紳
+##紹
+##紺
+##終
+##絃
+##組
+##絆
+##経
+##結
+##絕
+##絞
+##絡
+##絢
+##給
+##絨
+##絮
+##統
+##絲
+##絳
+##絵
+##絶
+##絹
+##綁
+##綏
+##綑
+##經
+##継
+##続
+##綜
+##綠
+##綢
+##綦
+##綫
+##綬
+##維
+##綱
+##網
+##綴
+##綵
+##綸
+##綺
+##綻
+##綽
+##綾
+##綿
+##緊
+##緋
+##総
+##緑
+##緒
+##緘
+##線
+##緝
+##緞
+##締
+##緣
+##編
+##緩
+##緬
+##緯
+##練
+##緹
+##緻
+##縁
+##縄
+##縈
+##縛
+##縝
+##縣
+##縫
+##縮
+##縱
+##縴
+##縷
+##總
+##績
+##繁
+##繃
+##繆
+##繇
+##繋
+##織
+##繕
+##繚
+##繞
+##繡
+##繩
+##繪
+##繫
+##繭
+##繳
+##繹
+##繼
+##繽
+##纂
+##續
+##纍
+##纏
+##纓
+##纔
+##纖
+##纜
+##纠
+##红
+##纣
+##纤
+##约
+##级
+##纨
+##纪
+##纫
+##纬
+##纭
+##纯
+##纰
+##纱
+##纲
+##纳
+##纵
+##纶
+##纷
+##纸
+##纹
+##纺
+##纽
+##纾
+##线
+##绀
+##练
+##组
+##绅
+##细
+##织
+##终
+##绊
+##绍
+##绎
+##经
+##绑
+##绒
+##结
+##绔
+##绕
+##绘
+##给
+##绚
+##绛
+##络
+##绝
+##绞
+##统
+##绡
+##绢
+##绣
+##绥
+##绦
+##继
+##绩
+##绪
+##绫
+##续
+##绮
+##绯
+##绰
+##绳
+##维
+##绵
+##绶
+##绷
+##绸
+##绻
+##综
+##绽
+##绾
+##绿
+##缀
+##缄
+##缅
+##缆
+##缇
+##缈
+##缉
+##缎
+##缓
+##缔
+##缕
+##编
+##缘
+##缙
+##缚
+##缜
+##缝
+##缠
+##缢
+##缤
+##缥
+##缨
+##缩
+##缪
+##缭
+##缮
+##缰
+##缱
+##缴
+##缸
+##缺
+##缽
+##罂
+##罄
+##罌
+##罐
+##网
+##罔
+##罕
+##罗
+##罚
+##罡
+##罢
+##罩
+##罪
+##置
+##罰
+##署
+##罵
+##罷
+##罹
+##羁
+##羅
+##羈
+##羊
+##羌
+##美
+##羔
+##羚
+##羞
+##羟
+##羡
+##羣
+##群
+##羥
+##羧
+##羨
+##義
+##羯
+##羲
+##羸
+##羹
+##羽
+##羿
+##翁
+##翅
+##翊
+##翌
+##翎
+##習
+##翔
+##翘
+##翟
+##翠
+##翡
+##翦
+##翩
+##翰
+##翱
+##翳
+##翹
+##翻
+##翼
+##耀
+##老
+##考
+##耄
+##者
+##耆
+##耋
+##而
+##耍
+##耐
+##耒
+##耕
+##耗
+##耘
+##耙
+##耦
+##耨
+##耳
+##耶
+##耷
+##耸
+##耻
+##耽
+##耿
+##聂
+##聆
+##聊
+##聋
+##职
+##聒
+##联
+##聖
+##聘
+##聚
+##聞
+##聪
+##聯
+##聰
+##聲
+##聳
+##聴
+##聶
+##職
+##聽
+##聾
+##聿
+##肃
+##肄
+##肅
+##肆
+##肇
+##肉
+##肋
+##肌
+##肏
+##肓
+##肖
+##肘
+##肚
+##肛
+##肝
+##肠
+##股
+##肢
+##肤
+##肥
+##肩
+##肪
+##肮
+##肯
+##肱
+##育
+##肴
+##肺
+##肽
+##肾
+##肿
+##胀
+##胁
+##胃
+##胄
+##胆
+##背
+##胍
+##胎
+##胖
+##胚
+##胛
+##胜
+##胝
+##胞
+##胡
+##胤
+##胥
+##胧
+##胫
+##胭
+##胯
+##胰
+##胱
+##胳
+##胴
+##胶
+##胸
+##胺
+##能
+##脂
+##脅
+##脆
+##脇
+##脈
+##脉
+##脊
+##脍
+##脏
+##脐
+##脑
+##脓
+##脖
+##脘
+##脚
+##脛
+##脣
+##脩
+##脫
+##脯
+##脱
+##脲
+##脳
+##脸
+##脹
+##脾
+##腆
+##腈
+##腊
+##腋
+##腌
+##腎
+##腐
+##腑
+##腓
+##腔
+##腕
+##腥
+##腦
+##腩
+##腫
+##腭
+##腮
+##腰
+##腱
+##腳
+##腴
+##腸
+##腹
+##腺
+##腻
+##腼
+##腾
+##腿
+##膀
+##膈
+##膊
+##膏
+##膑
+##膘
+##膚
+##膛
+##膜
+##膝
+##膠
+##膦
+##膨
+##膩
+##膳
+##膺
+##膻
+##膽
+##膾
+##膿
+##臀
+##臂
+##臃
+##臆
+##臉
+##臊
+##臍
+##臓
+##臘
+##臟
+##臣
+##臥
+##臧
+##臨
+##自
+##臬
+##臭
+##至
+##致
+##臺
+##臻
+##臼
+##臾
+##舀
+##舂
+##舅
+##舆
+##與
+##興
+##舉
+##舊
+##舌
+##舍
+##舎
+##舐
+##舒
+##舔
+##舖
+##舗
+##舛
+##舜
+##舞
+##舟
+##航
+##舫
+##般
+##舰
+##舱
+##舵
+##舶
+##舷
+##舸
+##船
+##舺
+##舾
+##艇
+##艋
+##艘
+##艙
+##艦
+##艮
+##良
+##艰
+##艱
+##色
+##艳
+##艷
+##艹
+##艺
+##艾
+##节
+##芃
+##芈
+##芊
+##芋
+##芍
+##芎
+##芒
+##芙
+##芜
+##芝
+##芡
+##芥
+##芦
+##芩
+##芪
+##芫
+##芬
+##芭
+##芮
+##芯
+##花
+##芳
+##芷
+##芸
+##芹
+##芻
+##芽
+##芾
+##苁
+##苄
+##苇
+##苋
+##苍
+##苏
+##苑
+##苒
+##苓
+##苔
+##苕
+##苗
+##苛
+##苜
+##苞
+##苟
+##苡
+##苣
+##若
+##苦
+##苫
+##苯
+##英
+##苷
+##苹
+##苻
+##茁
+##茂
+##范
+##茄
+##茅
+##茉
+##茎
+##茏
+##茗
+##茜
+##茧
+##茨
+##茫
+##茬
+##茭
+##茯
+##茱
+##茲
+##茴
+##茵
+##茶
+##茸
+##茹
+##茼
+##荀
+##荃
+##荆
+##草
+##荊
+##荏
+##荐
+##荒
+##荔
+##荖
+##荘
+##荚
+##荞
+##荟
+##荠
+##荡
+##荣
+##荤
+##荥
+##荧
+##荨
+##荪
+##荫
+##药
+##荳
+##荷
+##荸
+##荻
+##荼
+##荽
+##莅
+##莆
+##莉
+##莊
+##莎
+##莒
+##莓
+##莖
+##莘
+##莞
+##莠
+##莢
+##莧
+##莪
+##莫
+##莱
+##莲
+##莴
+##获
+##莹
+##莺
+##莽
+##莿
+##菀
+##菁
+##菅
+##菇
+##菈
+##菊
+##菌
+##菏
+##菓
+##菖
+##菘
+##菜
+##菟
+##菠
+##菡
+##菩
+##華
+##菱
+##菲
+##菸
+##菽
+##萁
+##萃
+##萄
+##萊
+##萋
+##萌
+##萍
+##萎
+##萘
+##萝
+##萤
+##营
+##萦
+##萧
+##萨
+##萩
+##萬
+##萱
+##萵
+##萸
+##萼
+##落
+##葆
+##葉
+##著
+##葚
+##葛
+##葡
+##董
+##葦
+##葩
+##葫
+##葬
+##葭
+##葯
+##葱
+##葳
+##葵
+##葷
+##葺
+##蒂
+##蒋
+##蒐
+##蒔
+##蒙
+##蒜
+##蒞
+##蒟
+##蒡
+##蒨
+##蒲
+##蒸
+##蒹
+##蒻
+##蒼
+##蒿
+##蓁
+##蓄
+##蓆
+##蓉
+##蓋
+##蓑
+##蓓
+##蓖
+##蓝
+##蓟
+##蓦
+##蓬
+##蓮
+##蓼
+##蓿
+##蔑
+##蔓
+##蔔
+##蔗
+##蔘
+##蔚
+##蔡
+##蔣
+##蔥
+##蔫
+##蔬
+##蔭
+##蔵
+##蔷
+##蔺
+##蔻
+##蔼
+##蔽
+##蕁
+##蕃
+##蕈
+##蕉
+##蕊
+##蕎
+##蕙
+##蕤
+##蕨
+##蕩
+##蕪
+##蕭
+##蕲
+##蕴
+##蕻
+##蕾
+##薄
+##薅
+##薇
+##薈
+##薊
+##薏
+##薑
+##薔
+##薙
+##薛
+##薦
+##薨
+##薩
+##薪
+##薬
+##薯
+##薰
+##薹
+##藉
+##藍
+##藏
+##藐
+##藓
+##藕
+##藜
+##藝
+##藤
+##藥
+##藩
+##藹
+##藻
+##藿
+##蘆
+##蘇
+##蘊
+##蘋
+##蘑
+##蘚
+##蘭
+##蘸
+##蘼
+##蘿
+##虎
+##虏
+##虐
+##虑
+##虔
+##處
+##虚
+##虛
+##虜
+##虞
+##號
+##虢
+##虧
+##虫
+##虬
+##虱
+##虹
+##虻
+##虽
+##虾
+##蚀
+##蚁
+##蚂
+##蚊
+##蚌
+##蚓
+##蚕
+##蚜
+##蚝
+##蚣
+##蚤
+##蚩
+##蚪
+##蚯
+##蚱
+##蚵
+##蛀
+##蛆
+##蛇
+##蛊
+##蛋
+##蛎
+##蛐
+##蛔
+##蛙
+##蛛
+##蛟
+##蛤
+##蛭
+##蛮
+##蛰
+##蛳
+##蛹
+##蛻
+##蛾
+##蜀
+##蜂
+##蜃
+##蜆
+##蜇
+##蜈
+##蜊
+##蜍
+##蜒
+##蜓
+##蜕
+##蜗
+##蜘
+##蜚
+##蜜
+##蜡
+##蜢
+##蜥
+##蜱
+##蜴
+##蜷
+##蜻
+##蜿
+##蝇
+##蝈
+##蝉
+##蝌
+##蝎
+##蝕
+##蝗
+##蝙
+##蝟
+##蝠
+##蝦
+##蝨
+##蝴
+##蝶
+##蝸
+##蝼
+##螂
+##螃
+##融
+##螞
+##螢
+##螨
+##螯
+##螳
+##螺
+##蟀
+##蟄
+##蟆
+##蟋
+##蟎
+##蟑
+##蟒
+##蟠
+##蟬
+##蟲
+##蟹
+##蟻
+##蟾
+##蠅
+##蠍
+##蠔
+##蠕
+##蠛
+##蠟
+##蠡
+##蠢
+##蠣
+##蠱
+##蠶
+##蠹
+##蠻
+##血
+##衄
+##衅
+##衆
+##行
+##衍
+##術
+##衔
+##街
+##衙
+##衛
+##衝
+##衞
+##衡
+##衢
+##衣
+##补
+##表
+##衩
+##衫
+##衬
+##衮
+##衰
+##衲
+##衷
+##衹
+##衾
+##衿
+##袁
+##袂
+##袄
+##袅
+##袈
+##袋
+##袍
+##袒
+##袖
+##袜
+##袞
+##袤
+##袪
+##被
+##袭
+##袱
+##裁
+##裂
+##装
+##裆
+##裊
+##裏
+##裔
+##裕
+##裘
+##裙
+##補
+##裝
+##裟
+##裡
+##裤
+##裨
+##裱
+##裳
+##裴
+##裸
+##裹
+##製
+##裾
+##褂
+##複
+##褐
+##褒
+##褓
+##褔
+##褚
+##褥
+##褪
+##褫
+##褲
+##褶
+##褻
+##襁
+##襄
+##襟
+##襠
+##襪
+##襬
+##襯
+##襲
+##西
+##要
+##覃
+##覆
+##覇
+##見
+##規
+##覓
+##視
+##覚
+##覦
+##覧
+##親
+##覬
+##観
+##覷
+##覺
+##覽
+##觀
+##见
+##观
+##规
+##觅
+##视
+##览
+##觉
+##觊
+##觎
+##觐
+##觑
+##角
+##觞
+##解
+##觥
+##触
+##觸
+##言
+##訂
+##計
+##訊
+##討
+##訓
+##訕
+##訖
+##託
+##記
+##訛
+##訝
+##訟
+##訣
+##訥
+##訪
+##設
+##許
+##訳
+##訴
+##訶
+##診
+##註
+##証
+##詆
+##詐
+##詔
+##評
+##詛
+##詞
+##詠
+##詡
+##詢
+##詣
+##試
+##詩
+##詫
+##詬
+##詭
+##詮
+##詰
+##話
+##該
+##詳
+##詹
+##詼
+##誅
+##誇
+##誉
+##誌
+##認
+##誓
+##誕
+##誘
+##語
+##誠
+##誡
+##誣
+##誤
+##誥
+##誦
+##誨
+##說
+##説
+##読
+##誰
+##課
+##誹
+##誼
+##調
+##諄
+##談
+##請
+##諏
+##諒
+##論
+##諗
+##諜
+##諡
+##諦
+##諧
+##諫
+##諭
+##諮
+##諱
+##諳
+##諷
+##諸
+##諺
+##諾
+##謀
+##謁
+##謂
+##謄
+##謊
+##謎
+##謐
+##謔
+##謗
+##謙
+##講
+##謝
+##謠
+##謨
+##謬
+##謹
+##謾
+##譁
+##證
+##譎
+##譏
+##識
+##譙
+##譚
+##譜
+##警
+##譬
+##譯
+##議
+##譲
+##譴
+##護
+##譽
+##讀
+##變
+##讓
+##讚
+##讞
+##计
+##订
+##认
+##讥
+##讧
+##讨
+##让
+##讪
+##讫
+##训
+##议
+##讯
+##记
+##讲
+##讳
+##讴
+##讶
+##讷
+##许
+##讹
+##论
+##讼
+##讽
+##设
+##访
+##诀
+##证
+##诃
+##评
+##诅
+##识
+##诈
+##诉
+##诊
+##诋
+##词
+##诏
+##译
+##试
+##诗
+##诘
+##诙
+##诚
+##诛
+##话
+##诞
+##诟
+##诠
+##诡
+##询
+##诣
+##诤
+##该
+##详
+##诧
+##诩
+##诫
+##诬
+##语
+##误
+##诰
+##诱
+##诲
+##说
+##诵
+##诶
+##请
+##诸
+##诺
+##读
+##诽
+##课
+##诿
+##谀
+##谁
+##调
+##谄
+##谅
+##谆
+##谈
+##谊
+##谋
+##谌
+##谍
+##谎
+##谏
+##谐
+##谑
+##谒
+##谓
+##谔
+##谕
+##谗
+##谘
+##谙
+##谚
+##谛
+##谜
+##谟
+##谢
+##谣
+##谤
+##谥
+##谦
+##谧
+##谨
+##谩
+##谪
+##谬
+##谭
+##谯
+##谱
+##谲
+##谴
+##谶
+##谷
+##豁
+##豆
+##豇
+##豈
+##豉
+##豊
+##豌
+##豎
+##豐
+##豔
+##豚
+##象
+##豢
+##豪
+##豫
+##豬
+##豹
+##豺
+##貂
+##貅
+##貌
+##貓
+##貔
+##貘
+##貝
+##貞
+##負
+##財
+##貢
+##貧
+##貨
+##販
+##貪
+##貫
+##責
+##貯
+##貰
+##貳
+##貴
+##貶
+##買
+##貸
+##費
+##貼
+##貽
+##貿
+##賀
+##賁
+##賂
+##賃
+##賄
+##資
+##賈
+##賊
+##賑
+##賓
+##賜
+##賞
+##賠
+##賡
+##賢
+##賣
+##賤
+##賦
+##質
+##賬
+##賭
+##賴
+##賺
+##購
+##賽
+##贅
+##贈
+##贊
+##贍
+##贏
+##贓
+##贖
+##贛
+##贝
+##贞
+##负
+##贡
+##财
+##责
+##贤
+##败
+##账
+##货
+##质
+##贩
+##贪
+##贫
+##贬
+##购
+##贮
+##贯
+##贰
+##贱
+##贲
+##贴
+##贵
+##贷
+##贸
+##费
+##贺
+##贻
+##贼
+##贾
+##贿
+##赁
+##赂
+##赃
+##资
+##赅
+##赈
+##赊
+##赋
+##赌
+##赎
+##赏
+##赐
+##赓
+##赔
+##赖
+##赘
+##赚
+##赛
+##赝
+##赞
+##赠
+##赡
+##赢
+##赣
+##赤
+##赦
+##赧
+##赫
+##赭
+##走
+##赳
+##赴
+##赵
+##赶
+##起
+##趁
+##超
+##越
+##趋
+##趕
+##趙
+##趟
+##趣
+##趨
+##足
+##趴
+##趵
+##趸
+##趺
+##趾
+##跃
+##跄
+##跆
+##跋
+##跌
+##跎
+##跑
+##跖
+##跚
+##跛
+##距
+##跟
+##跡
+##跤
+##跨
+##跩
+##跪
+##路
+##跳
+##践
+##跷
+##跹
+##跺
+##跻
+##踉
+##踊
+##踌
+##踏
+##踐
+##踝
+##踞
+##踟
+##踢
+##踩
+##踪
+##踮
+##踱
+##踴
+##踵
+##踹
+##蹂
+##蹄
+##蹇
+##蹈
+##蹉
+##蹊
+##蹋
+##蹑
+##蹒
+##蹙
+##蹟
+##蹣
+##蹤
+##蹦
+##蹩
+##蹬
+##蹭
+##蹲
+##蹴
+##蹶
+##蹺
+##蹼
+##蹿
+##躁
+##躇
+##躉
+##躊
+##躋
+##躍
+##躏
+##躪
+##身
+##躬
+##躯
+##躲
+##躺
+##軀
+##車
+##軋
+##軌
+##軍
+##軒
+##軟
+##転
+##軸
+##軼
+##軽
+##軾
+##較
+##載
+##輒
+##輓
+##輔
+##輕
+##輛
+##輝
+##輟
+##輩
+##輪
+##輯
+##輸
+##輻
+##輾
+##輿
+##轄
+##轅
+##轆
+##轉
+##轍
+##轎
+##轟
+##车
+##轧
+##轨
+##轩
+##转
+##轭
+##轮
+##软
+##轰
+##轲
+##轴
+##轶
+##轻
+##轼
+##载
+##轿
+##较
+##辄
+##辅
+##辆
+##辇
+##辈
+##辉
+##辊
+##辍
+##辐
+##辑
+##输
+##辕
+##辖
+##辗
+##辘
+##辙
+##辛
+##辜
+##辞
+##辟
+##辣
+##辦
+##辨
+##辩
+##辫
+##辭
+##辮
+##辯
+##辰
+##辱
+##農
+##边
+##辺
+##辻
+##込
+##辽
+##达
+##迁
+##迂
+##迄
+##迅
+##过
+##迈
+##迎
+##运
+##近
+##返
+##还
+##这
+##进
+##远
+##违
+##连
+##迟
+##迢
+##迤
+##迥
+##迦
+##迩
+##迪
+##迫
+##迭
+##述
+##迴
+##迷
+##迸
+##迹
+##迺
+##追
+##退
+##送
+##适
+##逃
+##逅
+##逆
+##选
+##逊
+##逍
+##透
+##逐
+##递
+##途
+##逕
+##逗
+##這
+##通
+##逛
+##逝
+##逞
+##速
+##造
+##逢
+##連
+##逮
+##週
+##進
+##逵
+##逶
+##逸
+##逻
+##逼
+##逾
+##遁
+##遂
+##遅
+##遇
+##遊
+##運
+##遍
+##過
+##遏
+##遐
+##遑
+##遒
+##道
+##達
+##違
+##遗
+##遙
+##遛
+##遜
+##遞
+##遠
+##遢
+##遣
+##遥
+##遨
+##適
+##遭
+##遮
+##遲
+##遴
+##遵
+##遶
+##遷
+##選
+##遺
+##遼
+##遽
+##避
+##邀
+##邁
+##邂
+##邃
+##還
+##邇
+##邈
+##邊
+##邋
+##邏
+##邑
+##邓
+##邕
+##邛
+##邝
+##邢
+##那
+##邦
+##邨
+##邪
+##邬
+##邮
+##邯
+##邰
+##邱
+##邳
+##邵
+##邸
+##邹
+##邺
+##邻
+##郁
+##郅
+##郊
+##郎
+##郑
+##郜
+##郝
+##郡
+##郢
+##郤
+##郦
+##郧
+##部
+##郫
+##郭
+##郴
+##郵
+##郷
+##郸
+##都
+##鄂
+##鄉
+##鄒
+##鄔
+##鄙
+##鄞
+##鄢
+##鄧
+##鄭
+##鄰
+##鄱
+##鄲
+##鄺
+##酉
+##酊
+##酋
+##酌
+##配
+##酐
+##酒
+##酗
+##酚
+##酝
+##酢
+##酣
+##酥
+##酩
+##酪
+##酬
+##酮
+##酯
+##酰
+##酱
+##酵
+##酶
+##酷
+##酸
+##酿
+##醃
+##醇
+##醉
+##醋
+##醍
+##醐
+##醒
+##醚
+##醛
+##醜
+##醞
+##醣
+##醪
+##醫
+##醬
+##醮
+##醯
+##醴
+##醺
+##釀
+##釁
+##采
+##釉
+##释
+##釋
+##里
+##重
+##野
+##量
+##釐
+##金
+##釗
+##釘
+##釜
+##針
+##釣
+##釦
+##釧
+##釵
+##鈀
+##鈉
+##鈍
+##鈎
+##鈔
+##鈕
+##鈞
+##鈣
+##鈦
+##鈪
+##鈴
+##鈺
+##鈾
+##鉀
+##鉄
+##鉅
+##鉉
+##鉑
+##鉗
+##鉚
+##鉛
+##鉤
+##鉴
+##鉻
+##銀
+##銃
+##銅
+##銑
+##銓
+##銖
+##銘
+##銜
+##銬
+##銭
+##銮
+##銳
+##銷
+##銹
+##鋁
+##鋅
+##鋒
+##鋤
+##鋪
+##鋰
+##鋸
+##鋼
+##錄
+##錐
+##錘
+##錚
+##錠
+##錢
+##錦
+##錨
+##錫
+##錮
+##錯
+##録
+##錳
+##錶
+##鍊
+##鍋
+##鍍
+##鍛
+##鍥
+##鍰
+##鍵
+##鍺
+##鍾
+##鎂
+##鎊
+##鎌
+##鎏
+##鎔
+##鎖
+##鎗
+##鎚
+##鎧
+##鎬
+##鎮
+##鎳
+##鏈
+##鏖
+##鏗
+##鏘
+##鏞
+##鏟
+##鏡
+##鏢
+##鏤
+##鏽
+##鐘
+##鐮
+##鐲
+##鐳
+##鐵
+##鐸
+##鐺
+##鑄
+##鑊
+##鑑
+##鑒
+##鑣
+##鑫
+##鑰
+##鑲
+##鑼
+##鑽
+##鑾
+##鑿
+##针
+##钉
+##钊
+##钎
+##钏
+##钒
+##钓
+##钗
+##钙
+##钛
+##钜
+##钝
+##钞
+##钟
+##钠
+##钡
+##钢
+##钣
+##钤
+##钥
+##钦
+##钧
+##钨
+##钩
+##钮
+##钯
+##钰
+##钱
+##钳
+##钴
+##钵
+##钺
+##钻
+##钼
+##钾
+##钿
+##铀
+##铁
+##铂
+##铃
+##铄
+##铅
+##铆
+##铉
+##铎
+##铐
+##铛
+##铜
+##铝
+##铠
+##铡
+##铢
+##铣
+##铤
+##铨
+##铩
+##铬
+##铭
+##铮
+##铰
+##铲
+##铵
+##银
+##铸
+##铺
+##链
+##铿
+##销
+##锁
+##锂
+##锄
+##锅
+##锆
+##锈
+##锉
+##锋
+##锌
+##锏
+##锐
+##锑
+##错
+##锚
+##锟
+##锡
+##锢
+##锣
+##锤
+##锥
+##锦
+##锭
+##键
+##锯
+##锰
+##锲
+##锵
+##锹
+##锺
+##锻
+##镀
+##镁
+##镂
+##镇
+##镉
+##镌
+##镍
+##镐
+##镑
+##镕
+##镖
+##镗
+##镛
+##镜
+##镣
+##镭
+##镯
+##镰
+##镳
+##镶
+##長
+##长
+##門
+##閃
+##閉
+##開
+##閎
+##閏
+##閑
+##閒
+##間
+##閔
+##閘
+##閡
+##関
+##閣
+##閥
+##閨
+##閩
+##閱
+##閲
+##閹
+##閻
+##閾
+##闆
+##闇
+##闊
+##闌
+##闍
+##闔
+##闕
+##闖
+##闘
+##關
+##闡
+##闢
+##门
+##闪
+##闫
+##闭
+##问
+##闯
+##闰
+##闲
+##间
+##闵
+##闷
+##闸
+##闹
+##闺
+##闻
+##闽
+##闾
+##阀
+##阁
+##阂
+##阅
+##阆
+##阇
+##阈
+##阉
+##阎
+##阐
+##阑
+##阔
+##阕
+##阖
+##阙
+##阚
+##阜
+##队
+##阡
+##阪
+##阮
+##阱
+##防
+##阳
+##阴
+##阵
+##阶
+##阻
+##阿
+##陀
+##陂
+##附
+##际
+##陆
+##陇
+##陈
+##陋
+##陌
+##降
+##限
+##陕
+##陛
+##陝
+##陞
+##陟
+##陡
+##院
+##陣
+##除
+##陨
+##险
+##陪
+##陰
+##陲
+##陳
+##陵
+##陶
+##陷
+##陸
+##険
+##陽
+##隅
+##隆
+##隈
+##隊
+##隋
+##隍
+##階
+##随
+##隐
+##隔
+##隕
+##隘
+##隙
+##際
+##障
+##隠
+##隣
+##隧
+##隨
+##險
+##隱
+##隴
+##隶
+##隸
+##隻
+##隼
+##隽
+##难
+##雀
+##雁
+##雄
+##雅
+##集
+##雇
+##雉
+##雋
+##雌
+##雍
+##雎
+##雏
+##雑
+##雒
+##雕
+##雖
+##雙
+##雛
+##雜
+##雞
+##離
+##難
+##雨
+##雪
+##雯
+##雰
+##雲
+##雳
+##零
+##雷
+##雹
+##電
+##雾
+##需
+##霁
+##霄
+##霆
+##震
+##霈
+##霉
+##霊
+##霍
+##霎
+##霏
+##霑
+##霓
+##霖
+##霜
+##霞
+##霧
+##霭
+##霰
+##露
+##霸
+##霹
+##霽
+##霾
+##靂
+##靄
+##靈
+##青
+##靓
+##靖
+##静
+##靚
+##靛
+##靜
+##非
+##靠
+##靡
+##面
+##靥
+##靦
+##革
+##靳
+##靴
+##靶
+##靼
+##鞅
+##鞋
+##鞍
+##鞏
+##鞑
+##鞘
+##鞠
+##鞣
+##鞦
+##鞭
+##韆
+##韋
+##韌
+##韓
+##韜
+##韦
+##韧
+##韩
+##韬
+##韭
+##音
+##韵
+##韶
+##韻
+##響
+##頁
+##頂
+##頃
+##項
+##順
+##須
+##頌
+##預
+##頑
+##頒
+##頓
+##頗
+##領
+##頜
+##頡
+##頤
+##頫
+##頭
+##頰
+##頷
+##頸
+##頹
+##頻
+##頼
+##顆
+##題
+##額
+##顎
+##顏
+##顔
+##願
+##顛
+##類
+##顧
+##顫
+##顯
+##顱
+##顴
+##页
+##顶
+##顷
+##项
+##顺
+##须
+##顼
+##顽
+##顾
+##顿
+##颁
+##颂
+##预
+##颅
+##领
+##颇
+##颈
+##颉
+##颊
+##颌
+##颍
+##颐
+##频
+##颓
+##颔
+##颖
+##颗
+##题
+##颚
+##颛
+##颜
+##额
+##颞
+##颠
+##颡
+##颢
+##颤
+##颦
+##颧
+##風
+##颯
+##颱
+##颳
+##颶
+##颼
+##飄
+##飆
+##风
+##飒
+##飓
+##飕
+##飘
+##飙
+##飚
+##飛
+##飞
+##食
+##飢
+##飨
+##飩
+##飪
+##飯
+##飲
+##飼
+##飽
+##飾
+##餃
+##餅
+##餉
+##養
+##餌
+##餐
+##餒
+##餓
+##餘
+##餚
+##餛
+##餞
+##餡
+##館
+##餮
+##餵
+##餾
+##饅
+##饈
+##饋
+##饌
+##饍
+##饑
+##饒
+##饕
+##饗
+##饞
+##饥
+##饨
+##饪
+##饬
+##饭
+##饮
+##饯
+##饰
+##饱
+##饲
+##饴
+##饵
+##饶
+##饷
+##饺
+##饼
+##饽
+##饿
+##馀
+##馁
+##馄
+##馅
+##馆
+##馈
+##馋
+##馍
+##馏
+##馒
+##馔
+##首
+##馗
+##香
+##馥
+##馨
+##馬
+##馭
+##馮
+##馳
+##馴
+##駁
+##駄
+##駅
+##駆
+##駐
+##駒
+##駕
+##駛
+##駝
+##駭
+##駱
+##駿
+##騁
+##騎
+##騏
+##験
+##騙
+##騨
+##騰
+##騷
+##驀
+##驅
+##驊
+##驍
+##驒
+##驕
+##驗
+##驚
+##驛
+##驟
+##驢
+##驥
+##马
+##驭
+##驮
+##驯
+##驰
+##驱
+##驳
+##驴
+##驶
+##驷
+##驸
+##驹
+##驻
+##驼
+##驾
+##驿
+##骁
+##骂
+##骄
+##骅
+##骆
+##骇
+##骈
+##骊
+##骋
+##验
+##骏
+##骐
+##骑
+##骗
+##骚
+##骛
+##骜
+##骞
+##骠
+##骡
+##骤
+##骥
+##骧
+##骨
+##骯
+##骰
+##骶
+##骷
+##骸
+##骼
+##髂
+##髅
+##髋
+##髏
+##髒
+##髓
+##體
+##髖
+##高
+##髦
+##髪
+##髮
+##髯
+##髻
+##鬃
+##鬆
+##鬍
+##鬓
+##鬚
+##鬟
+##鬢
+##鬣
+##鬥
+##鬧
+##鬱
+##鬼
+##魁
+##魂
+##魄
+##魅
+##魇
+##魍
+##魏
+##魔
+##魘
+##魚
+##魯
+##魷
+##鮑
+##鮨
+##鮪
+##鮭
+##鮮
+##鯉
+##鯊
+##鯖
+##鯛
+##鯨
+##鯰
+##鯽
+##鰍
+##鰓
+##鰭
+##鰲
+##鰻
+##鰾
+##鱈
+##鱉
+##鱔
+##鱗
+##鱷
+##鱸
+##鱼
+##鱿
+##鲁
+##鲈
+##鲍
+##鲑
+##鲛
+##鲜
+##鲟
+##鲢
+##鲤
+##鲨
+##鲫
+##鲱
+##鲲
+##鲶
+##鲷
+##鲸
+##鳃
+##鳄
+##鳅
+##鳌
+##鳍
+##鳕
+##鳖
+##鳗
+##鳝
+##鳞
+##鳥
+##鳩
+##鳳
+##鳴
+##鳶
+##鴉
+##鴕
+##鴛
+##鴦
+##鴨
+##鴻
+##鴿
+##鵑
+##鵜
+##鵝
+##鵡
+##鵬
+##鵰
+##鵲
+##鶘
+##鶩
+##鶯
+##鶴
+##鷗
+##鷲
+##鷹
+##鷺
+##鸚
+##鸞
+##鸟
+##鸠
+##鸡
+##鸢
+##鸣
+##鸥
+##鸦
+##鸨
+##鸪
+##鸭
+##鸯
+##鸳
+##鸵
+##鸽
+##鸾
+##鸿
+##鹂
+##鹃
+##鹄
+##鹅
+##鹈
+##鹉
+##鹊
+##鹌
+##鹏
+##鹑
+##鹕
+##鹘
+##鹜
+##鹞
+##鹤
+##鹦
+##鹧
+##鹫
+##鹭
+##鹰
+##鹳
+##鹵
+##鹹
+##鹼
+##鹽
+##鹿
+##麂
+##麋
+##麒
+##麓
+##麗
+##麝
+##麟
+##麥
+##麦
+##麩
+##麴
+##麵
+##麸
+##麺
+##麻
+##麼
+##麽
+##麾
+##黃
+##黄
+##黍
+##黎
+##黏
+##黑
+##黒
+##黔
+##默
+##黛
+##黜
+##黝
+##點
+##黠
+##黨
+##黯
+##黴
+##鼋
+##鼎
+##鼐
+##鼓
+##鼠
+##鼬
+##鼹
+##鼻
+##鼾
+##齁
+##齊
+##齋
+##齐
+##齒
+##齡
+##齢
+##齣
+##齦
+##齿
+##龄
+##龅
+##龈
+##龊
+##龋
+##龌
+##龍
+##龐
+##龔
+##龕
+##龙
+##龚
+##龛
+##龜
+##龟
+##︰
+##︱
+##︶
+##︿
+##﹁
+##﹂
+##﹍
+##﹏
+##﹐
+##﹑
+##﹒
+##﹔
+##﹕
+##﹖
+##﹗
+##﹙
+##﹚
+##﹝
+##﹞
+##﹡
+##﹣
+##！
+##＂
+##＃
+##＄
+##％
+##＆
+##＇
+##（
+##）
+##＊
+##，
+##－
+##．
+##／
+##：
+##；
+##＜
+##？
+##＠
+##［
+##＼
+##］
+##＾
+##＿
+##｀
+##ｆ
+##ｈ
+##ｊ
+##ｕ
+##ｗ
+##ｚ
+##｛
+##｝
+##｡
+##｢
+##｣
+##､
+##･
+##ｯ
+##ｰ
+##ｲ
+##ｸ
+##ｼ
+##ｽ
+##ﾄ
+##ﾉ
+##ﾌ
+##ﾗ
+##ﾙ
+##ﾝ
+##ﾞ
+##ﾟ
+##￣
+##￥
+##👍
+##🔥
+##😂
+##😎
diff --git a/example/nlp_to_mindrecord/zhwiki/README.md b/example/nlp_to_mindrecord/zhwiki/README.md
new file mode 100644
index 0000000000..d2c0fd51c0
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/README.md
@@ -0,0 +1,107 @@
+# Guideline to Convert Training Data zhwiki to MindRecord For Bert Pre Training
+
+<!-- TOC -->
+
+- [What does the example do](#what-does-the-example-do)
+- [Run simple test](#run-simple-test)
+- [How to use the example to process zhwiki](#how-to-use-the-example-to-process-zhwiki)
+    - [Download zhwiki training data](#download-zhwiki-training-data)
+    - [Extract the zhwiki](#extract-the-zhwiki)
+    - [Generate MindRecord](#generate-mindrecord)
+    - [Create MindDataset By MindRecord](#create-minddataset-by-mindrecord)
+
+
+<!-- /TOC -->
+
+## What does the example do
+
+This example converts the [zhwiki](https://dumps.wikimedia.org/zhwiki) training data into MindRecord files, which are then used for Bert network training.
+
+1.  run.sh: the entry script that generates the MindRecord files.
+    - create_pretraining_data.py: the script from [google-research/bert](https://github.com/google-research/bert); we only changed the part that writes TFRecord so that it writes MindRecord instead.
+    - tokenization.py: the script from [google-research/bert](https://github.com/google-research/bert).
+    - vocab.txt: the file from [huawei-noah/Pretrained-Language-Model](https://github.com/huawei-noah/Pretrained-Language-Model/tree/master/NEZHA-TensorFlow/nezha).
+    - sample_text.txt: the file from [google-research/bert](https://github.com/google-research/bert).
+2.  run_read.sh: the entry script that creates a MindDataset from the MindRecord files.
+    - create_dataset.py: use MindDataset to read MindRecord to generate dataset.
+
+## Run simple test
+
+Follow these steps:
+
+```bash
+bash run.sh         # generate zhwiki.mindrecord* from sample_text.txt
+bash run_read.sh    # use MindDataset to read zhwiki.mindrecord*
+```
+
+## How to use the example to process zhwiki
+
+Download the zhwiki data, extract it, convert it to MindRecord, and then use MindDataset to read the MindRecord files.
+
+### Download zhwiki training data
+
+> [zhwiki dataset download address](https://dumps.wikimedia.org/zhwiki) **-> 20200401 -> zhwiki-20200401-pages-articles-multistream.xml.bz2**
+
+### Extract the zhwiki
+
+1. Download [wikiextractor](https://github.com/attardi/wikiextractor) script.
+
+2. Extract the zhwiki.
+    ```bash
+    python WikiExtractor.py -o {output_path}/extract {input_path}/zhwiki-20200401-pages-articles-multistream.xml.bz2
+    ```
+
+3. The extraction produces output like this:
+    ```
+    $ ls {output_path}/extract
+    AA AB AC AD AE AF AG AH AI AJ AK AL AM AN
+    ```
+
+### Generate MindRecord
+
+1. Modify the parameters in run.sh: --input_file, --output_file, --partition_number.
+    ```
+    --input_file: Input raw text file (or comma-separated list of files).
+    --output_file: Output MindRecord file.
+    --partition_number: The number of partitions the MindRecord file will be split into.
+    ```
+
+2. Run the run.sh script.
+    ```
+    bash run.sh
+    ```
+    > Caution: This process is slow, so please be patient. Running it on a server is recommended.
+
+3. The output looks like this:
+    ```
+    ...
+    [INFO] ME(23485,python):2020-04-28-17:16:40.670.744 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(23485,python):2020-04-28-17:16:40.671.227 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(23485,python):2020-04-28-17:16:40.671.660 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(23485,python):2020-04-28-17:16:40.672.037 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(23485,python):2020-04-28-17:16:40.672.453 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    [INFO] ME(23485,python):2020-04-28-17:16:40.672.833 [mindspore/ccsrc/mindrecord/io/shard_writer.cc:667] WriteRawData] Write 1 records successfully.
+    ...
+    [INFO] ME(23485:140354285963072,MainProcess):2020-04-28-17:16:40.718.039 [mindspore/mindrecord/filewriter.py:313] The list of mindrecord files created are: ['zhwiki.mindrecord0', 'zhwiki.mindrecord1', 'zhwiki.mindrecord2', 'zhwiki.mindrecord3'], and the list of index files are: ['zhwiki.mindrecord0.db', 'zhwiki.mindrecord1.db', 'zhwiki.mindrecord2.db', 'zhwiki.mindrecord3.db']
+    ...
+    ```
+
+### Create MindDataset By MindRecord
+
+1. Run the run_read.sh script.
+    ```bash
+    bash run_read.sh
+    ```
+
+2. The output looks like this:
+    ```
+    ...
+    example 74: input_ids: [  101  8168   118 12847  8783  9977 15908   117  8256  9245 11643  8168  8847  8588 11575  8154  8228   143  8384  8376  9197 10241   103 10564 11421  8199 12268   112   161  8228 11541  9586  8436  8174  8363  9864  9702   103   103   119   103  9947 10564   103  8436  8806 11479   103  8912   119   103   103   103 12209  8303   103  8757  8824   117  8256   103  8619  8168 11541   102 11684  8196   103  8228  8847 11523   117  9059  9064 12410  8358  8181 10764   117 11167 11706  9920   148  8332 11390  8936  8205 10951 11997   103  8154   117   103  8670 10467   112   161 10951 13139 12413   117 10288   143 10425  8205   152 10795  8472  8196   103   161 12126  9172 13129 12106  8217  8174 12244  8205   143   103  8461  8277 10628   160  8221   119   102]
+    example 74: input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+    example 74: segment_ids: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+    example 74: masked_lm_positions: [  6  22  37  38  40  43  47  50  51  52  55  60  67  76  89  92  98 109 120   0]
+    example 74: masked_lm_ids: [ 8118  8165  8329  8890  8554  8458   119  8850  8565 10392  8174 11467  10291  8181  8549 12718 13139   112   158     0]
+    example 74: masked_lm_weights: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.]
+    example 74: next_sentence_labels: [0]
+    ...
+    ```
diff --git a/example/nlp_to_mindrecord/zhwiki/create_dataset.py b/example/nlp_to_mindrecord/zhwiki/create_dataset.py
new file mode 100644
index 0000000000..8404662bd4
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/create_dataset.py
@@ -0,0 +1,43 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""create MindDataset by MindRecord"""
+import argparse
+import mindspore.dataset as ds
+
+def create_dataset(data_file):
+    """Read data_file through a shuffled MindDataset (4 workers) and print every example; returns None."""
+    num_readers = 4
+    data_set = ds.MindDataset(dataset_file=data_file, num_parallel_workers=num_readers, shuffle=True)
+    index = 0
+    for item in data_set.create_dict_iterator():
+        # Print each column of the example on its own line, tagged with the running example index.
+        print("example {}: input_ids: {}".format(index, item['input_ids']))
+        print("example {}: input_mask: {}".format(index, item['input_mask']))
+        print("example {}: segment_ids: {}".format(index, item['segment_ids']))
+        print("example {}: masked_lm_positions: {}".format(index, item['masked_lm_positions']))
+        print("example {}: masked_lm_ids: {}".format(index, item['masked_lm_ids']))
+        print("example {}: masked_lm_weights: {}".format(index, item['masked_lm_weights']))
+        print("example {}: next_sentence_labels: {}".format(index, item['next_sentence_labels']))
+        index += 1
+        if index % 1000 == 0:
+            print("read rows: {}".format(index))
+    print("total rows: {}".format(index))
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input_file", type=str, required=True, help='Input mindreord file')
+    args = parser.parse_args()
+
+    create_dataset(args.input_file)
diff --git a/example/nlp_to_mindrecord/zhwiki/create_pretraining_data.py b/example/nlp_to_mindrecord/zhwiki/create_pretraining_data.py
new file mode 100644
index 0000000000..0de852a265
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/create_pretraining_data.py
@@ -0,0 +1,428 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Create masked LM/next sentence masked_lm MindRecord files for BERT."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import collections
+import logging
+import random
+import tokenization
+
+import numpy as np
+from mindspore.mindrecord import FileWriter
+
+# pylint: skip-file
+
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
+                    datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)  # root logger: timestamped records, INFO and above
+
+
+class TrainingInstance(object):
+  """One training example (sentence pair): tokens, segment ids, masked-LM targets and NSP flag."""
+
+  def __init__(self, tokens, segment_ids, masked_lm_positions, masked_lm_labels,
+               is_random_next):
+    # Plain value holder: every argument is stored unchanged.
+    self.tokens, self.segment_ids = tokens, segment_ids
+    self.is_random_next = is_random_next
+    self.masked_lm_positions = masked_lm_positions
+    self.masked_lm_labels = masked_lm_labels
+
+  def __str__(self):
+    """Render the five fields, one "name: values" line each, followed by a blank line."""
+    tokens = " ".join(tokenization.printable_text(tok) for tok in self.tokens)
+    segments = " ".join(map(str, self.segment_ids))
+    positions = " ".join(map(str, self.masked_lm_positions))
+    labels = " ".join(tokenization.printable_text(lab) for lab in self.masked_lm_labels)
+    return ("tokens: %s\n" % tokens +
+            "segment_ids: %s\n" % segments +
+            "is_random_next: %s\n" % self.is_random_next +
+            "masked_lm_positions: %s\n" % positions +
+            "masked_lm_labels: %s\n" % labels +
+            "\n")
+
+  def __repr__(self):
+    return self.__str__()
+
+
+def write_instance_to_example_files(instances, tokenizer, max_seq_length,
+                                    max_predictions_per_seq, output_file, partition_number):
+  """Write `TrainingInstance`s to MindRecord file(s) through `FileWriter`.
+
+  Args:
+    instances: iterable of `TrainingInstance` objects to serialise.
+    tokenizer: provides `convert_tokens_to_ids` for tokens and masked labels.
+    max_seq_length: length the input_ids/input_mask/segment_ids rows are zero-padded to.
+    max_predictions_per_seq: length the three masked-LM rows are zero-padded to.
+    output_file: file name handed to `FileWriter`.
+    partition_number: second `FileWriter` argument, converted with `int()`.
+  """
+  int_fields = ("input_ids", "input_mask", "segment_ids", "masked_lm_positions", "masked_lm_ids")
+  data_schema = {name: {"type": "int64", "shape": [-1]} for name in int_fields}
+  data_schema["masked_lm_weights"] = {"type": "float64", "shape": [-1]}
+  data_schema["next_sentence_labels"] = {"type": "int64", "shape": [-1]}
+  writer = FileWriter(output_file, int(partition_number))
+  writer.add_schema(data_schema, "zhwiki schema")
+
+  total_written = 0
+  for inst_index, instance in enumerate(instances):
+    input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)
+    input_mask = [1] * len(input_ids)
+    segment_ids = list(instance.segment_ids)
+    assert len(input_ids) <= max_seq_length
+
+    # All three sequence rows receive the same number of trailing zeros.
+    seq_padding = [0] * (max_seq_length - len(input_ids))
+    input_ids.extend(seq_padding)
+    input_mask.extend(seq_padding)
+    segment_ids.extend(seq_padding)
+
+    assert len(input_ids) == max_seq_length
+    assert len(input_mask) == max_seq_length
+    assert len(segment_ids) == max_seq_length
+
+    masked_lm_positions = list(instance.masked_lm_positions)
+    masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)
+    masked_lm_weights = [1.0] * len(masked_lm_ids)
+
+    # Padded predictions carry weight 0.0; real ones were given 1.0 above.
+    lm_padding = max_predictions_per_seq - len(masked_lm_positions)
+    masked_lm_positions.extend([0] * lm_padding)
+    masked_lm_ids.extend([0] * lm_padding)
+    masked_lm_weights.extend([0.0] * lm_padding)
+
+    features = collections.OrderedDict([
+        ("input_ids", np.asarray(input_ids)),
+        ("input_mask", np.asarray(input_mask)),
+        ("segment_ids", np.asarray(segment_ids)),
+        ("masked_lm_positions", np.asarray(masked_lm_positions)),
+        ("masked_lm_ids", np.asarray(masked_lm_ids)),
+        ("masked_lm_weights", np.asarray(masked_lm_weights)),
+        ("next_sentence_labels", np.asarray([1 if instance.is_random_next else 0])),
+    ])
+    total_written += 1
+    if inst_index < 20:  # only the first 20 instances are echoed to the log
+      logging.info("*** Example ***")
+      logging.info("tokens: %s" % " ".join(
+          tokenization.printable_text(tok) for tok in instance.tokens))
+      for name, values in features.items():
+        logging.info("%s: %s" % (name, " ".join(str(v) for v in values)))
+    writer.write_raw_data([features])
+
+  writer.commit()
+
+  logging.info("Wrote %d total instances", total_written)
+
+
+def create_training_instances(input_files, tokenizer, max_seq_length,
+                              dupe_factor, short_seq_prob, masked_lm_prob,
+                              max_predictions_per_seq, rng, do_whole_word_mask):
+  """Create `TrainingInstance`s from raw text files, which are decoded as UTF-8.
+
+  Input file format: (1) one sentence per line -- ideally real sentences, since
+  sentence boundaries drive the "next sentence prediction" task; (2) blank lines
+  between documents, so that next-sentence pairs never span two documents.
+  Every document is processed `dupe_factor` times with randomness drawn from
+  `rng`; the combined instances are shuffled and returned as a list.
+  """
+  import io  # local import: io.open accepts `encoding` on Python 2 and 3 alike
+
+  all_documents = [[]]
+  for input_file in input_files:
+    # Pin the encoding: the platform default (e.g. GBK, ASCII) would mis-decode
+    # or reject a UTF-8 corpus.
+    with io.open(input_file, "r", encoding="utf-8") as reader:
+      for raw_line in reader:
+        line = tokenization.convert_to_unicode(raw_line).strip()
+        # Empty lines are used as document delimiters
+        if not line:
+          all_documents.append([])
+        tokens = tokenizer.tokenize(line)
+        if tokens:
+          all_documents[-1].append(tokens)
+
+  # Remove empty documents
+  all_documents = [x for x in all_documents if x]
+  rng.shuffle(all_documents)
+
+  vocab_words = list(tokenizer.vocab.keys())
+  instances = []
+  for _ in range(dupe_factor):
+    for document_index in range(len(all_documents)):
+      instances.extend(
+          create_instances_from_document(
+              all_documents, document_index, max_seq_length, short_seq_prob,
+              masked_lm_prob, max_predictions_per_seq, vocab_words, rng, do_whole_word_mask))
+
+  rng.shuffle(instances)
+  return instances
+
+
+def create_instances_from_document(
+    all_documents, document_index, max_seq_length, short_seq_prob,
+    masked_lm_prob, max_predictions_per_seq, vocab_words, rng, do_whole_word_mask):
+  """Creates `TrainingInstance`s (masked sentence pairs with a random-next flag) for one document."""
+  document = all_documents[document_index]
+
+  # Account for [CLS], [SEP], [SEP]
+  max_num_tokens = max_seq_length - 3
+
+  # We *usually* want to fill up the entire sequence since we are padding
+  # to `max_seq_length` anyways, so short sequences are generally wasted
+  # computation. However, we *sometimes*
+  # (i.e., short_seq_prob == 0.1 == 10% of the time) want to use shorter
+  # sequences to minimize the mismatch between pre-training and fine-tuning.
+  # The `target_seq_length` is just a rough target however, whereas
+  # `max_seq_length` is a hard limit.
+  target_seq_length = max_num_tokens
+  if rng.random() < short_seq_prob:
+    target_seq_length = rng.randint(2, max_num_tokens)  # drawn once, used for the whole document
+
+  # We DON'T just concatenate all of the tokens from a document into a long
+  # sequence and choose an arbitrary split point because this would make the
+  # next sentence prediction task too easy. Instead, we split the input into
+  # segments "A" and "B" based on the actual "sentences" provided by the user
+  # input.
+  instances = []
+  current_chunk = []
+  current_length = 0
+  i = 0
+  while i < len(document):
+    segment = document[i]
+    current_chunk.append(segment)
+    current_length += len(segment)
+    if i == len(document) - 1 or current_length >= target_seq_length:
+      if current_chunk:
+        # `a_end` is how many segments from `current_chunk` go into the `A`
+        # (first) sentence.
+        a_end = 1
+        if len(current_chunk) >= 2:
+          a_end = rng.randint(1, len(current_chunk) - 1)
+
+        tokens_a = []
+        for j in range(a_end):
+          tokens_a.extend(current_chunk[j])
+
+        tokens_b = []
+        # Random next
+        is_random_next = False
+        if len(current_chunk) == 1 or rng.random() < 0.5:
+          is_random_next = True
+          target_b_length = target_seq_length - len(tokens_a)
+
+          # Pick another document for the "random next" sentence. This should
+          # rarely need more than one draw for large corpora. If all 10 draws
+          # return the current document (always the case when the corpus has
+          # a single document), the current document itself is used.
+          for _ in range(10):
+            random_document_index = rng.randint(0, len(all_documents) - 1)
+            if random_document_index != document_index:
+              break
+
+          random_document = all_documents[random_document_index]
+          random_start = rng.randint(0, len(random_document) - 1)
+          for j in range(random_start, len(random_document)):
+            tokens_b.extend(random_document[j])
+            if len(tokens_b) >= target_b_length:
+              break
+          # We didn't actually use these segments so we "put them back" so
+          # they don't go to waste.
+          num_unused_segments = len(current_chunk) - a_end
+          i -= num_unused_segments  # rewind: the loop re-reads the segments after `a_end`
+        # Actual next
+        else:
+          is_random_next = False  # already False; restated for symmetry with the branch above
+          for j in range(a_end, len(current_chunk)):
+            tokens_b.extend(current_chunk[j])
+        truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng)  # trims both lists in place
+
+        assert len(tokens_a) >= 1
+        assert len(tokens_b) >= 1
+
+        tokens = []
+        segment_ids = []
+        tokens.append("[CLS]")
+        segment_ids.append(0)
+        for token in tokens_a:
+          tokens.append(token)
+          segment_ids.append(0)
+
+        tokens.append("[SEP]")
+        segment_ids.append(0)
+
+        for token in tokens_b:
+          tokens.append(token)
+          segment_ids.append(1)
+        tokens.append("[SEP]")
+        segment_ids.append(1)
+
+        (tokens, masked_lm_positions,
+         masked_lm_labels) = create_masked_lm_predictions(
+             tokens, masked_lm_prob, max_predictions_per_seq, vocab_words, rng, do_whole_word_mask)
+        instance = TrainingInstance(
+            tokens=tokens,
+            segment_ids=segment_ids,
+            is_random_next=is_random_next,
+            masked_lm_positions=masked_lm_positions,
+            masked_lm_labels=masked_lm_labels)
+        instances.append(instance)
+      current_chunk = []  # start collecting the next chunk
+      current_length = 0
+    i += 1
+
+  return instances
+
+
+# One selected position: `index` into the token list and `label`, the original token there.
+MaskedLmInstance = collections.namedtuple("MaskedLmInstance", "index label")
+
+
+def create_masked_lm_predictions(tokens, masked_lm_prob,
+                                 max_predictions_per_seq, vocab_words, rng, do_whole_word_mask):
+  """Chooses positions to predict and builds the masked copy of `tokens`.
+
+  Args:
+    tokens: token list; "[CLS]" and "[SEP]" entries are never selected.
+    masked_lm_prob: fraction of `len(tokens)` to predict (rounded, at least 1).
+    max_predictions_per_seq: upper bound on the number of predictions.
+    vocab_words: sequence that random replacement tokens are drawn from.
+    rng: source of `shuffle`, `random` and `randint` (e.g. `random.Random`).
+    do_whole_word_mask: if true, "##" continuation pieces are grouped with the
+      preceding piece so that a word is selected as a unit.
+
+  Returns:
+    `(output_tokens, masked_lm_positions, masked_lm_labels)`: the token list
+    after replacement, the selected positions in ascending order, and the
+    original tokens at those positions.
+  """
+  special_tokens = ("[CLS]", "[SEP]")
+
+  cand_indexes = []
+  for position, token in enumerate(tokens):
+    if token in special_tokens:
+      continue
+    # Whole Word Masking only changes how candidates are grouped: a "##"
+    # piece joins the candidate of the piece before it. Each WordPiece is
+    # still predicted independently by the training code.
+    joins_previous = (do_whole_word_mask and cand_indexes and
+                      token.startswith("##"))
+    if joins_previous:
+      cand_indexes[-1].append(position)
+    else:
+      cand_indexes.append([position])
+
+  rng.shuffle(cand_indexes)
+
+  output_tokens = list(tokens)
+
+  wanted = max(1, int(round(len(tokens) * masked_lm_prob)))
+  num_to_predict = min(max_predictions_per_seq, wanted)
+
+  masked_lms = []
+  covered_indexes = set()
+  for index_set in cand_indexes:
+    if len(masked_lms) >= num_to_predict:
+      break
+    # Skip a candidate that would exceed the prediction budget, or that
+    # overlaps a position already selected.
+    if len(masked_lms) + len(index_set) > num_to_predict:
+      continue
+    if any(index in covered_indexes for index in index_set):
+      continue
+    for index in index_set:
+      covered_indexes.add(index)
+
+      if rng.random() < 0.8:
+        # 80% of the time, replace with [MASK]
+        replacement = "[MASK]"
+      elif rng.random() < 0.5:
+        # 10% of the time, keep original
+        replacement = tokens[index]
+      else:
+        # 10% of the time, replace with random word
+        replacement = vocab_words[rng.randint(0, len(vocab_words) - 1)]
+
+      output_tokens[index] = replacement
+      masked_lms.append(MaskedLmInstance(index=index, label=tokens[index]))
+
+  assert len(masked_lms) <= num_to_predict
+  masked_lms.sort(key=lambda entry: entry.index)
+
+  masked_lm_positions = [entry.index for entry in masked_lms]
+  masked_lm_labels = [entry.label for entry in masked_lms]
+  return (output_tokens, masked_lm_positions, masked_lm_labels)
+
+
+def truncate_seq_pair(tokens_a, tokens_b, max_num_tokens, rng):
+  """Trims `tokens_a`/`tokens_b` in place until they total at most `max_num_tokens`.
+
+  Each step removes one token from `tokens_a` if it is strictly longer,
+  otherwise from `tokens_b`.
+  """
+  while len(tokens_a) + len(tokens_b) > max_num_tokens:
+    if len(tokens_a) > len(tokens_b):
+      victim = tokens_a
+    else:
+      victim = tokens_b
+    assert len(victim) >= 1
+
+    # Drop from the front or the back at random to avoid a positional bias.
+    drop_at = 0 if rng.random() < 0.5 else -1
+    del victim[drop_at]
+
+
+def _str2bool(value):
+  """argparse converter for booleans; `type=bool` maps any non-empty text, even "False", to True."""
+  text = str(value).strip().lower()
+  if text not in ("true", "1", "yes", "false", "0", "no", ""):
+    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))
+  return text in ("true", "1", "yes")  # the empty string stays False, as it was with type=bool
+
+
+def main():
+  """Parse the command line, build the training instances and write them as MindRecord."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument("--input_file", type=str, required=True, help='Input raw text file (or comma-separated list of files).')
+  parser.add_argument("--output_file", type=str, required=True, help='Output MindRecord file.')
+  parser.add_argument("--partition_number", type=int, default=1, help='The MindRecord file will be split into the number of partition.')
+  parser.add_argument("--vocab_file", type=str, required=True, help='The vocabulary file that the BERT model was trained on.')
+  parser.add_argument("--do_lower_case", type=_str2bool, default=False, help='Whether to lower case the input text. Should be True for uncased models and False for cased models.')
+  parser.add_argument("--do_whole_word_mask", type=_str2bool, default=False, help='Whether to use whole word masking rather than per-WordPiece masking.')
+  parser.add_argument("--max_seq_length", type=int, default=128, help='Maximum sequence length.')
+  parser.add_argument("--max_predictions_per_seq", type=int, default=20, help='Maximum number of masked LM predictions per sequence.')
+  parser.add_argument("--random_seed", type=int, default=12345, help='Random seed for data generation.')
+  parser.add_argument("--dupe_factor", type=int, default=10, help='Number of times to duplicate the input data (with different masks).')
+  parser.add_argument("--masked_lm_prob", type=float, default=0.15, help='Masked LM probability.')
+  parser.add_argument("--short_seq_prob", type=float, default=0.1, help='Probability of creating sequences which are shorter than the maximum length.')
+  args = parser.parse_args()
+  tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file, do_lower_case=args.do_lower_case)
+  input_files = args.input_file.split(",")  # --input_file may name several comma-separated files
+  logging.info("*** Reading from input files ***")
+  for input_file in input_files:
+    logging.info("  %s", input_file)
+  rng = random.Random(args.random_seed)  # seeded so that a run is reproducible
+  instances = create_training_instances(
+      input_files, tokenizer, args.max_seq_length, args.dupe_factor, args.short_seq_prob,
+      args.masked_lm_prob, args.max_predictions_per_seq, rng, args.do_whole_word_mask)
+  write_instance_to_example_files(instances, tokenizer, args.max_seq_length,
+                                  args.max_predictions_per_seq, args.output_file, args.partition_number)
+
+
+if __name__ == "__main__":
+  main()
diff --git a/example/nlp_to_mindrecord/zhwiki/run.sh b/example/nlp_to_mindrecord/zhwiki/run.sh
new file mode 100644
index 0000000000..4376ff0ff4
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/run.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+rm -f zhwiki.mindrecord*  # remove stale output; -f keeps a first run (nothing to delete yet) quiet
+# Build the MindRecord output (--partition_number=4) from the bundled sample text.
+python create_pretraining_data.py \
+--input_file=./sample_text.txt \
+--output_file=zhwiki.mindrecord \
+--partition_number=4 \
+--vocab_file=./vocab.txt \
+--do_lower_case=True \
+--max_seq_length=128 \
+--max_predictions_per_seq=20 \
+--masked_lm_prob=0.15 \
+--random_seed=12345 \
+--dupe_factor=5
diff --git a/example/nlp_to_mindrecord/zhwiki/run_read.sh b/example/nlp_to_mindrecord/zhwiki/run_read.sh
new file mode 100644
index 0000000000..b2d1cfb662
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/run_read.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+python create_dataset.py --input_file=zhwiki.mindrecord0  # presumably the first partition written by run.sh -- confirm the FileWriter file naming
diff --git a/example/nlp_to_mindrecord/zhwiki/sample_text.txt b/example/nlp_to_mindrecord/zhwiki/sample_text.txt
new file mode 100644
index 0000000000..a42812060c
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/sample_text.txt
@@ -0,0 +1,33 @@
+This text is included to make sure Unicode is handled properly: 力加勝北区ᴵᴺᵀᵃছজটডণত
+Text should be one-sentence-per-line, with empty lines between documents.
+This sample text is public domain and was randomly selected from Project Guttenberg.
+
+The rain had only ceased with the gray streaks of morning at Blazing Star, and the settlement awoke to a moral sense of cleanliness, and the finding of forgotten knives, tin cups, and smaller camp utensils, where the heavy showers had washed away the debris and dust heaps before the cabin doors.
+Indeed, it was recorded in Blazing Star that a fortunate early riser had once picked up on the highway a solid chunk of gold quartz which the rain had freed from its incumbering soil, and washed into immediate and glittering popularity.
+Possibly this may have been the reason why early risers in that locality, during the rainy season, adopted a thoughtful habit of body, and seldom lifted their eyes to the rifted or india-ink washed skies above them.
+"Cass" Beard had risen early that morning, but not with a view to discovery.
+A leak in his cabin roof,--quite consistent with his careless, improvident habits,--had roused him at 4 A. M., with a flooded "bunk" and wet blankets.
+The chips from his wood pile refused to kindle a fire to dry his bed-clothes, and he had recourse to a more provident neighbor's to supply the deficiency.
+This was nearly opposite.
+Mr. Cassius crossed the highway, and stopped suddenly.
+Something glittered in the nearest red pool before him.
+Gold, surely!
+But, wonderful to relate, not an irregular, shapeless fragment of crude ore, fresh from Nature's crucible, but a bit of jeweler's handicraft in the form of a plain gold ring.
+Looking at it more attentively, he saw that it bore the inscription, "May to Cass."
+Like most of his fellow gold-seekers, Cass was superstitious.
+
+The fountain of classic wisdom, Hypatia herself.
+As the ancient sage--the name is unimportant to a monk--pumped water nightly that he might study by day, so I, the guardian of cloaks and parasols, at the sacred doors of her lecture-room, imbibe celestial knowledge.
+From my youth I felt in me a soul above the matter-entangled herd.
+She revealed to me the glorious fact, that I am a spark of Divinity itself.
+A fallen star, I am, sir!' continued he, pensively, stroking his lean stomach--'a fallen star!--fallen, if the dignity of philosophy will allow of the simile, among the hogs of the lower world--indeed, even into the hog-bucket itself. Well, after all, I will show you the way to the Archbishop's.
+There is a philosophic pleasure in opening one's treasures to the modest young.
+Perhaps you will assist me by carrying this basket of fruit?' And the little man jumped up, put his basket on Philammon's head, and trotted off up a neighbouring street.
+Philammon followed, half contemptuous, half wondering at what this philosophy might be, which could feed the self-conceit of anything so abject as his ragged little apish guide;
+but the novel roar and whirl of the street, the perpetual stream of busy faces, the line of curricles, palanquins, laden asses, camels, elephants, which met and passed him, and squeezed him up steps and into doorways, as they threaded their way through the great Moon-gate into the ample street beyond, drove everything from his mind but wondering curiosity, and a vague, helpless dread of that great living wilderness, more terrible than any dead wilderness of sand which he had left behind.
+Already he longed for the repose, the silence of the Laura--for faces which knew him and smiled upon him; but it was too late to turn back now.
+His guide held on for more than a mile up the great main street, crossed in the centre of the city, at right angles, by one equally magnificent, at each end of which, miles away, appeared, dim and distant over the heads of the living stream of passengers, the yellow sand-hills of the desert;
+while at the end of the vista in front of them gleamed the blue harbour, through a network of countless masts.
+At last they reached the quay at the opposite end of the street;
+and there burst on Philammon's astonished eyes a vast semicircle of blue sea, ringed with palaces and towers.
+He stopped involuntarily; and his little guide stopped also, and looked askance at the young monk, to watch the effect which that grand panorama should produce on him.
diff --git a/example/nlp_to_mindrecord/zhwiki/tokenization.py b/example/nlp_to_mindrecord/zhwiki/tokenization.py
new file mode 100644
index 0000000000..50e9445a19
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/tokenization.py
@@ -0,0 +1,394 @@
+# coding=utf-8
+# Copyright 2018 The Google AI Language Team Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tokenization classes."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import re
+import unicodedata
+import six
+
+# pylint: skip-file
+
+def validate_case_matches_checkpoint(do_lower_case, init_checkpoint):
+  """Raises ValueError when `do_lower_case` contradicts a known checkpoint name.
+
+  The casing is passed in by the user and is not stored with the checkpoint,
+  so it is detected heuristically from the name of the directory that holds
+  `bert_model.ckpt`. Nothing is checked when `init_checkpoint` is empty, does
+  not match that layout, or names a model outside the two lists below.
+
+  Args:
+    do_lower_case: the casing flag the user chose.
+    init_checkpoint: checkpoint path; may be empty or None.
+  """
+  if not init_checkpoint:
+    return
+
+  match = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint)
+  if match is None:
+    return
+
+  model_name = match.group(1)
+
+  lower_models = [
+      "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12",
+      "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12"
+  ]
+
+  cased_models = [
+      "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16",
+      "multi_cased_L-12_H-768_A-12"
+  ]
+
+  # (flag the user passed, casing of the model, flag that should be passed)
+  mismatch = None
+  if not do_lower_case and model_name in lower_models:
+    mismatch = ("False", "lowercased", "True")
+  if do_lower_case and model_name in cased_models:
+    mismatch = ("True", "cased", "False")
+  if mismatch is None:
+    return
+
+  actual_flag, case_name, opposite_flag = mismatch
+  raise ValueError(
+      "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. "
+      "However, `%s` seems to be a %s model, so you "
+      "should pass in `--do_lower_case=%s` so that the fine-tuning matches "
+      "how the model was pre-training. If this error is wrong, please "
+      "just comment out this check." % (actual_flag, init_checkpoint,
+                                        model_name, case_name, opposite_flag))
+
+
+def convert_to_unicode(text):
+  """Returns `text` as a Unicode string, decoding byte strings as UTF-8.
+
+  Bytes that are not valid UTF-8 are dropped ("ignore"). Raises ValueError for
+  any other argument type, or when `six` reports neither Python 2 nor 3.
+  """
+  if not (six.PY3 or six.PY2):
+    raise ValueError("Not running on Python2 or Python 3?")
+  if six.PY3:
+    text_type, bytes_type = str, bytes
+  else:
+    text_type, bytes_type = unicode, str
+
+  if isinstance(text, text_type):
+    return text
+  if isinstance(text, bytes_type):
+    return text.decode("utf-8", "ignore")
+  raise ValueError("Unsupported string type: %s" % (type(text)))
+
+
+def printable_text(text):
+  """Returns `text` as the native `str` type, suitable for print or logging.
+
+  Native `str` is Unicode on Python 3 and a byte string on Python 2, so the
+  other string type is decoded (Python 3) or encoded (Python 2) as UTF-8.
+  Raises ValueError for non-string arguments.
+  """
+  if not (six.PY3 or six.PY2):
+    raise ValueError("Not running on Python2 or Python 3?")
+
+  if isinstance(text, str):
+    # Already the native string type on either Python version.
+    return text
+
+  if six.PY3:
+    if isinstance(text, bytes):
+      return text.decode("utf-8", "ignore")
+  elif isinstance(text, unicode):
+    return text.encode("utf-8")
+
+  raise ValueError("Unsupported string type: %s" % (type(text)))
+
+
+def load_vocab(vocab_file):
+  """Loads a UTF-8 vocabulary file into an OrderedDict of token -> line index.
+
+  Each line holds one token (surrounding whitespace is stripped); indices start at 0.
+  """
+  import io  # local import: io.open accepts `encoding` on Python 2 and 3 alike
+  vocab = collections.OrderedDict()
+  # Pin the encoding: the platform default (e.g. GBK, ASCII) would corrupt or
+  # reject non-ASCII tokens. io.open yields Unicode text on Python 2 and 3.
+  with io.open(vocab_file, "r", encoding="utf-8") as reader:
+    for index, line in enumerate(reader):
+      vocab[line.strip()] = index
+  return vocab
+
+
+def convert_by_vocab(vocab, items):
+  """Looks up every element of `items` in the mapping `vocab`.
+
+  Returns the looked-up values as a list, in input order.
+  """
+  return [vocab[item] for item in items]
+
+
+def convert_tokens_to_ids(vocab, tokens):  # delegates to convert_by_vocab; presumably vocab maps token -> id
+  return convert_by_vocab(vocab, tokens)
+
+
+def convert_ids_to_tokens(inv_vocab, ids):  # delegates to convert_by_vocab; presumably inv_vocab maps id -> token
+  return convert_by_vocab(inv_vocab, ids)
+
+
+def whitespace_tokenize(text):
+  """Splits `text` on runs of whitespace after trimming both ends.
+
+  Returns a list of tokens; empty or all-whitespace text gives [].
+  """
+  trimmed = text.strip()
+  return trimmed.split() if trimmed else []
+
+
+class FullTokenizer(object):
+  """Runs end-to-end tokenization: basic tokenization followed by WordPiece splitting."""
+
+  def __init__(self, vocab_file, do_lower_case=True):
+    self.vocab = vocab = load_vocab(vocab_file)
+    # Reverse mapping (index -> token) used by convert_ids_to_tokens.
+    self.inv_vocab = dict((index, token) for token, index in vocab.items())
+    self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
+    self.wordpiece_tokenizer = WordpieceTokenizer(vocab=vocab)
+
+  def tokenize(self, text):
+    """Returns the WordPiece tokens of `text` as one flat list."""
+    basic_tokens = self.basic_tokenizer.tokenize(text)
+    return [piece
+            for basic_token in basic_tokens
+            for piece in self.wordpiece_tokenizer.tokenize(basic_token)]
+
+  def convert_tokens_to_ids(self, tokens):
+    return convert_by_vocab(self.vocab, tokens)
+
+  def convert_ids_to_tokens(self, ids):
+    return convert_by_vocab(self.inv_vocab, ids)
+
+
+class BasicTokenizer(object):
+  """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
+
+  def __init__(self, do_lower_case=True):
+    """Constructs a BasicTokenizer.
+    Args:
+      do_lower_case: Whether to lower case the input.
+    """
+    self.do_lower_case = do_lower_case
+
+  def tokenize(self, text):
+    """Tokenizes a piece of text."""
+    text = convert_to_unicode(text)
+    text = self._clean_text(text)
+
+    # This was added on November 1st, 2018 for the multilingual and Chinese
+    # models. This is also applied to the English models now, but it doesn't
+    # matter since the English models were not trained on any Chinese data
+    # and generally don't have any Chinese data in them (there are Chinese
+    # characters in the vocabulary because Wikipedia does have some Chinese
+    # words in the English Wikipedia.).
+    text = self._tokenize_chinese_chars(text)
+
+    orig_tokens = whitespace_tokenize(text)
+    split_tokens = []
+    for token in orig_tokens:
+      if self.do_lower_case:
+        token = token.lower()
+        token = self._run_strip_accents(token)
+      split_tokens.extend(self._run_split_on_punc(token))
+
+    output_tokens = whitespace_tokenize(" ".join(split_tokens))
+    return output_tokens
+
+  def _run_strip_accents(self, text):
+    """Strips accents from a piece of text."""
+    text = unicodedata.normalize("NFD", text)
+    output = []
+    for char in text:
+      cat = unicodedata.category(char)
+      if cat == "Mn":
+        continue
+      output.append(char)
+    return "".join(output)
+
+  def _run_split_on_punc(self, text):
+    """Splits punctuation on a piece of text."""
+    chars = list(text)
+    i = 0
+    start_new_word = True
+    output = []
+    while i < len(chars):
+      char = chars[i]
+      if _is_punctuation(char):
+        output.append([char])
+        start_new_word = True
+      else:
+        if start_new_word:
+          output.append([])
+        start_new_word = False
+        output[-1].append(char)
+      i += 1
+
+    return ["".join(x) for x in output]
+
+  def _tokenize_chinese_chars(self, text):
+    """Returns text with a space added on both sides of every CJK character."""
+    output = []
+    for char in text:
+      cp = ord(char)
+      if self._is_chinese_char(cp):
+        output.append(" ")
+        output.append(char)
+        output.append(" ")
+      else:
+        output.append(char)
+    return "".join(output)
+
+  def _is_chinese_char(self, cp):
+    """Checks whether `cp` is the codepoint of a CJK character."""
+    # This defines a "chinese character" as anything in the CJK Unicode block:
+    #   https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+    #
+    # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
+    # despite its name. The modern Korean Hangul alphabet is a different block,
+    # as are Japanese Hiragana and Katakana. Those alphabets are used to write
+    # space-separated words, so they are not treated specially and are handled
+    # like all of the other languages.
+    if ((cp >= 0x4E00 and cp <= 0x9FFF) or  # CJK Unified Ideographs
+        (cp >= 0x3400 and cp <= 0x4DBF) or  # Extension A
+        (cp >= 0x20000 and cp <= 0x2A6DF) or  # Extension B
+        (cp >= 0x2A700 and cp <= 0x2B73F) or  # Extension C
+        (cp >= 0x2B740 and cp <= 0x2B81F) or  # Extension D
+        (cp >= 0x2B820 and cp <= 0x2CEAF) or  # Extension E
+        (cp >= 0xF900 and cp <= 0xFAFF) or  # CJK Compatibility Ideographs
+        (cp >= 0x2F800 and cp <= 0x2FA1F)):  # Compatibility Ideographs Supplement
+      return True
+
+    return False
+
+  def _clean_text(self, text):
+    """Drops NUL, U+FFFD and control chars; maps whitespace to a plain space."""
+    output = []
+    for char in text:
+      cp = ord(char)
+      if cp == 0 or cp == 0xfffd or _is_control(char):
+        continue
+      if _is_whitespace(char):
+        output.append(" ")
+      else:
+        output.append(char)
+    return "".join(output)
+
+
+class WordpieceTokenizer(object):
+  """Runs WordPiece tokenization."""
+
+  def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200):
+    self.vocab = vocab  # queried with `in` for every candidate piece
+    self.unk_token = unk_token  # emitted for words that cannot be split
+    self.max_input_chars_per_word = max_input_chars_per_word  # longer => unk
+
+  def tokenize(self, text):
+    """Tokenizes a piece of text into its word pieces.
+    This uses a greedy longest-match-first algorithm to perform tokenization
+    using the given vocabulary.
+    For example:
+      input = "unaffable"
+      output = ["un", "##aff", "##able"]
+    Args:
+      text: A single token or whitespace separated tokens. This should have
+        already been passed through `BasicTokenizer`.
+    Returns:
+      A list of wordpiece tokens (`unk_token` for unmatchable/too-long words).
+    """
+
+    text = convert_to_unicode(text)
+
+    output_tokens = []
+    for token in whitespace_tokenize(text):
+      chars = list(token)
+      if len(chars) > self.max_input_chars_per_word:
+        output_tokens.append(self.unk_token)
+        continue
+
+      is_bad = False
+      start = 0
+      sub_tokens = []
+      while start < len(chars):
+        end = len(chars)  # begin with the longest candidate
+        cur_substr = None
+        while start < end:
+          substr = "".join(chars[start:end])
+          if start > 0:
+            substr = "##" + substr  # non-initial pieces get the "##" prefix
+          if substr in self.vocab:
+            cur_substr = substr
+            break
+          end -= 1  # no match: try a shorter candidate
+        if cur_substr is None:
+          is_bad = True  # nothing matches at `start`: whole word is unknown
+          break
+        sub_tokens.append(cur_substr)
+        start = end  # resume right after the matched piece
+
+      if is_bad:
+        output_tokens.append(self.unk_token)
+      else:
+        output_tokens.extend(sub_tokens)
+    return output_tokens
+
+
+def _is_whitespace(char):
+  """Checks whether `char` is a whitespace character."""
+  # \t, \n, and \r are technically control characters but we treat them
+  # as whitespace since they are generally considered as such.
+  if char == " " or char == "\t" or char == "\n" or char == "\r":
+    return True
+  cat = unicodedata.category(char)
+  if cat == "Zs":  # Unicode "space separator" category
+    return True
+  return False
+
+
+def _is_control(char):
+  """Checks whether `char` is a control character."""
+  # These are technically control characters but we count them as whitespace
+  # characters.
+  if char == "\t" or char == "\n" or char == "\r":
+    return False
+  cat = unicodedata.category(char)
+  if cat in ("Cc", "Cf"):  # Unicode "control" and "format" categories
+    return True
+  return False
+
+
+def _is_punctuation(char):
+  """Checks whether `char` is a punctuation character."""
+  cp = ord(char)
+  # We treat all non-letter/number ASCII as punctuation.
+  # Characters such as "^", "$", and "`" are not in the Unicode
+  # Punctuation class but we treat them as punctuation anyways, for
+  # consistency.
+  if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
+      (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+    return True
+  cat = unicodedata.category(char)
+  if cat.startswith("P"):  # any Unicode punctuation category (Pc, Pd, Ps, ...)
+    return True
+  return False
diff --git a/example/nlp_to_mindrecord/zhwiki/vocab.txt b/example/nlp_to_mindrecord/zhwiki/vocab.txt
new file mode 100644
index 0000000000..ca4f978103
--- /dev/null
+++ b/example/nlp_to_mindrecord/zhwiki/vocab.txt
@@ -0,0 +1,21128 @@
+[PAD]
+[unused1]
+[unused2]
+[unused3]
+[unused4]
+[unused5]
+[unused6]
+[unused7]
+[unused8]
+[unused9]
+[unused10]
+[unused11]
+[unused12]
+[unused13]
+[unused14]
+[unused15]
+[unused16]
+[unused17]
+[unused18]
+[unused19]
+[unused20]
+[unused21]
+[unused22]
+[unused23]
+[unused24]
+[unused25]
+[unused26]
+[unused27]
+[unused28]
+[unused29]
+[unused30]
+[unused31]
+[unused32]
+[unused33]
+[unused34]
+[unused35]
+[unused36]
+[unused37]
+[unused38]
+[unused39]
+[unused40]
+[unused41]
+[unused42]
+[unused43]
+[unused44]
+[unused45]
+[unused46]
+[unused47]
+[unused48]
+[unused49]
+[unused50]
+[unused51]
+[unused52]
+[unused53]
+[unused54]
+[unused55]
+[unused56]
+[unused57]
+[unused58]
+[unused59]
+[unused60]
+[unused61]
+[unused62]
+[unused63]
+[unused64]
+[unused65]
+[unused66]
+[unused67]
+[unused68]
+[unused69]
+[unused70]
+[unused71]
+[unused72]
+[unused73]
+[unused74]
+[unused75]
+[unused76]
+[unused77]
+[unused78]
+[unused79]
+[unused80]
+[unused81]
+[unused82]
+[unused83]
+[unused84]
+[unused85]
+[unused86]
+[unused87]
+[unused88]
+[unused89]
+[unused90]
+[unused91]
+[unused92]
+[unused93]
+[unused94]
+[unused95]
+[unused96]
+[unused97]
+[unused98]
+[unused99]
+[UNK]
+[CLS]
+[SEP]
+[MASK]
+<S>
+<T>
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+^
+_
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+{
+|
+}
+~
+£
+¤
+¥
+§
+©
+«
+®
+°
+±
+²
+³
+µ
+·
+¹
+º
+»
+¼
+×
+ß
+æ
+÷
+ø
+đ
+ŋ
+ɔ
+ə
+ɡ
+ʰ
+ˇ
+ˈ
+ˊ
+ˋ
+ˍ
+ː
+˙
+˚
+ˢ
+α
+β
+γ
+δ
+ε
+η
+θ
+ι
+κ
+λ
+μ
+ν
+ο
+π
+ρ
+ς
+σ
+τ
+υ
+φ
+χ
+ψ
+ω
+а
+б
+в
+г
+д
+е
+ж
+з
+и
+к
+л
+м
+н
+о
+п
+р
+с
+т
+у
+ф
+х
+ц
+ч
+ш
+ы
+ь
+я
+і
+ا
+ب
+ة
+ت
+د
+ر
+س
+ع
+ل
+م
+ن
+ه
+و
+ي
+۩
+ก
+ง
+น
+ม
+ย
+ร
+อ
+า
+เ
+๑
+་
+ღ
+ᄀ
+ᄁ
+ᄂ
+ᄃ
+ᄅ
+ᄆ
+ᄇ
+ᄈ
+ᄉ
+ᄋ
+ᄌ
+ᄎ
+ᄏ
+ᄐ
+ᄑ
+ᄒ
+ᅡ
+ᅢ
+ᅣ
+ᅥ
+ᅦ
+ᅧ
+ᅨ
+ᅩ
+ᅪ
+ᅬ
+ᅭ
+ᅮ
+ᅯ
+ᅲ
+ᅳ
+ᅴ
+ᅵ
+ᆨ
+ᆫ
+ᆯ
+ᆷ
+ᆸ
+ᆺ
+ᆻ
+ᆼ
+ᗜ
+ᵃ
+ᵉ
+ᵍ
+ᵏ
+ᵐ
+ᵒ
+ᵘ
+‖
+„
+†
+•
+‥
+‧
+ 
+‰
+′
+″
+‹
+›
+※
+‿
+⁄
+ⁱ
+⁺
+ⁿ
+₁
+₂
+₃
+₄
+€
+℃
+№
+™
+ⅰ
+ⅱ
+ⅲ
+ⅳ
+ⅴ
+←
+↑
+→
+↓
+↔
+↗
+↘
+⇒
+∀
+−
+∕
+∙
+√
+∞
+∟
+∠
+∣
+∥
+∩
+∮
+∶
+∼
+∽
+≈
+≒
+≡
+≤
+≥
+≦
+≧
+≪
+≫
+⊙
+⋅
+⋈
+⋯
+⌒
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+⑴
+⑵
+⑶
+⑷
+⑸
+⒈
+⒉
+⒊
+⒋
+ⓒ
+ⓔ
+ⓘ
+─
+━
+│
+┃
+┅
+┆
+┊
+┌
+└
+├
+┣
+═
+║
+╚
+╞
+╠
+╭
+╮
+╯
+╰
+╱
+╳
+▂
+▃
+▅
+▇
+█
+▉
+▋
+▌
+▍
+▎
+■
+□
+▪
+▫
+▬
+▲
+△
+▶
+►
+▼
+▽
+◆
+◇
+○
+◎
+●
+◕
+◠
+◢
+◤
+☀
+★
+☆
+☕
+☞
+☺
+☼
+♀
+♂
+♠
+♡
+♣
+♥
+♦
+♪
+♫
+♬
+✈
+✔
+✕
+✖
+✦
+✨
+✪
+✰
+✿
+❀
+❤
+➜
+➤
+⦿
+、
+。
+〃
+々
+〇
+〈
+〉
+《
+》
+「
+」
+『
+』
+【
+】
+〓
+〔
+〕
+〖
+〗
+〜
+〝
+〞
+ぁ
+あ
+ぃ
+い
+う
+ぇ
+え
+お
+か
+き
+く
+け
+こ
+さ
+し
+す
+せ
+そ
+た
+ち
+っ
+つ
+て
+と
+な
+に
+ぬ
+ね
+の
+は
+ひ
+ふ
+へ
+ほ
+ま
+み
+む
+め
+も
+ゃ
+や
+ゅ
+ゆ
+ょ
+よ
+ら
+り
+る
+れ
+ろ
+わ
+を
+ん
+゜
+ゝ
+ァ
+ア
+ィ
+イ
+ゥ
+ウ
+ェ
+エ
+ォ
+オ
+カ
+キ
+ク
+ケ
+コ
+サ
+シ
+ス
+セ
+ソ
+タ
+チ
+ッ
+ツ
+テ
+ト
+ナ
+ニ
+ヌ
+ネ
+ノ
+ハ
+ヒ
+フ
+ヘ
+ホ
+マ
+ミ
+ム
+メ
+モ
+ャ
+ヤ
+ュ
+ユ
+ョ
+ヨ
+ラ
+リ
+ル
+レ
+ロ
+ワ
+ヲ
+ン
+ヶ
+・
+ー
+ヽ
+ㄅ
+ㄆ
+ㄇ
+ㄉ
+ㄋ
+ㄌ
+ㄍ
+ㄎ
+ㄏ
+ㄒ
+ㄚ
+ㄛ
+ㄞ
+ㄟ
+ㄢ
+ㄤ
+ㄥ
+ㄧ
+ㄨ
+ㆍ
+㈦
+㊣
+㎡
+㗎
+一
+丁
+七
+万
+丈
+三
+上
+下
+不
+与
+丐
+丑
+专
+且
+丕
+世
+丘
+丙
+业
+丛
+东
+丝
+丞
+丟
+両
+丢
+两
+严
+並
+丧
+丨
+个
+丫
+中
+丰
+串
+临
+丶
+丸
+丹
+为
+主
+丼
+丽
+举
+丿
+乂
+乃
+久
+么
+义
+之
+乌
+乍
+乎
+乏
+乐
+乒
+乓
+乔
+乖
+乗
+乘
+乙
+乜
+九
+乞
+也
+习
+乡
+书
+乩
+买
+乱
+乳
+乾
+亀
+亂
+了
+予
+争
+事
+二
+于
+亏
+云
+互
+五
+井
+亘
+亙
+亚
+些
+亜
+亞
+亟
+亡
+亢
+交
+亥
+亦
+产
+亨
+亩
+享
+京
+亭
+亮
+亲
+亳
+亵
+人
+亿
+什
+仁
+仃
+仄
+仅
+仆
+仇
+今
+介
+仍
+从
+仏
+仑
+仓
+仔
+仕
+他
+仗
+付
+仙
+仝
+仞
+仟
+代
+令
+以
+仨
+仪
+们
+仮
+仰
+仲
+件
+价
+任
+份
+仿
+企
+伉
+伊
+伍
+伎
+伏
+伐
+休
+伕
+众
+优
+伙
+会
+伝
+伞
+伟
+传
+伢
+伤
+伦
+伪
+伫
+伯
+估
+伴
+伶
+伸
+伺
+似
+伽
+佃
+但
+佇
+佈
+位
+低
+住
+佐
+佑
+体
+佔
+何
+佗
+佘
+余
+佚
+佛
+作
+佝
+佞
+佟
+你
+佢
+佣
+佤
+佥
+佩
+佬
+佯
+佰
+佳
+併
+佶
+佻
+佼
+使
+侃
+侄
+來
+侈
+例
+侍
+侏
+侑
+侖
+侗
+供
+依
+侠
+価
+侣
+侥
+侦
+侧
+侨
+侬
+侮
+侯
+侵
+侶
+侷
+便
+係
+促
+俄
+俊
+俎
+俏
+俐
+俑
+俗
+俘
+俚
+保
+俞
+俟
+俠
+信
+俨
+俩
+俪
+俬
+俭
+修
+俯
+俱
+俳
+俸
+俺
+俾
+倆
+倉
+個
+倌
+倍
+倏
+們
+倒
+倔
+倖
+倘
+候
+倚
+倜
+借
+倡
+値
+倦
+倩
+倪
+倫
+倬
+倭
+倶
+债
+值
+倾
+偃
+假
+偈
+偉
+偌
+偎
+偏
+偕
+做
+停
+健
+側
+偵
+偶
+偷
+偻
+偽
+偿
+傀
+傅
+傍
+傑
+傘
+備
+傚
+傢
+傣
+傥
+储
+傩
+催
+傭
+傲
+傳
+債
+傷
+傻
+傾
+僅
+働
+像
+僑
+僕
+僖
+僚
+僥
+僧
+僭
+僮
+僱
+僵
+價
+僻
+儀
+儂
+億
+儆
+儉
+儋
+儒
+儕
+儘
+償
+儡
+優
+儲
+儷
+儼
+儿
+兀
+允
+元
+兄
+充
+兆
+兇
+先
+光
+克
+兌
+免
+児
+兑
+兒
+兔
+兖
+党
+兜
+兢
+入
+內
+全
+兩
+八
+公
+六
+兮
+兰
+共
+兲
+关
+兴
+兵
+其
+具
+典
+兹
+养
+兼
+兽
+冀
+内
+円
+冇
+冈
+冉
+冊
+册
+再
+冏
+冒
+冕
+冗
+写
+军
+农
+冠
+冢
+冤
+冥
+冨
+冪
+冬
+冯
+冰
+冲
+决
+况
+冶
+冷
+冻
+冼
+冽
+冾
+净
+凄
+准
+凇
+凈
+凉
+凋
+凌
+凍
+减
+凑
+凛
+凜
+凝
+几
+凡
+凤
+処
+凪
+凭
+凯
+凰
+凱
+凳
+凶
+凸
+凹
+出
+击
+函
+凿
+刀
+刁
+刃
+分
+切
+刈
+刊
+刍
+刎
+刑
+划
+列
+刘
+则
+刚
+创
+初
+删
+判
+別
+刨
+利
+刪
+别
+刮
+到
+制
+刷
+券
+刹
+刺
+刻
+刽
+剁
+剂
+剃
+則
+剉
+削
+剋
+剌
+前
+剎
+剐
+剑
+剔
+剖
+剛
+剜
+剝
+剣
+剤
+剥
+剧
+剩
+剪
+副
+割
+創
+剷
+剽
+剿
+劃
+劇
+劈
+劉
+劊
+劍
+劏
+劑
+力
+劝
+办
+功
+加
+务
+劣
+动
+助
+努
+劫
+劭
+励
+劲
+劳
+労
+劵
+効
+劾
+势
+勁
+勃
+勇
+勉
+勋
+勐
+勒
+動
+勖
+勘
+務
+勛
+勝
+勞
+募
+勢
+勤
+勧
+勳
+勵
+勸
+勺
+勻
+勾
+勿
+匀
+包
+匆
+匈
+匍
+匐
+匕
+化
+北
+匙
+匝
+匠
+匡
+匣
+匪
+匮
+匯
+匱
+匹
+区
+医
+匾
+匿
+區
+十
+千
+卅
+升
+午
+卉
+半
+卍
+华
+协
+卑
+卒
+卓
+協
+单
+卖
+南
+単
+博
+卜
+卞
+卟
+占
+卡
+卢
+卤
+卦
+卧
+卫
+卮
+卯
+印
+危
+即
+却
+卵
+卷
+卸
+卻
+卿
+厂
+厄
+厅
+历
+厉
+压
+厌
+厕
+厘
+厚
+厝
+原
+厢
+厥
+厦
+厨
+厩
+厭
+厮
+厲
+厳
+去
+县
+叁
+参
+參
+又
+叉
+及
+友
+双
+反
+収
+发
+叔
+取
+受
+变
+叙
+叛
+叟
+叠
+叡
+叢
+口
+古
+句
+另
+叨
+叩
+只
+叫
+召
+叭
+叮
+可
+台
+叱
+史
+右
+叵
+叶
+号
+司
+叹
+叻
+叼
+叽
+吁
+吃
+各
+吆
+合
+吉
+吊
+吋
+同
+名
+后
+吏
+吐
+向
+吒
+吓
+吕
+吖
+吗
+君
+吝
+吞
+吟
+吠
+吡
+否
+吧
+吨
+吩
+含
+听
+吭
+吮
+启
+吱
+吳
+吴
+吵
+吶
+吸
+吹
+吻
+吼
+吽
+吾
+呀
+呂
+呃
+呆
+呈
+告
+呋
+呎
+呐
+呓
+呕
+呗
+员
+呛
+呜
+呢
+呤
+呦
+周
+呱
+呲
+味
+呵
+呷
+呸
+呻
+呼
+命
+咀
+咁
+咂
+咄
+咆
+咋
+和
+咎
+咏
+咐
+咒
+咔
+咕
+咖
+咗
+咘
+咙
+咚
+咛
+咣
+咤
+咦
+咧
+咨
+咩
+咪
+咫
+咬
+咭
+咯
+咱
+咲
+咳
+咸
+咻
+咽
+咿
+哀
+品
+哂
+哄
+哆
+哇
+哈
+哉
+哋
+哌
+响
+哎
+哏
+哐
+哑
+哒
+哔
+哗
+哟
+員
+哥
+哦
+哧
+哨
+哩
+哪
+哭
+哮
+哲
+哺
+哼
+哽
+唁
+唄
+唆
+唇
+唉
+唏
+唐
+唑
+唔
+唠
+唤
+唧
+唬
+售
+唯
+唰
+唱
+唳
+唷
+唸
+唾
+啃
+啄
+商
+啉
+啊
+問
+啓
+啕
+啖
+啜
+啞
+啟
+啡
+啤
+啥
+啦
+啧
+啪
+啫
+啬
+啮
+啰
+啱
+啲
+啵
+啶
+啷
+啸
+啻
+啼
+啾
+喀
+喂
+喃
+善
+喆
+喇
+喉
+喊
+喋
+喎
+喏
+喔
+喘
+喙
+喚
+喜
+喝
+喟
+喧
+喪
+喫
+喬
+單
+喰
+喱
+喲
+喳
+喵
+営
+喷
+喹
+喺
+喻
+喽
+嗅
+嗆
+嗇
+嗎
+嗑
+嗒
+嗓
+嗔
+嗖
+嗚
+嗜
+嗝
+嗟
+嗡
+嗣
+嗤
+嗦
+嗨
+嗪
+嗬
+嗯
+嗰
+嗲
+嗳
+嗶
+嗷
+嗽
+嘀
+嘅
+嘆
+嘈
+嘉
+嘌
+嘍
+嘎
+嘔
+嘖
+嘗
+嘘
+嘚
+嘛
+嘜
+嘞
+嘟
+嘢
+嘣
+嘤
+嘧
+嘩
+嘭
+嘮
+嘯
+嘰
+嘱
+嘲
+嘴
+嘶
+嘸
+嘹
+嘻
+嘿
+噁
+噌
+噎
+噓
+噔
+噗
+噙
+噜
+噠
+噢
+噤
+器
+噩
+噪
+噬
+噱
+噴
+噶
+噸
+噹
+噻
+噼
+嚀
+嚇
+嚎
+嚏
+嚐
+嚓
+嚕
+嚟
+嚣
+嚥
+嚨
+嚮
+嚴
+嚷
+嚼
+囂
+囉
+囊
+囍
+囑
+囔
+囗
+囚
+四
+囝
+回
+囟
+因
+囡
+团
+団
+囤
+囧
+囪
+囫
+园
+困
+囱
+囲
+図
+围
+囹
+固
+国
+图
+囿
+圃
+圄
+圆
+圈
+國
+圍
+圏
+園
+圓
+圖
+團
+圜
+土
+圣
+圧
+在
+圩
+圭
+地
+圳
+场
+圻
+圾
+址
+坂
+均
+坊
+坍
+坎
+坏
+坐
+坑
+块
+坚
+坛
+坝
+坞
+坟
+坠
+坡
+坤
+坦
+坨
+坪
+坯
+坳
+坵
+坷
+垂
+垃
+垄
+型
+垒
+垚
+垛
+垠
+垢
+垣
+垦
+垩
+垫
+垭
+垮
+垵
+埂
+埃
+埋
+城
+埔
+埕
+埗
+域
+埠
+埤
+埵
+執
+埸
+培
+基
+埼
+堀
+堂
+堃
+堅
+堆
+堇
+堑
+堕
+堙
+堡
+堤
+堪
+堯
+堰
+報
+場
+堵
+堺
+堿
+塊
+塌
+塑
+塔
+塗
+塘
+塚
+塞
+塢
+塩
+填
+塬
+塭
+塵
+塾
+墀
+境
+墅
+墉
+墊
+墒
+墓
+増
+墘
+墙
+墜
+增
+墟
+墨
+墩
+墮
+墳
+墻
+墾
+壁
+壅
+壆
+壇
+壊
+壑
+壓
+壕
+壘
+壞
+壟
+壢
+壤
+壩
+士
+壬
+壮
+壯
+声
+売
+壳
+壶
+壹
+壺
+壽
+处
+备
+変
+复
+夏
+夔
+夕
+外
+夙
+多
+夜
+够
+夠
+夢
+夥
+大
+天
+太
+夫
+夭
+央
+夯
+失
+头
+夷
+夸
+夹
+夺
+夾
+奂
+奄
+奇
+奈
+奉
+奋
+奎
+奏
+奐
+契
+奔
+奕
+奖
+套
+奘
+奚
+奠
+奢
+奥
+奧
+奪
+奬
+奮
+女
+奴
+奶
+奸
+她
+好
+如
+妃
+妄
+妆
+妇
+妈
+妊
+妍
+妒
+妓
+妖
+妘
+妙
+妝
+妞
+妣
+妤
+妥
+妨
+妩
+妪
+妮
+妲
+妳
+妹
+妻
+妾
+姆
+姉
+姊
+始
+姍
+姐
+姑
+姒
+姓
+委
+姗
+姚
+姜
+姝
+姣
+姥
+姦
+姨
+姪
+姫
+姬
+姹
+姻
+姿
+威
+娃
+娄
+娅
+娆
+娇
+娉
+娑
+娓
+娘
+娛
+娜
+娟
+娠
+娣
+娥
+娩
+娱
+娲
+娴
+娶
+娼
+婀
+婁
+婆
+婉
+婊
+婕
+婚
+婢
+婦
+婧
+婪
+婭
+婴
+婵
+婶
+婷
+婺
+婿
+媒
+媚
+媛
+媞
+媧
+媲
+媳
+媽
+媾
+嫁
+嫂
+嫉
+嫌
+嫑
+嫔
+嫖
+嫘
+嫚
+嫡
+嫣
+嫦
+嫩
+嫲
+嫵
+嫻
+嬅
+嬉
+嬌
+嬗
+嬛
+嬢
+嬤
+嬪
+嬰
+嬴
+嬷
+嬸
+嬿
+孀
+孃
+子
+孑
+孔
+孕
+孖
+字
+存
+孙
+孚
+孛
+孜
+孝
+孟
+孢
+季
+孤
+学
+孩
+孪
+孫
+孬
+孰
+孱
+孳
+孵
+學
+孺
+孽
+孿
+宁
+它
+宅
+宇
+守
+安
+宋
+完
+宏
+宓
+宕
+宗
+官
+宙
+定
+宛
+宜
+宝
+实
+実
+宠
+审
+客
+宣
+室
+宥
+宦
+宪
+宫
+宮
+宰
+害
+宴
+宵
+家
+宸
+容
+宽
+宾
+宿
+寂
+寄
+寅
+密
+寇
+富
+寐
+寒
+寓
+寛
+寝
+寞
+察
+寡
+寢
+寥
+實
+寧
+寨
+審
+寫
+寬
+寮
+寰
+寵
+寶
+寸
+对
+寺
+寻
+导
+対
+寿
+封
+専
+射
+将
+將
+專
+尉
+尊
+尋
+對
+導
+小
+少
+尔
+尕
+尖
+尘
+尚
+尝
+尤
+尧
+尬
+就
+尴
+尷
+尸
+尹
+尺
+尻
+尼
+尽
+尾
+尿
+局
+屁
+层
+屄
+居
+屆
+屈
+屉
+届
+屋
+屌
+屍
+屎
+屏
+屐
+屑
+展
+屜
+属
+屠
+屡
+屢
+層
+履
+屬
+屯
+山
+屹
+屿
+岀
+岁
+岂
+岌
+岐
+岑
+岔
+岖
+岗
+岘
+岙
+岚
+岛
+岡
+岩
+岫
+岬
+岭
+岱
+岳
+岷
+岸
+峇
+峋
+峒
+峙
+峡
+峤
+峥
+峦
+峨
+峪
+峭
+峯
+峰
+峴
+島
+峻
+峽
+崁
+崂
+崆
+崇
+崎
+崑
+崔
+崖
+崗
+崙
+崛
+崧
+崩
+崭
+崴
+崽
+嵇
+嵊
+嵋
+嵌
+嵐
+嵘
+嵩
+嵬
+嵯
+嶂
+嶄
+嶇
+嶋
+嶙
+嶺
+嶼
+嶽
+巅
+巍
+巒
+巔
+巖
+川
+州
+巡
+巢
+工
+左
+巧
+巨
+巩
+巫
+差
+己
+已
+巳
+巴
+巷
+巻
+巽
+巾
+巿
+币
+市
+布
+帅
+帆
+师
+希
+帐
+帑
+帕
+帖
+帘
+帚
+帛
+帜
+帝
+帥
+带
+帧
+師
+席
+帮
+帯
+帰
+帳
+帶
+帷
+常
+帼
+帽
+幀
+幂
+幄
+幅
+幌
+幔
+幕
+幟
+幡
+幢
+幣
+幫
+干
+平
+年
+并
+幸
+幹
+幺
+幻
+幼
+幽
+幾
+广
+庁
+広
+庄
+庆
+庇
+床
+序
+庐
+库
+应
+底
+庖
+店
+庙
+庚
+府
+庞
+废
+庠
+度
+座
+庫
+庭
+庵
+庶
+康
+庸
+庹
+庾
+廁
+廂
+廃
+廈
+廉
+廊
+廓
+廖
+廚
+廝
+廟
+廠
+廢
+廣
+廬
+廳
+延
+廷
+建
+廿
+开
+弁
+异
+弃
+弄
+弈
+弊
+弋
+式
+弑
+弒
+弓
+弔
+引
+弗
+弘
+弛
+弟
+张
+弥
+弦
+弧
+弩
+弭
+弯
+弱
+張
+強
+弹
+强
+弼
+弾
+彅
+彆
+彈
+彌
+彎
+归
+当
+录
+彗
+彙
+彝
+形
+彤
+彥
+彦
+彧
+彩
+彪
+彫
+彬
+彭
+彰
+影
+彷
+役
+彻
+彼
+彿
+往
+征
+径
+待
+徇
+很
+徉
+徊
+律
+後
+徐
+徑
+徒
+従
+徕
+得
+徘
+徙
+徜
+從
+徠
+御
+徨
+復
+循
+徬
+微
+徳
+徴
+徵
+德
+徹
+徼
+徽
+心
+必
+忆
+忌
+忍
+忏
+忐
+忑
+忒
+忖
+志
+忘
+忙
+応
+忠
+忡
+忤
+忧
+忪
+快
+忱
+念
+忻
+忽
+忿
+怀
+态
+怂
+怅
+怆
+怎
+怏
+怒
+怔
+怕
+怖
+怙
+怜
+思
+怠
+怡
+急
+怦
+性
+怨
+怪
+怯
+怵
+总
+怼
+恁
+恃
+恆
+恋
+恍
+恐
+恒
+恕
+恙
+恚
+恢
+恣
+恤
+恥
+恨
+恩
+恪
+恫
+恬
+恭
+息
+恰
+恳
+恵
+恶
+恸
+恺
+恻
+恼
+恿
+悄
+悅
+悉
+悌
+悍
+悔
+悖
+悚
+悟
+悠
+患
+悦
+您
+悩
+悪
+悬
+悯
+悱
+悲
+悴
+悵
+悶
+悸
+悻
+悼
+悽
+情
+惆
+惇
+惊
+惋
+惑
+惕
+惘
+惚
+惜
+惟
+惠
+惡
+惦
+惧
+惨
+惩
+惫
+惬
+惭
+惮
+惯
+惰
+惱
+想
+惴
+惶
+惹
+惺
+愁
+愆
+愈
+愉
+愍
+意
+愕
+愚
+愛
+愜
+感
+愣
+愤
+愧
+愫
+愷
+愿
+慄
+慈
+態
+慌
+慎
+慑
+慕
+慘
+慚
+慟
+慢
+慣
+慧
+慨
+慫
+慮
+慰
+慳
+慵
+慶
+慷
+慾
+憂
+憊
+憋
+憎
+憐
+憑
+憔
+憚
+憤
+憧
+憨
+憩
+憫
+憬
+憲
+憶
+憾
+懂
+懇
+懈
+應
+懊
+懋
+懑
+懒
+懦
+懲
+懵
+懶
+懷
+懸
+懺
+懼
+懾
+懿
+戀
+戈
+戊
+戌
+戍
+戎
+戏
+成
+我
+戒
+戕
+或
+战
+戚
+戛
+戟
+戡
+戦
+截
+戬
+戮
+戰
+戲
+戳
+戴
+戶
+户
+戸
+戻
+戾
+房
+所
+扁
+扇
+扈
+扉
+手
+才
+扎
+扑
+扒
+打
+扔
+払
+托
+扛
+扣
+扦
+执
+扩
+扪
+扫
+扬
+扭
+扮
+扯
+扰
+扱
+扳
+扶
+批
+扼
+找
+承
+技
+抄
+抉
+把
+抑
+抒
+抓
+投
+抖
+抗
+折
+抚
+抛
+抜
+択
+抟
+抠
+抡
+抢
+护
+报
+抨
+披
+抬
+抱
+抵
+抹
+押
+抽
+抿
+拂
+拄
+担
+拆
+拇
+拈
+拉
+拋
+拌
+拍
+拎
+拐
+拒
+拓
+拔
+拖
+拗
+拘
+拙
+拚
+招
+拜
+拟
+拡
+拢
+拣
+拥
+拦
+拧
+拨
+择
+括
+拭
+拮
+拯
+拱
+拳
+拴
+拷
+拼
+拽
+拾
+拿
+持
+挂
+指
+挈
+按
+挎
+挑
+挖
+挙
+挚
+挛
+挝
+挞
+挟
+挠
+挡
+挣
+挤
+挥
+挨
+挪
+挫
+振
+挲
+挹
+挺
+挽
+挾
+捂
+捅
+捆
+捉
+捋
+捌
+捍
+捎
+捏
+捐
+捕
+捞
+损
+捡
+换
+捣
+捧
+捨
+捩
+据
+捱
+捲
+捶
+捷
+捺
+捻
+掀
+掂
+掃
+掇
+授
+掉
+掌
+掏
+掐
+排
+掖
+掘
+掙
+掛
+掠
+採
+探
+掣
+接
+控
+推
+掩
+措
+掬
+掰
+掲
+掳
+掴
+掷
+掸
+掺
+揀
+揃
+揄
+揆
+揉
+揍
+描
+提
+插
+揖
+揚
+換
+握
+揣
+揩
+揪
+揭
+揮
+援
+揶
+揸
+揹
+揽
+搀
+搁
+搂
+搅
+損
+搏
+搐
+搓
+搔
+搖
+搗
+搜
+搞
+搡
+搪
+搬
+搭
+搵
+搶
+携
+搽
+摀
+摁
+摄
+摆
+摇
+摈
+摊
+摒
+摔
+摘
+摞
+摟
+摧
+摩
+摯
+摳
+摸
+摹
+摺
+摻
+撂
+撃
+撅
+撇
+撈
+撐
+撑
+撒
+撓
+撕
+撚
+撞
+撤
+撥
+撩
+撫
+撬
+播
+撮
+撰
+撲
+撵
+撷
+撸
+撻
+撼
+撿
+擀
+擁
+擂
+擄
+擅
+擇
+擊
+擋
+操
+擎
+擒
+擔
+擘
+據
+擞
+擠
+擡
+擢
+擦
+擬
+擰
+擱
+擲
+擴
+擷
+擺
+擼
+擾
+攀
+攏
+攒
+攔
+攘
+攙
+攜
+攝
+攞
+攢
+攣
+攤
+攥
+攪
+攫
+攬
+支
+收
+攸
+改
+攻
+放
+政
+故
+效
+敌
+敍
+敎
+敏
+救
+敕
+敖
+敗
+敘
+教
+敛
+敝
+敞
+敢
+散
+敦
+敬
+数
+敲
+整
+敵
+敷
+數
+斂
+斃
+文
+斋
+斌
+斎
+斐
+斑
+斓
+斗
+料
+斛
+斜
+斟
+斡
+斤
+斥
+斧
+斩
+斫
+斬
+断
+斯
+新
+斷
+方
+於
+施
+旁
+旃
+旅
+旋
+旌
+旎
+族
+旖
+旗
+无
+既
+日
+旦
+旧
+旨
+早
+旬
+旭
+旮
+旱
+时
+旷
+旺
+旻
+昀
+昂
+昆
+昇
+昉
+昊
+昌
+明
+昏
+易
+昔
+昕
+昙
+星
+映
+春
+昧
+昨
+昭
+是
+昱
+昴
+昵
+昶
+昼
+显
+晁
+時
+晃
+晉
+晋
+晌
+晏
+晒
+晓
+晔
+晕
+晖
+晗
+晚
+晝
+晞
+晟
+晤
+晦
+晨
+晩
+普
+景
+晰
+晴
+晶
+晷
+智
+晾
+暂
+暄
+暇
+暈
+暉
+暌
+暐
+暑
+暖
+暗
+暝
+暢
+暧
+暨
+暫
+暮
+暱
+暴
+暸
+暹
+曄
+曆
+曇
+曉
+曖
+曙
+曜
+曝
+曠
+曦
+曬
+曰
+曲
+曳
+更
+書
+曹
+曼
+曾
+替
+最
+會
+月
+有
+朋
+服
+朐
+朔
+朕
+朗
+望
+朝
+期
+朦
+朧
+木
+未
+末
+本
+札
+朮
+术
+朱
+朴
+朵
+机
+朽
+杀
+杂
+权
+杆
+杈
+杉
+李
+杏
+材
+村
+杓
+杖
+杜
+杞
+束
+杠
+条
+来
+杨
+杭
+杯
+杰
+東
+杳
+杵
+杷
+杼
+松
+板
+极
+构
+枇
+枉
+枋
+析
+枕
+林
+枚
+果
+枝
+枢
+枣
+枪
+枫
+枭
+枯
+枰
+枱
+枳
+架
+枷
+枸
+柄
+柏
+某
+柑
+柒
+染
+柔
+柘
+柚
+柜
+柞
+柠
+柢
+查
+柩
+柬
+柯
+柱
+柳
+柴
+柵
+査
+柿
+栀
+栃
+栄
+栅
+标
+栈
+栉
+栋
+栎
+栏
+树
+栓
+栖
+栗
+校
+栩
+株
+样
+核
+根
+格
+栽
+栾
+桀
+桁
+桂
+桃
+桅
+框
+案
+桉
+桌
+桎
+桐
+桑
+桓
+桔
+桜
+桠
+桡
+桢
+档
+桥
+桦
+桧
+桨
+桩
+桶
+桿
+梁
+梅
+梆
+梏
+梓
+梗
+條
+梟
+梢
+梦
+梧
+梨
+梭
+梯
+械
+梳
+梵
+梶
+检
+棂
+棄
+棉
+棋
+棍
+棒
+棕
+棗
+棘
+棚
+棟
+棠
+棣
+棧
+森
+棱
+棲
+棵
+棹
+棺
+椁
+椅
+椋
+植
+椎
+椒
+検
+椪
+椭
+椰
+椹
+椽
+椿
+楂
+楊
+楓
+楔
+楚
+楝
+楞
+楠
+楣
+楨
+楫
+業
+楮
+極
+楷
+楸
+楹
+楼
+楽
+概
+榄
+榆
+榈
+榉
+榔
+榕
+榖
+榛
+榜
+榨
+榫
+榭
+榮
+榱
+榴
+榷
+榻
+槁
+槃
+構
+槌
+槍
+槎
+槐
+槓
+様
+槛
+槟
+槤
+槭
+槲
+槳
+槻
+槽
+槿
+樁
+樂
+樊
+樑
+樓
+標
+樞
+樟
+模
+樣
+権
+横
+樫
+樯
+樱
+樵
+樸
+樹
+樺
+樽
+樾
+橄
+橇
+橋
+橐
+橘
+橙
+機
+橡
+橢
+橫
+橱
+橹
+橼
+檀
+檄
+檎
+檐
+檔
+檗
+檜
+檢
+檬
+檯
+檳
+檸
+檻
+櫃
+櫚
+櫛
+櫥
+櫸
+櫻
+欄
+權
+欒
+欖
+欠
+次
+欢
+欣
+欧
+欲
+欸
+欺
+欽
+款
+歆
+歇
+歉
+歌
+歎
+歐
+歓
+歙
+歛
+歡
+止
+正
+此
+步
+武
+歧
+歩
+歪
+歯
+歲
+歳
+歴
+歷
+歸
+歹
+死
+歼
+殁
+殃
+殆
+殇
+殉
+殊
+残
+殒
+殓
+殖
+殘
+殞
+殡
+殤
+殭
+殯
+殲
+殴
+段
+殷
+殺
+殼
+殿
+毀
+毁
+毂
+毅
+毆
+毋
+母
+毎
+每
+毒
+毓
+比
+毕
+毗
+毘
+毙
+毛
+毡
+毫
+毯
+毽
+氈
+氏
+氐
+民
+氓
+气
+氖
+気
+氙
+氛
+氟
+氡
+氢
+氣
+氤
+氦
+氧
+氨
+氪
+氫
+氮
+氯
+氰
+氲
+水
+氷
+永
+氹
+氾
+汀
+汁
+求
+汆
+汇
+汉
+汎
+汐
+汕
+汗
+汙
+汛
+汝
+汞
+江
+池
+污
+汤
+汨
+汩
+汪
+汰
+汲
+汴
+汶
+汹
+決
+汽
+汾
+沁
+沂
+沃
+沅
+沈
+沉
+沌
+沏
+沐
+沒
+沓
+沖
+沙
+沛
+沟
+没
+沢
+沣
+沥
+沦
+沧
+沪
+沫
+沭
+沮
+沱
+河
+沸
+油
+治
+沼
+沽
+沾
+沿
+況
+泄
+泉
+泊
+泌
+泓
+法
+泗
+泛
+泞
+泠
+泡
+波
+泣
+泥
+注
+泪
+泫
+泮
+泯
+泰
+泱
+泳
+泵
+泷
+泸
+泻
+泼
+泽
+泾
+洁
+洄
+洋
+洒
+洗
+洙
+洛
+洞
+津
+洩
+洪
+洮
+洱
+洲
+洵
+洶
+洸
+洹
+活
+洼
+洽
+派
+流
+浃
+浄
+浅
+浆
+浇
+浊
+测
+济
+浏
+浑
+浒
+浓
+浔
+浙
+浚
+浜
+浣
+浦
+浩
+浪
+浬
+浮
+浯
+浴
+海
+浸
+涂
+涅
+涇
+消
+涉
+涌
+涎
+涓
+涔
+涕
+涙
+涛
+涝
+涞
+涟
+涠
+涡
+涣
+涤
+润
+涧
+涨
+涩
+涪
+涮
+涯
+液
+涵
+涸
+涼
+涿
+淀
+淄
+淅
+淆
+淇
+淋
+淌
+淑
+淒
+淖
+淘
+淙
+淚
+淞
+淡
+淤
+淦
+淨
+淩
+淪
+淫
+淬
+淮
+深
+淳
+淵
+混
+淹
+淺
+添
+淼
+清
+済
+渉
+渊
+渋
+渍
+渎
+渐
+渔
+渗
+渙
+渚
+減
+渝
+渠
+渡
+渣
+渤
+渥
+渦
+温
+測
+渭
+港
+渲
+渴
+游
+渺
+渾
+湃
+湄
+湊
+湍
+湖
+湘
+湛
+湟
+湧
+湫
+湮
+湯
+湳
+湾
+湿
+満
+溃
+溅
+溉
+溏
+源
+準
+溜
+溝
+溟
+溢
+溥
+溧
+溪
+溫
+溯
+溱
+溴
+溶
+溺
+溼
+滁
+滂
+滄
+滅
+滇
+滋
+滌
+滑
+滓
+滔
+滕
+滙
+滚
+滝
+滞
+滟
+满
+滢
+滤
+滥
+滦
+滨
+滩
+滬
+滯
+滲
+滴
+滷
+滸
+滾
+滿
+漁
+漂
+漆
+漉
+漏
+漓
+演
+漕
+漠
+漢
+漣
+漩
+漪
+漫
+漬
+漯
+漱
+漲
+漳
+漸
+漾
+漿
+潆
+潇
+潋
+潍
+潑
+潔
+潘
+潛
+潜
+潞
+潟
+潢
+潤
+潦
+潧
+潭
+潮
+潰
+潴
+潸
+潺
+潼
+澀
+澄
+澆
+澈
+澍
+澎
+澗
+澜
+澡
+澤
+澧
+澱
+澳
+澹
+激
+濁
+濂
+濃
+濑
+濒
+濕
+濘
+濛
+濟
+濠
+濡
+濤
+濫
+濬
+濮
+濯
+濱
+濺
+濾
+瀅
+瀆
+瀉
+瀋
+瀏
+瀑
+瀕
+瀘
+瀚
+瀛
+瀝
+瀞
+瀟
+瀧
+瀨
+瀬
+瀰
+瀾
+灌
+灏
+灑
+灘
+灝
+灞
+灣
+火
+灬
+灭
+灯
+灰
+灵
+灶
+灸
+灼
+災
+灾
+灿
+炀
+炁
+炅
+炉
+炊
+炎
+炒
+炔
+炕
+炖
+炙
+炜
+炫
+炬
+炭
+炮
+炯
+炳
+炷
+炸
+点
+為
+炼
+炽
+烁
+烂
+烃
+烈
+烊
+烏
+烘
+烙
+烛
+烟
+烤
+烦
+烧
+烨
+烩
+烫
+烬
+热
+烯
+烷
+烹
+烽
+焉
+焊
+焕
+焖
+焗
+焘
+焙
+焚
+焜
+無
+焦
+焯
+焰
+焱
+然
+焼
+煅
+煉
+煊
+煌
+煎
+煒
+煖
+煙
+煜
+煞
+煤
+煥
+煦
+照
+煨
+煩
+煮
+煲
+煸
+煽
+熄
+熊
+熏
+熒
+熔
+熙
+熟
+熠
+熨
+熬
+熱
+熵
+熹
+熾
+燁
+燃
+燄
+燈
+燉
+燊
+燎
+燒
+燔
+燕
+燙
+燜
+營
+燥
+燦
+燧
+燭
+燮
+燴
+燻
+燼
+燿
+爆
+爍
+爐
+爛
+爪
+爬
+爭
+爰
+爱
+爲
+爵
+父
+爷
+爸
+爹
+爺
+爻
+爽
+爾
+牆
+片
+版
+牌
+牍
+牒
+牙
+牛
+牝
+牟
+牠
+牡
+牢
+牦
+牧
+物
+牯
+牲
+牴
+牵
+特
+牺
+牽
+犀
+犁
+犄
+犊
+犍
+犒
+犢
+犧
+犬
+犯
+状
+犷
+犸
+犹
+狀
+狂
+狄
+狈
+狎
+狐
+狒
+狗
+狙
+狞
+狠
+狡
+狩
+独
+狭
+狮
+狰
+狱
+狸
+狹
+狼
+狽
+猎
+猕
+猖
+猗
+猙
+猛
+猜
+猝
+猥
+猩
+猪
+猫
+猬
+献
+猴
+猶
+猷
+猾
+猿
+獄
+獅
+獎
+獐
+獒
+獗
+獠
+獣
+獨
+獭
+獰
+獲
+獵
+獷
+獸
+獺
+獻
+獼
+獾
+玄
+率
+玉
+王
+玑
+玖
+玛
+玟
+玠
+玥
+玩
+玫
+玮
+环
+现
+玲
+玳
+玷
+玺
+玻
+珀
+珂
+珅
+珈
+珉
+珊
+珍
+珏
+珐
+珑
+珙
+珞
+珠
+珣
+珥
+珩
+珪
+班
+珮
+珲
+珺
+現
+球
+琅
+理
+琇
+琉
+琊
+琍
+琏
+琐
+琛
+琢
+琥
+琦
+琨
+琪
+琬
+琮
+琰
+琲
+琳
+琴
+琵
+琶
+琺
+琼
+瑀
+瑁
+瑄
+瑋
+瑕
+瑗
+瑙
+瑚
+瑛
+瑜
+瑞
+瑟
+瑠
+瑣
+瑤
+瑩
+瑪
+瑯
+瑰
+瑶
+瑾
+璀
+璁
+璃
+璇
+璉
+璋
+璎
+璐
+璜
+璞
+璟
+璧
+璨
+環
+璽
+璿
+瓊
+瓏
+瓒
+瓜
+瓢
+瓣
+瓤
+瓦
+瓮
+瓯
+瓴
+瓶
+瓷
+甄
+甌
+甕
+甘
+甙
+甚
+甜
+生
+產
+産
+甥
+甦
+用
+甩
+甫
+甬
+甭
+甯
+田
+由
+甲
+申
+电
+男
+甸
+町
+画
+甾
+畀
+畅
+界
+畏
+畑
+畔
+留
+畜
+畝
+畢
+略
+畦
+番
+畫
+異
+畲
+畳
+畴
+當
+畸
+畹
+畿
+疆
+疇
+疊
+疏
+疑
+疔
+疖
+疗
+疙
+疚
+疝
+疟
+疡
+疣
+疤
+疥
+疫
+疮
+疯
+疱
+疲
+疳
+疵
+疸
+疹
+疼
+疽
+疾
+痂
+病
+症
+痈
+痉
+痊
+痍
+痒
+痔
+痕
+痘
+痙
+痛
+痞
+痠
+痢
+痣
+痤
+痧
+痨
+痪
+痫
+痰
+痱
+痴
+痹
+痺
+痼
+痿
+瘀
+瘁
+瘋
+瘍
+瘓
+瘘
+瘙
+瘟
+瘠
+瘡
+瘢
+瘤
+瘦
+瘧
+瘩
+瘪
+瘫
+瘴
+瘸
+瘾
+療
+癇
+癌
+癒
+癖
+癜
+癞
+癡
+癢
+癣
+癥
+癫
+癬
+癮
+癱
+癲
+癸
+発
+登
+發
+白
+百
+皂
+的
+皆
+皇
+皈
+皋
+皎
+皑
+皓
+皖
+皙
+皚
+皮
+皰
+皱
+皴
+皺
+皿
+盂
+盃
+盅
+盆
+盈
+益
+盎
+盏
+盐
+监
+盒
+盔
+盖
+盗
+盘
+盛
+盜
+盞
+盟
+盡
+監
+盤
+盥
+盧
+盪
+目
+盯
+盱
+盲
+直
+相
+盹
+盼
+盾
+省
+眈
+眉
+看
+県
+眙
+眞
+真
+眠
+眦
+眨
+眩
+眯
+眶
+眷
+眸
+眺
+眼
+眾
+着
+睁
+睇
+睏
+睐
+睑
+睛
+睜
+睞
+睡
+睢
+督
+睥
+睦
+睨
+睪
+睫
+睬
+睹
+睽
+睾
+睿
+瞄
+瞅
+瞇
+瞋
+瞌
+瞎
+瞑
+瞒
+瞓
+瞞
+瞟
+瞠
+瞥
+瞧
+瞩
+瞪
+瞬
+瞭
+瞰
+瞳
+瞻
+瞼
+瞿
+矇
+矍
+矗
+矚
+矛
+矜
+矢
+矣
+知
+矩
+矫
+短
+矮
+矯
+石
+矶
+矽
+矾
+矿
+码
+砂
+砌
+砍
+砒
+研
+砖
+砗
+砚
+砝
+砣
+砥
+砧
+砭
+砰
+砲
+破
+砷
+砸
+砺
+砼
+砾
+础
+硅
+硐
+硒
+硕
+硝
+硫
+硬
+确
+硯
+硼
+碁
+碇
+碉
+碌
+碍
+碎
+碑
+碓
+碗
+碘
+碚
+碛
+碟
+碣
+碧
+碩
+碰
+碱
+碳
+碴
+確
+碼
+碾
+磁
+磅
+磊
+磋
+磐
+磕
+磚
+磡
+磨
+磬
+磯
+磲
+磷
+磺
+礁
+礎
+礙
+礡
+礦
+礪
+礫
+礴
+示
+礼
+社
+祀
+祁
+祂
+祇
+祈
+祉
+祎
+祐
+祕
+祖
+祗
+祚
+祛
+祜
+祝
+神
+祟
+祠
+祢
+祥
+票
+祭
+祯
+祷
+祸
+祺
+祿
+禀
+禁
+禄
+禅
+禍
+禎
+福
+禛
+禦
+禧
+禪
+禮
+禱
+禹
+禺
+离
+禽
+禾
+禿
+秀
+私
+秃
+秆
+秉
+秋
+种
+科
+秒
+秘
+租
+秣
+秤
+秦
+秧
+秩
+秭
+积
+称
+秸
+移
+秽
+稀
+稅
+程
+稍
+税
+稔
+稗
+稚
+稜
+稞
+稟
+稠
+稣
+種
+稱
+稲
+稳
+稷
+稹
+稻
+稼
+稽
+稿
+穀
+穂
+穆
+穌
+積
+穎
+穗
+穢
+穩
+穫
+穴
+究
+穷
+穹
+空
+穿
+突
+窃
+窄
+窈
+窍
+窑
+窒
+窓
+窕
+窖
+窗
+窘
+窜
+窝
+窟
+窠
+窥
+窦
+窨
+窩
+窪
+窮
+窯
+窺
+窿
+竄
+竅
+竇
+竊
+立
+竖
+站
+竜
+竞
+竟
+章
+竣
+童
+竭
+端
+競
+竹
+竺
+竽
+竿
+笃
+笆
+笈
+笋
+笏
+笑
+笔
+笙
+笛
+笞
+笠
+符
+笨
+第
+笹
+笺
+笼
+筆
+等
+筊
+筋
+筍
+筏
+筐
+筑
+筒
+答
+策
+筛
+筝
+筠
+筱
+筲
+筵
+筷
+筹
+签
+简
+箇
+箋
+箍
+箏
+箐
+箔
+箕
+算
+箝
+管
+箩
+箫
+箭
+箱
+箴
+箸
+節
+篁
+範
+篆
+篇
+築
+篑
+篓
+篙
+篝
+篠
+篡
+篤
+篩
+篪
+篮
+篱
+篷
+簇
+簌
+簍
+簡
+簦
+簧
+簪
+簫
+簷
+簸
+簽
+簾
+簿
+籁
+籃
+籌
+籍
+籐
+籟
+籠
+籤
+籬
+籮
+籲
+米
+类
+籼
+籽
+粄
+粉
+粑
+粒
+粕
+粗
+粘
+粟
+粤
+粥
+粧
+粪
+粮
+粱
+粲
+粳
+粵
+粹
+粼
+粽
+精
+粿
+糅
+糊
+糍
+糕
+糖
+糗
+糙
+糜
+糞
+糟
+糠
+糧
+糬
+糯
+糰
+糸
+系
+糾
+紀
+紂
+約
+紅
+紉
+紊
+紋
+納
+紐
+紓
+純
+紗
+紘
+紙
+級
+紛
+紜
+素
+紡
+索
+紧
+紫
+紮
+累
+細
+紳
+紹
+紺
+終
+絃
+組
+絆
+経
+結
+絕
+絞
+絡
+絢
+給
+絨
+絮
+統
+絲
+絳
+絵
+絶
+絹
+綁
+綏
+綑
+經
+継
+続
+綜
+綠
+綢
+綦
+綫
+綬
+維
+綱
+網
+綴
+綵
+綸
+綺
+綻
+綽
+綾
+綿
+緊
+緋
+総
+緑
+緒
+緘
+線
+緝
+緞
+締
+緣
+編
+緩
+緬
+緯
+練
+緹
+緻
+縁
+縄
+縈
+縛
+縝
+縣
+縫
+縮
+縱
+縴
+縷
+總
+績
+繁
+繃
+繆
+繇
+繋
+織
+繕
+繚
+繞
+繡
+繩
+繪
+繫
+繭
+繳
+繹
+繼
+繽
+纂
+續
+纍
+纏
+纓
+纔
+纖
+纜
+纠
+红
+纣
+纤
+约
+级
+纨
+纪
+纫
+纬
+纭
+纯
+纰
+纱
+纲
+纳
+纵
+纶
+纷
+纸
+纹
+纺
+纽
+纾
+线
+绀
+练
+组
+绅
+细
+织
+终
+绊
+绍
+绎
+经
+绑
+绒
+结
+绔
+绕
+绘
+给
+绚
+绛
+络
+绝
+绞
+统
+绡
+绢
+绣
+绥
+绦
+继
+绩
+绪
+绫
+续
+绮
+绯
+绰
+绳
+维
+绵
+绶
+绷
+绸
+绻
+综
+绽
+绾
+绿
+缀
+缄
+缅
+缆
+缇
+缈
+缉
+缎
+缓
+缔
+缕
+编
+缘
+缙
+缚
+缜
+缝
+缠
+缢
+缤
+缥
+缨
+缩
+缪
+缭
+缮
+缰
+缱
+缴
+缸
+缺
+缽
+罂
+罄
+罌
+罐
+网
+罔
+罕
+罗
+罚
+罡
+罢
+罩
+罪
+置
+罰
+署
+罵
+罷
+罹
+羁
+羅
+羈
+羊
+羌
+美
+羔
+羚
+羞
+羟
+羡
+羣
+群
+羥
+羧
+羨
+義
+羯
+羲
+羸
+羹
+羽
+羿
+翁
+翅
+翊
+翌
+翎
+習
+翔
+翘
+翟
+翠
+翡
+翦
+翩
+翰
+翱
+翳
+翹
+翻
+翼
+耀
+老
+考
+耄
+者
+耆
+耋
+而
+耍
+耐
+耒
+耕
+耗
+耘
+耙
+耦
+耨
+耳
+耶
+耷
+耸
+耻
+耽
+耿
+聂
+聆
+聊
+聋
+职
+聒
+联
+聖
+聘
+聚
+聞
+聪
+聯
+聰
+聲
+聳
+聴
+聶
+職
+聽
+聾
+聿
+肃
+肄
+肅
+肆
+肇
+肉
+肋
+肌
+肏
+肓
+肖
+肘
+肚
+肛
+肝
+肠
+股
+肢
+肤
+肥
+肩
+肪
+肮
+肯
+肱
+育
+肴
+肺
+肽
+肾
+肿
+胀
+胁
+胃
+胄
+胆
+背
+胍
+胎
+胖
+胚
+胛
+胜
+胝
+胞
+胡
+胤
+胥
+胧
+胫
+胭
+胯
+胰
+胱
+胳
+胴
+胶
+胸
+胺
+能
+脂
+脅
+脆
+脇
+脈
+脉
+脊
+脍
+脏
+脐
+脑
+脓
+脖
+脘
+脚
+脛
+脣
+脩
+脫
+脯
+脱
+脲
+脳
+脸
+脹
+脾
+腆
+腈
+腊
+腋
+腌
+腎
+腐
+腑
+腓
+腔
+腕
+腥
+腦
+腩
+腫
+腭
+腮
+腰
+腱
+腳
+腴
+腸
+腹
+腺
+腻
+腼
+腾
+腿
+膀
+膈
+膊
+膏
+膑
+膘
+膚
+膛
+膜
+膝
+膠
+膦
+膨
+膩
+膳
+膺
+膻
+膽
+膾
+膿
+臀
+臂
+臃
+臆
+臉
+臊
+臍
+臓
+臘
+臟
+臣
+臥
+臧
+臨
+自
+臬
+臭
+至
+致
+臺
+臻
+臼
+臾
+舀
+舂
+舅
+舆
+與
+興
+舉
+舊
+舌
+舍
+舎
+舐
+舒
+舔
+舖
+舗
+舛
+舜
+舞
+舟
+航
+舫
+般
+舰
+舱
+舵
+舶
+舷
+舸
+船
+舺
+舾
+艇
+艋
+艘
+艙
+艦
+艮
+良
+艰
+艱
+色
+艳
+艷
+艹
+艺
+艾
+节
+芃
+芈
+芊
+芋
+芍
+芎
+芒
+芙
+芜
+芝
+芡
+芥
+芦
+芩
+芪
+芫
+芬
+芭
+芮
+芯
+花
+芳
+芷
+芸
+芹
+芻
+芽
+芾
+苁
+苄
+苇
+苋
+苍
+苏
+苑
+苒
+苓
+苔
+苕
+苗
+苛
+苜
+苞
+苟
+苡
+苣
+若
+苦
+苫
+苯
+英
+苷
+苹
+苻
+茁
+茂
+范
+茄
+茅
+茉
+茎
+茏
+茗
+茜
+茧
+茨
+茫
+茬
+茭
+茯
+茱
+茲
+茴
+茵
+茶
+茸
+茹
+茼
+荀
+荃
+荆
+草
+荊
+荏
+荐
+荒
+荔
+荖
+荘
+荚
+荞
+荟
+荠
+荡
+荣
+荤
+荥
+荧
+荨
+荪
+荫
+药
+荳
+荷
+荸
+荻
+荼
+荽
+莅
+莆
+莉
+莊
+莎
+莒
+莓
+莖
+莘
+莞
+莠
+莢
+莧
+莪
+莫
+莱
+莲
+莴
+获
+莹
+莺
+莽
+莿
+菀
+菁
+菅
+菇
+菈
+菊
+菌
+菏
+菓
+菖
+菘
+菜
+菟
+菠
+菡
+菩
+華
+菱
+菲
+菸
+菽
+萁
+萃
+萄
+萊
+萋
+萌
+萍
+萎
+萘
+萝
+萤
+营
+萦
+萧
+萨
+萩
+萬
+萱
+萵
+萸
+萼
+落
+葆
+葉
+著
+葚
+葛
+葡
+董
+葦
+葩
+葫
+葬
+葭
+葯
+葱
+葳
+葵
+葷
+葺
+蒂
+蒋
+蒐
+蒔
+蒙
+蒜
+蒞
+蒟
+蒡
+蒨
+蒲
+蒸
+蒹
+蒻
+蒼
+蒿
+蓁
+蓄
+蓆
+蓉
+蓋
+蓑
+蓓
+蓖
+蓝
+蓟
+蓦
+蓬
+蓮
+蓼
+蓿
+蔑
+蔓
+蔔
+蔗
+蔘
+蔚
+蔡
+蔣
+蔥
+蔫
+蔬
+蔭
+蔵
+蔷
+蔺
+蔻
+蔼
+蔽
+蕁
+蕃
+蕈
+蕉
+蕊
+蕎
+蕙
+蕤
+蕨
+蕩
+蕪
+蕭
+蕲
+蕴
+蕻
+蕾
+薄
+薅
+薇
+薈
+薊
+薏
+薑
+薔
+薙
+薛
+薦
+薨
+薩
+薪
+薬
+薯
+薰
+薹
+藉
+藍
+藏
+藐
+藓
+藕
+藜
+藝
+藤
+藥
+藩
+藹
+藻
+藿
+蘆
+蘇
+蘊
+蘋
+蘑
+蘚
+蘭
+蘸
+蘼
+蘿
+虎
+虏
+虐
+虑
+虔
+處
+虚
+虛
+虜
+虞
+號
+虢
+虧
+虫
+虬
+虱
+虹
+虻
+虽
+虾
+蚀
+蚁
+蚂
+蚊
+蚌
+蚓
+蚕
+蚜
+蚝
+蚣
+蚤
+蚩
+蚪
+蚯
+蚱
+蚵
+蛀
+蛆
+蛇
+蛊
+蛋
+蛎
+蛐
+蛔
+蛙
+蛛
+蛟
+蛤
+蛭
+蛮
+蛰
+蛳
+蛹
+蛻
+蛾
+蜀
+蜂
+蜃
+蜆
+蜇
+蜈
+蜊
+蜍
+蜒
+蜓
+蜕
+蜗
+蜘
+蜚
+蜜
+蜡
+蜢
+蜥
+蜱
+蜴
+蜷
+蜻
+蜿
+蝇
+蝈
+蝉
+蝌
+蝎
+蝕
+蝗
+蝙
+蝟
+蝠
+蝦
+蝨
+蝴
+蝶
+蝸
+蝼
+螂
+螃
+融
+螞
+螢
+螨
+螯
+螳
+螺
+蟀
+蟄
+蟆
+蟋
+蟎
+蟑
+蟒
+蟠
+蟬
+蟲
+蟹
+蟻
+蟾
+蠅
+蠍
+蠔
+蠕
+蠛
+蠟
+蠡
+蠢
+蠣
+蠱
+蠶
+蠹
+蠻
+血
+衄
+衅
+衆
+行
+衍
+術
+衔
+街
+衙
+衛
+衝
+衞
+衡
+衢
+衣
+补
+表
+衩
+衫
+衬
+衮
+衰
+衲
+衷
+衹
+衾
+衿
+袁
+袂
+袄
+袅
+袈
+袋
+袍
+袒
+袖
+袜
+袞
+袤
+袪
+被
+袭
+袱
+裁
+裂
+装
+裆
+裊
+裏
+裔
+裕
+裘
+裙
+補
+裝
+裟
+裡
+裤
+裨
+裱
+裳
+裴
+裸
+裹
+製
+裾
+褂
+複
+褐
+褒
+褓
+褔
+褚
+褥
+褪
+褫
+褲
+褶
+褻
+襁
+襄
+襟
+襠
+襪
+襬
+襯
+襲
+西
+要
+覃
+覆
+覇
+見
+規
+覓
+視
+覚
+覦
+覧
+親
+覬
+観
+覷
+覺
+覽
+觀
+见
+观
+规
+觅
+视
+览
+觉
+觊
+觎
+觐
+觑
+角
+觞
+解
+觥
+触
+觸
+言
+訂
+計
+訊
+討
+訓
+訕
+訖
+託
+記
+訛
+訝
+訟
+訣
+訥
+訪
+設
+許
+訳
+訴
+訶
+診
+註
+証
+詆
+詐
+詔
+評
+詛
+詞
+詠
+詡
+詢
+詣
+試
+詩
+詫
+詬
+詭
+詮
+詰
+話
+該
+詳
+詹
+詼
+誅
+誇
+誉
+誌
+認
+誓
+誕
+誘
+語
+誠
+誡
+誣
+誤
+誥
+誦
+誨
+說
+説
+読
+誰
+課
+誹
+誼
+調
+諄
+談
+請
+諏
+諒
+論
+諗
+諜
+諡
+諦
+諧
+諫
+諭
+諮
+諱
+諳
+諷
+諸
+諺
+諾
+謀
+謁
+謂
+謄
+謊
+謎
+謐
+謔
+謗
+謙
+講
+謝
+謠
+謨
+謬
+謹
+謾
+譁
+證
+譎
+譏
+識
+譙
+譚
+譜
+警
+譬
+譯
+議
+譲
+譴
+護
+譽
+讀
+變
+讓
+讚
+讞
+计
+订
+认
+讥
+讧
+讨
+让
+讪
+讫
+训
+议
+讯
+记
+讲
+讳
+讴
+讶
+讷
+许
+讹
+论
+讼
+讽
+设
+访
+诀
+证
+诃
+评
+诅
+识
+诈
+诉
+诊
+诋
+词
+诏
+译
+试
+诗
+诘
+诙
+诚
+诛
+话
+诞
+诟
+诠
+诡
+询
+诣
+诤
+该
+详
+诧
+诩
+诫
+诬
+语
+误
+诰
+诱
+诲
+说
+诵
+诶
+请
+诸
+诺
+读
+诽
+课
+诿
+谀
+谁
+调
+谄
+谅
+谆
+谈
+谊
+谋
+谌
+谍
+谎
+谏
+谐
+谑
+谒
+谓
+谔
+谕
+谗
+谘
+谙
+谚
+谛
+谜
+谟
+谢
+谣
+谤
+谥
+谦
+谧
+谨
+谩
+谪
+谬
+谭
+谯
+谱
+谲
+谴
+谶
+谷
+豁
+豆
+豇
+豈
+豉
+豊
+豌
+豎
+豐
+豔
+豚
+象
+豢
+豪
+豫
+豬
+豹
+豺
+貂
+貅
+貌
+貓
+貔
+貘
+貝
+貞
+負
+財
+貢
+貧
+貨
+販
+貪
+貫
+責
+貯
+貰
+貳
+貴
+貶
+買
+貸
+費
+貼
+貽
+貿
+賀
+賁
+賂
+賃
+賄
+資
+賈
+賊
+賑
+賓
+賜
+賞
+賠
+賡
+賢
+賣
+賤
+賦
+質
+賬
+賭
+賴
+賺
+購
+賽
+贅
+贈
+贊
+贍
+贏
+贓
+贖
+贛
+贝
+贞
+负
+贡
+财
+责
+贤
+败
+账
+货
+质
+贩
+贪
+贫
+贬
+购
+贮
+贯
+贰
+贱
+贲
+贴
+贵
+贷
+贸
+费
+贺
+贻
+贼
+贾
+贿
+赁
+赂
+赃
+资
+赅
+赈
+赊
+赋
+赌
+赎
+赏
+赐
+赓
+赔
+赖
+赘
+赚
+赛
+赝
+赞
+赠
+赡
+赢
+赣
+赤
+赦
+赧
+赫
+赭
+走
+赳
+赴
+赵
+赶
+起
+趁
+超
+越
+趋
+趕
+趙
+趟
+趣
+趨
+足
+趴
+趵
+趸
+趺
+趾
+跃
+跄
+跆
+跋
+跌
+跎
+跑
+跖
+跚
+跛
+距
+跟
+跡
+跤
+跨
+跩
+跪
+路
+跳
+践
+跷
+跹
+跺
+跻
+踉
+踊
+踌
+踏
+踐
+踝
+踞
+踟
+踢
+踩
+踪
+踮
+踱
+踴
+踵
+踹
+蹂
+蹄
+蹇
+蹈
+蹉
+蹊
+蹋
+蹑
+蹒
+蹙
+蹟
+蹣
+蹤
+蹦
+蹩
+蹬
+蹭
+蹲
+蹴
+蹶
+蹺
+蹼
+蹿
+躁
+躇
+躉
+躊
+躋
+躍
+躏
+躪
+身
+躬
+躯
+躲
+躺
+軀
+車
+軋
+軌
+軍
+軒
+軟
+転
+軸
+軼
+軽
+軾
+較
+載
+輒
+輓
+輔
+輕
+輛
+輝
+輟
+輩
+輪
+輯
+輸
+輻
+輾
+輿
+轄
+轅
+轆
+轉
+轍
+轎
+轟
+车
+轧
+轨
+轩
+转
+轭
+轮
+软
+轰
+轲
+轴
+轶
+轻
+轼
+载
+轿
+较
+辄
+辅
+辆
+辇
+辈
+辉
+辊
+辍
+辐
+辑
+输
+辕
+辖
+辗
+辘
+辙
+辛
+辜
+辞
+辟
+辣
+辦
+辨
+辩
+辫
+辭
+辮
+辯
+辰
+辱
+農
+边
+辺
+辻
+込
+辽
+达
+迁
+迂
+迄
+迅
+过
+迈
+迎
+运
+近
+返
+还
+这
+进
+远
+违
+连
+迟
+迢
+迤
+迥
+迦
+迩
+迪
+迫
+迭
+述
+迴
+迷
+迸
+迹
+迺
+追
+退
+送
+适
+逃
+逅
+逆
+选
+逊
+逍
+透
+逐
+递
+途
+逕
+逗
+這
+通
+逛
+逝
+逞
+速
+造
+逢
+連
+逮
+週
+進
+逵
+逶
+逸
+逻
+逼
+逾
+遁
+遂
+遅
+遇
+遊
+運
+遍
+過
+遏
+遐
+遑
+遒
+道
+達
+違
+遗
+遙
+遛
+遜
+遞
+遠
+遢
+遣
+遥
+遨
+適
+遭
+遮
+遲
+遴
+遵
+遶
+遷
+選
+遺
+遼
+遽
+避
+邀
+邁
+邂
+邃
+還
+邇
+邈
+邊
+邋
+邏
+邑
+邓
+邕
+邛
+邝
+邢
+那
+邦
+邨
+邪
+邬
+邮
+邯
+邰
+邱
+邳
+邵
+邸
+邹
+邺
+邻
+郁
+郅
+郊
+郎
+郑
+郜
+郝
+郡
+郢
+郤
+郦
+郧
+部
+郫
+郭
+郴
+郵
+郷
+郸
+都
+鄂
+鄉
+鄒
+鄔
+鄙
+鄞
+鄢
+鄧
+鄭
+鄰
+鄱
+鄲
+鄺
+酉
+酊
+酋
+酌
+配
+酐
+酒
+酗
+酚
+酝
+酢
+酣
+酥
+酩
+酪
+酬
+酮
+酯
+酰
+酱
+酵
+酶
+酷
+酸
+酿
+醃
+醇
+醉
+醋
+醍
+醐
+醒
+醚
+醛
+醜
+醞
+醣
+醪
+醫
+醬
+醮
+醯
+醴
+醺
+釀
+釁
+采
+釉
+释
+釋
+里
+重
+野
+量
+釐
+金
+釗
+釘
+釜
+針
+釣
+釦
+釧
+釵
+鈀
+鈉
+鈍
+鈎
+鈔
+鈕
+鈞
+鈣
+鈦
+鈪
+鈴
+鈺
+鈾
+鉀
+鉄
+鉅
+鉉
+鉑
+鉗
+鉚
+鉛
+鉤
+鉴
+鉻
+銀
+銃
+銅
+銑
+銓
+銖
+銘
+銜
+銬
+銭
+銮
+銳
+銷
+銹
+鋁
+鋅
+鋒
+鋤
+鋪
+鋰
+鋸
+鋼
+錄
+錐
+錘
+錚
+錠
+錢
+錦
+錨
+錫
+錮
+錯
+録
+錳
+錶
+鍊
+鍋
+鍍
+鍛
+鍥
+鍰
+鍵
+鍺
+鍾
+鎂
+鎊
+鎌
+鎏
+鎔
+鎖
+鎗
+鎚
+鎧
+鎬
+鎮
+鎳
+鏈
+鏖
+鏗
+鏘
+鏞
+鏟
+鏡
+鏢
+鏤
+鏽
+鐘
+鐮
+鐲
+鐳
+鐵
+鐸
+鐺
+鑄
+鑊
+鑑
+鑒
+鑣
+鑫
+鑰
+鑲
+鑼
+鑽
+鑾
+鑿
+针
+钉
+钊
+钎
+钏
+钒
+钓
+钗
+钙
+钛
+钜
+钝
+钞
+钟
+钠
+钡
+钢
+钣
+钤
+钥
+钦
+钧
+钨
+钩
+钮
+钯
+钰
+钱
+钳
+钴
+钵
+钺
+钻
+钼
+钾
+钿
+铀
+铁
+铂
+铃
+铄
+铅
+铆
+铉
+铎
+铐
+铛
+铜
+铝
+铠
+铡
+铢
+铣
+铤
+铨
+铩
+铬
+铭
+铮
+铰
+铲
+铵
+银
+铸
+铺
+链
+铿
+销
+锁
+锂
+锄
+锅
+锆
+锈
+锉
+锋
+锌
+锏
+锐
+锑
+错
+锚
+锟
+锡
+锢
+锣
+锤
+锥
+锦
+锭
+键
+锯
+锰
+锲
+锵
+锹
+锺
+锻
+镀
+镁
+镂
+镇
+镉
+镌
+镍
+镐
+镑
+镕
+镖
+镗
+镛
+镜
+镣
+镭
+镯
+镰
+镳
+镶
+長
+长
+門
+閃
+閉
+開
+閎
+閏
+閑
+閒
+間
+閔
+閘
+閡
+関
+閣
+閥
+閨
+閩
+閱
+閲
+閹
+閻
+閾
+闆
+闇
+闊
+闌
+闍
+闔
+闕
+闖
+闘
+關
+闡
+闢
+门
+闪
+闫
+闭
+问
+闯
+闰
+闲
+间
+闵
+闷
+闸
+闹
+闺
+闻
+闽
+闾
+阀
+阁
+阂
+阅
+阆
+阇
+阈
+阉
+阎
+阐
+阑
+阔
+阕
+阖
+阙
+阚
+阜
+队
+阡
+阪
+阮
+阱
+防
+阳
+阴
+阵
+阶
+阻
+阿
+陀
+陂
+附
+际
+陆
+陇
+陈
+陋
+陌
+降
+限
+陕
+陛
+陝
+陞
+陟
+陡
+院
+陣
+除
+陨
+险
+陪
+陰
+陲
+陳
+陵
+陶
+陷
+陸
+険
+陽
+隅
+隆
+隈
+隊
+隋
+隍
+階
+随
+隐
+隔
+隕
+隘
+隙
+際
+障
+隠
+隣
+隧
+隨
+險
+隱
+隴
+隶
+隸
+隻
+隼
+隽
+难
+雀
+雁
+雄
+雅
+集
+雇
+雉
+雋
+雌
+雍
+雎
+雏
+雑
+雒
+雕
+雖
+雙
+雛
+雜
+雞
+離
+難
+雨
+雪
+雯
+雰
+雲
+雳
+零
+雷
+雹
+電
+雾
+需
+霁
+霄
+霆
+震
+霈
+霉
+霊
+霍
+霎
+霏
+霑
+霓
+霖
+霜
+霞
+霧
+霭
+霰
+露
+霸
+霹
+霽
+霾
+靂
+靄
+靈
+青
+靓
+靖
+静
+靚
+靛
+靜
+非
+靠
+靡
+面
+靥
+靦
+革
+靳
+靴
+靶
+靼
+鞅
+鞋
+鞍
+鞏
+鞑
+鞘
+鞠
+鞣
+鞦
+鞭
+韆
+韋
+韌
+韓
+韜
+韦
+韧
+韩
+韬
+韭
+音
+韵
+韶
+韻
+響
+頁
+頂
+頃
+項
+順
+須
+頌
+預
+頑
+頒
+頓
+頗
+領
+頜
+頡
+頤
+頫
+頭
+頰
+頷
+頸
+頹
+頻
+頼
+顆
+題
+額
+顎
+顏
+顔
+願
+顛
+類
+顧
+顫
+顯
+顱
+顴
+页
+顶
+顷
+项
+顺
+须
+顼
+顽
+顾
+顿
+颁
+颂
+预
+颅
+领
+颇
+颈
+颉
+颊
+颌
+颍
+颐
+频
+颓
+颔
+颖
+颗
+题
+颚
+颛
+颜
+额
+颞
+颠
+颡
+颢
+颤
+颦
+颧
+風
+颯
+颱
+颳
+颶
+颼
+飄
+飆
+风
+飒
+飓
+飕
+飘
+飙
+飚
+飛
+飞
+食
+飢
+飨
+飩
+飪
+飯
+飲
+飼
+飽
+飾
+餃
+餅
+餉
+養
+餌
+餐
+餒
+餓
+餘
+餚
+餛
+餞
+餡
+館
+餮
+餵
+餾
+饅
+饈
+饋
+饌
+饍
+饑
+饒
+饕
+饗
+饞
+饥
+饨
+饪
+饬
+饭
+饮
+饯
+饰
+饱
+饲
+饴
+饵
+饶
+饷
+饺
+饼
+饽
+饿
+馀
+馁
+馄
+馅
+馆
+馈
+馋
+馍
+馏
+馒
+馔
+首
+馗
+香
+馥
+馨
+馬
+馭
+馮
+馳
+馴
+駁
+駄
+駅
+駆
+駐
+駒
+駕
+駛
+駝
+駭
+駱
+駿
+騁
+騎
+騏
+験
+騙
+騨
+騰
+騷
+驀
+驅
+驊
+驍
+驒
+驕
+驗
+驚
+驛
+驟
+驢
+驥
+马
+驭
+驮
+驯
+驰
+驱
+驳
+驴
+驶
+驷
+驸
+驹
+驻
+驼
+驾
+驿
+骁
+骂
+骄
+骅
+骆
+骇
+骈
+骊
+骋
+验
+骏
+骐
+骑
+骗
+骚
+骛
+骜
+骞
+骠
+骡
+骤
+骥
+骧
+骨
+骯
+骰
+骶
+骷
+骸
+骼
+髂
+髅
+髋
+髏
+髒
+髓
+體
+髖
+高
+髦
+髪
+髮
+髯
+髻
+鬃
+鬆
+鬍
+鬓
+鬚
+鬟
+鬢
+鬣
+鬥
+鬧
+鬱
+鬼
+魁
+魂
+魄
+魅
+魇
+魍
+魏
+魔
+魘
+魚
+魯
+魷
+鮑
+鮨
+鮪
+鮭
+鮮
+鯉
+鯊
+鯖
+鯛
+鯨
+鯰
+鯽
+鰍
+鰓
+鰭
+鰲
+鰻
+鰾
+鱈
+鱉
+鱔
+鱗
+鱷
+鱸
+鱼
+鱿
+鲁
+鲈
+鲍
+鲑
+鲛
+鲜
+鲟
+鲢
+鲤
+鲨
+鲫
+鲱
+鲲
+鲶
+鲷
+鲸
+鳃
+鳄
+鳅
+鳌
+鳍
+鳕
+鳖
+鳗
+鳝
+鳞
+鳥
+鳩
+鳳
+鳴
+鳶
+鴉
+鴕
+鴛
+鴦
+鴨
+鴻
+鴿
+鵑
+鵜
+鵝
+鵡
+鵬
+鵰
+鵲
+鶘
+鶩
+鶯
+鶴
+鷗
+鷲
+鷹
+鷺
+鸚
+鸞
+鸟
+鸠
+鸡
+鸢
+鸣
+鸥
+鸦
+鸨
+鸪
+鸭
+鸯
+鸳
+鸵
+鸽
+鸾
+鸿
+鹂
+鹃
+鹄
+鹅
+鹈
+鹉
+鹊
+鹌
+鹏
+鹑
+鹕
+鹘
+鹜
+鹞
+鹤
+鹦
+鹧
+鹫
+鹭
+鹰
+鹳
+鹵
+鹹
+鹼
+鹽
+鹿
+麂
+麋
+麒
+麓
+麗
+麝
+麟
+麥
+麦
+麩
+麴
+麵
+麸
+麺
+麻
+麼
+麽
+麾
+黃
+黄
+黍
+黎
+黏
+黑
+黒
+黔
+默
+黛
+黜
+黝
+點
+黠
+黨
+黯
+黴
+鼋
+鼎
+鼐
+鼓
+鼠
+鼬
+鼹
+鼻
+鼾
+齁
+齊
+齋
+齐
+齒
+齡
+齢
+齣
+齦
+齿
+龄
+龅
+龈
+龊
+龋
+龌
+龍
+龐
+龔
+龕
+龙
+龚
+龛
+龜
+龟
+︰
+︱
+︶
+︿
+﹁
+﹂
+﹍
+﹏
+﹐
+﹑
+﹒
+﹔
+﹕
+﹖
+﹗
+﹙
+﹚
+﹝
+﹞
+﹡
+﹣
+！
+＂
+＃
+＄
+％
+＆
+＇
+（
+）
+＊
+＋
+，
+－
+．
+／
+０
+１
+２
+３
+４
+５
+６
+７
+８
+９
+：
+；
+＜
+＝
+＞
+？
+＠
+［
+＼
+］
+＾
+＿
+｀
+ａ
+ｂ
+ｃ
+ｄ
+ｅ
+ｆ
+ｇ
+ｈ
+ｉ
+ｊ
+ｋ
+ｌ
+ｍ
+ｎ
+ｏ
+ｐ
+ｑ
+ｒ
+ｓ
+ｔ
+ｕ
+ｖ
+ｗ
+ｘ
+ｙ
+ｚ
+｛
+｜
+｝
+～
+｡
+｢
+｣
+､
+･
+ｯ
+ｰ
+ｲ
+ｸ
+ｼ
+ｽ
+ﾄ
+ﾉ
+ﾌ
+ﾗ
+ﾙ
+ﾝ
+ﾞ
+ﾟ
+￣
+￥
+👍
+🔥
+😂
+😎
+...
+yam
+10
+2017
+12
+11
+2016
+20
+30
+15
+06
+lofter
+##s
+2015
+by
+16
+14
+18
+13
+24
+17
+2014
+21
+##0
+22
+19
+25
+23
+com
+100
+00
+05
+2013
+##a
+03
+09
+08
+28
+##2
+50
+01
+04
+##1
+27
+02
+2012
+##3
+26
+##e
+07
+##8
+##5
+##6
+##4
+##9
+##7
+29
+2011
+40
+##t
+2010
+##o
+##d
+##i
+2009
+##n
+app
+www
+the
+##m
+31
+##c
+##l
+##y
+##r
+##g
+2008
+60
+http
+200
+qq
+##p
+80
+##f
+google
+pixnet
+90
+cookies
+tripadvisor
+500
+##er
+##k
+35
+##h
+facebook
+2007
+2000
+70
+##b
+of
+##x
+##u
+45
+300
+iphone
+32
+1000
+2006
+48
+ip
+36
+in
+38
+3d
+##w
+##ing
+55
+ctrip
+##on
+##v
+33
+##の
+to
+34
+400
+id
+2005
+it
+37
+windows
+llc
+top
+99
+42
+39
+000
+led
+at
+##an
+41
+51
+52
+46
+49
+43
+53
+44
+##z
+android
+58
+and
+59
+2004
+56
+vr
+##か
+5000
+2003
+47
+blogthis
+twitter
+54
+##le
+150
+ok
+2018
+57
+75
+cn
+no
+ios
+##in
+##mm
+##00
+800
+on
+te
+3000
+65
+2001
+360
+95
+ig
+lv
+120
+##ng
+##を
+##us
+##に
+pc
+てす
+──
+600
+##te
+85
+2002
+88
+##ed
+html
+ncc
+wifi
+email
+64
+blog
+is
+##10
+##て
+mail
+online
+##al
+dvd
+##ic
+studio
+##は
+##℃
+##ia
+##と
+line
+vip
+72
+##q
+98
+##ce
+##en
+for
+##is
+##ra
+##es
+##j
+usb
+net
+cp
+1999
+asia
+4g
+##cm
+diy
+new
+3c
+##お
+ta
+66
+language
+vs
+apple
+tw
+86
+web
+##ne
+ipad
+62
+you
+##re
+101
+68
+##tion
+ps
+de
+bt
+pony
+atm
+##2017
+1998
+67
+##ch
+ceo
+##or
+go
+##na
+av
+pro
+cafe
+96
+pinterest
+97
+63
+pixstyleme3c
+##ta
+more
+said
+##2016
+1997
+mp3
+700
+##ll
+nba
+jun
+##20
+92
+tv
+1995
+pm
+61
+76
+nbsp
+250
+##ie
+linux
+##ma
+cd
+110
+hd
+##17
+78
+##ion
+77
+6000
+am
+##th
+##st
+94
+##se
+##et
+69
+180
+gdp
+my
+105
+81
+abc
+89
+flash
+79
+one
+93
+1990
+1996
+##ck
+gps
+##も
+##ly
+web885
+106
+2020
+91
+##ge
+4000
+1500
+xd
+boss
+isbn
+1994
+org
+##ry
+me
+love
+##11
+0fork
+73
+##12
+3g
+##ter
+##ar
+71
+82
+##la
+hotel
+130
+1970
+pk
+83
+87
+140
+ie
+##os
+##30
+##el
+74
+##50
+seo
+cpu
+##ml
+p2p
+84
+may
+##る
+sun
+tue
+internet
+cc
+posted
+youtube
+##at
+##ン
+##man
+ii
+##ル
+##15
+abs
+nt
+pdf
+yahoo
+ago
+1980
+##it
+news
+mac
+104
+##てす
+##me
+##り
+java
+1992
+spa
+##de
+##nt
+hk
+all
+plus
+la
+1993
+##mb
+##16
+##ve
+west
+##da
+160
+air
+##い
+##ps
+から
+##to
+1989
+logo
+htc
+php
+https
+fi
+momo
+##son
+sat
+##ke
+##80
+ebd
+suv
+wi
+day
+apk
+##88
+##um
+mv
+galaxy
+wiki
+or
+brake
+##ス
+1200
+する
+this
+1991
+mon
+##こ
+❤2017
+po
+##ない
+javascript
+life
+home
+june
+##ss
+system
+900
+##ー
+##０
+pp
+1988
+world
+fb
+4k
+br
+##as
+ic
+ai
+leonardo
+safari
+##60
+live
+free
+xx
+wed
+win7
+kiehl
+##co
+lg
+o2o
+##go
+us
+235
+1949
+mm
+しい
+vfm
+kanye
+##90
+##2015
+##id
+jr
+##ey
+123
+rss
+##sa
+##ro
+##am
+##no
+thu
+fri
+350
+##sh
+##ki
+103
+comments
+name
+##のて
+##pe
+##ine
+max
+1987
+8000
+uber
+##mi
+##ton
+wordpress
+office
+1986
+1985
+##ment
+107
+bd
+win10
+##ld
+##li
+gmail
+bb
+dior
+##rs
+##ri
+##rd
+##ます
+up
+cad
+##®
+dr
+して
+read
+##21
+をお
+##io
+##99
+url
+1984
+pvc
+paypal
+show
+policy
+##40
+##ty
+##18
+with
+##★
+##01
+txt
+102
+##ba
+dna
+from
+post
+mini
+ar
+taiwan
+john
+##ga
+privacy
+agoda
+##13
+##ny
+word
+##24
+##22
+##by
+##ur
+##hz
+1982
+##ang
+265
+cookie
+netscape
+108
+##ka
+##～
+##ad
+house
+share
+note
+ibm
+code
+hello
+nike
+sim
+survey
+##016
+1979
+1950
+wikia
+##32
+##017
+5g
+cbc
+##tor
+##kg
+1983
+##rt
+##14
+campaign
+store
+2500
+os
+##ct
+##ts
+##°
+170
+api
+##ns
+365
+excel
+##な
+##ao
+##ら
+##し
+～～
+##nd
+university
+163
+には
+518
+##70
+##ya
+##il
+##25
+pierre
+ipo
+0020
+897
+##23
+hotels
+##ian
+のお
+125
+years
+6606
+##ers
+##26
+high
+##day
+time
+##ay
+bug
+##line
+##く
+##す
+##be
+xp
+talk2yam
+yamservice
+10000
+coco
+##dy
+sony
+##ies
+1978
+microsoft
+david
+people
+##ha
+1960
+instagram
+intel
+その
+##ot
+iso
+1981
+##va
+115
+##mo
+##land
+xxx
+man
+co
+ltxsw
+##ation
+baby
+220
+##pa
+##ol
+1945
+7000
+tag
+450
+##ue
+msn
+##31
+oppo
+##ト
+##ca
+control
+##om
+st
+chrome
+##ure
+##ん
+be
+##き
+lol
+##19
+した
+##bo
+240
+lady
+##100
+##way
+##から
+4600
+##ko
+##do
+##un
+4s
+corporation
+168
+##ni
+herme
+##28
+ｃｐ
+978
+##up
+##06
+ui
+##ds
+ppt
+admin
+three
+します
+bbc
+re
+128
+##48
+ca
+##015
+##35
+hp
+##ee
+tpp
+##た
+##ive
+××
+root
+##cc
+##ました
+##ble
+##ity
+adobe
+park
+114
+et
+oled
+city
+##ex
+##ler
+##ap
+china
+##book
+20000
+view
+##ice
+global
+##km
+your
+hong
+##mg
+out
+##ms
+ng
+ebay
+##29
+menu
+ubuntu
+##cy
+rom
+##view
+open
+ktv
+do
+server
+##lo
+if
+english
+##ね
+##５
+##oo
+1600
+##02
+step1
+kong
+club
+135
+july
+inc
+1976
+mr
+hi
+##net
+touch
+##ls
+##ii
+michael
+lcd
+##05
+##33
+phone
+james
+step2
+1300
+ios9
+##box
+dc
+##２
+##ley
+samsung
+111
+280
+pokemon
+css
+##ent
+##les
+いいえ
+##１
+s8
+atom
+play
+bmw
+##said
+sa
+etf
+ctrl
+♥yoyo♥
+##55
+2025
+##2014
+##66
+adidas
+amazon
+1958
+##ber
+##ner
+visa
+##77
+##der
+1800
+connectivity
+##hi
+firefox
+109
+118
+hr
+so
+style
+mark
+pop
+ol
+skip
+1975
+as
+##27
+##ir
+##61
+190
+mba
+##う
+##ai
+le
+##ver
+1900
+cafe2017
+lte
+super
+113
+129
+##ron
+amd
+like
+##☆
+are
+##ster
+we
+##sk
+paul
+data
+international
+##ft
+longchamp
+ssd
+good
+##ート
+##ti
+reply
+##my
+↓↓↓
+apr
+star
+##ker
+source
+136
+js
+112
+get
+force
+photo
+##one
+126
+##2013
+##ow
+link
+bbs
+1972
+goods
+##lin
+python
+119
+##ip
+game
+##ics
+##ません
+blue
+##●
+520
+##45
+page
+itunes
+##03
+1955
+260
+1968
+gt
+gif
+618
+##ff
+##47
+group
+くたさい
+about
+bar
+ganji
+##nce
+music
+lee
+not
+1977
+1971
+1973
+##per
+an
+faq
+comment
+##って
+days
+##ock
+116
+##bs
+1974
+1969
+v1
+player
+1956
+xbox
+sql
+fm
+f1
+139
+##ah
+210
+##lv
+##mp
+##000
+melody
+1957
+##３
+550
+17life
+199
+1966
+xml
+market
+##au
+##71
+999
+##04
+what
+gl
+##95
+##age
+tips
+##68
+book
+##ting
+mysql
+can
+1959
+230
+##ung
+wonderland
+watch
+10℃
+##ction
+9000
+mar
+mobile
+1946
+1962
+article
+##db
+part
+▲top
+party
+って
+1967
+1964
+1948
+##07
+##ore
+##op
+この
+dj
+##78
+##38
+010
+main
+225
+1965
+##ong
+art
+320
+ad
+134
+020
+##73
+117
+pm2
+japan
+228
+##08
+ts
+1963
+##ica
+der
+sm
+##36
+2019
+##wa
+ct
+##７
+##や
+##64
+1937
+homemesh
+search
+##85
+##れは
+##tv
+##di
+macbook
+##９
+##くたさい
+service
+##♥
+type
+った
+750
+##ier
+##si
+##75
+##います
+##ok
+best
+##ット
+goris
+lock
+##った
+cf
+3m
+big
+##ut
+ftp
+carol
+##vi
+１０
+1961
+happy
+sd
+##ac
+122
+anti
+pe
+cnn
+iii
+1920
+138
+##ラ
+1940
+esp
+jan
+tags
+##98
+##51
+august
+vol
+##86
+154
+##™
+##fs
+##れ
+##sion
+design
+ac
+##ム
+press
+jordan
+ppp
+that
+key
+check
+##６
+##tt
+##㎡
+1080p
+##lt
+power
+##42
+1952
+##bc
+vivi
+##ック
+he
+133
+121
+jpg
+##rry
+201
+175
+3500
+1947
+nb
+##ted
+##rn
+しています
+1954
+usd
+##t00
+master
+##ンク
+001
+model
+##58
+al
+##09
+1953
+##34
+ram
+goo
+ても
+##ui
+127
+1930
+red
+##ary
+rpg
+item
+##pm
+##41
+270
+##za
+project
+##2012
+hot
+td
+blogabstract
+##ger
+##62
+650
+##44
+gr2
+##します
+##ｍ
+black
+electronic
+nfc
+year
+asus
+また
+html5
+cindy
+##hd
+m3
+132
+esc
+##od
+booking
+##53
+fed
+tvb
+##81
+##ina
+mit
+165
+##いる
+chan
+192
+distribution
+next
+になる
+peter
+bios
+steam
+cm
+1941
+にも
+pk10
+##ix
+##65
+##91
+dec
+nasa
+##ana
+icecat
+00z
+b1
+will
+##46
+li
+se
+##ji
+##み
+##ard
+oct
+##ain
+jp
+##ze
+##bi
+cio
+##56
+smart
+h5
+##39
+##port
+curve
+vpn
+##nm
+##dia
+utc
+##あり
+12345678910
+##52
+rmvb
+chanel
+a4
+miss
+##and
+##im
+media
+who
+##63
+she
+girl
+5s
+124
+vera
+##して
+class
+vivo
+king
+##フ
+##ei
+national
+ab
+1951
+5cm
+888
+145
+ipod
+ap
+1100
+5mm
+211
+ms
+2756
+##69
+mp4
+msci
+##po
+##89
+131
+mg
+index
+380
+##bit
+##out
+##zz
+##97
+##67
+158
+apec
+##８
+photoshop
+opec
+￥799
+ては
+##96
+##tes
+##ast
+2g
+○○
+##ール
+￥2899
+##ling
+##よ
+##ory
+1938
+##ical
+kitty
+content
+##43
+step3
+##cn
+win8
+155
+vc
+1400
+iphone7
+robert
+##した
+tcl
+137
+beauty
+##87
+en
+dollars
+##ys
+##oc
+step
+pay
+yy
+a1
+##2011
+##lly
+##ks
+##♪
+1939
+188
+download
+1944
+sep
+exe
+ph
+います
+school
+gb
+center
+pr
+street
+##board
+uv
+##37
+##lan
+winrar
+##que
+##ua
+##com
+1942
+1936
+480
+gpu
+##４
+ettoday
+fu
+tom
+##54
+##ren
+##via
+149
+##72
+b2b
+144
+##79
+##tch
+rose
+arm
+mb
+##49
+##ial
+##nn
+nvidia
+step4
+mvp
+00㎡
+york
+156
+##イ
+how
+cpi
+591
+2765
+gov
+kg
+joe
+##xx
+mandy
+pa
+##ser
+copyright
+fashion
+1935
+don
+##け
+ecu
+##ist
+##art
+erp
+wap
+have
+##lm
+talk
+##ek
+##ning
+##if
+ch
+##ite
+video
+1943
+cs
+san
+iot
+look
+##84
+##2010
+##ku
+october
+##ux
+trump
+##hs
+##ide
+box
+141
+first
+##ins
+april
+##ight
+##83
+185
+angel
+protected
+aa
+151
+162
+x1
+m2
+##fe
+##×
+##ho
+size
+143
+min
+ofo
+fun
+gomaji
+ex
+hdmi
+food
+dns
+march
+chris
+kevin
+##のか
+##lla
+##pp
+##ec
+ag
+ems
+6s
+720p
+##rm
+##ham
+off
+##92
+asp
+team
+fandom
+ed
+299
+▌♥
+##ell
+info
+されています
+##82
+sina
+4066
+161
+##able
+##ctor
+330
+399
+315
+dll
+rights
+ltd
+idc
+jul
+3kg
+1927
+142
+ma
+surface
+##76
+##ク
+～～～
+304
+mall
+eps
+146
+green
+##59
+map
+space
+donald
+v2
+sodu
+##light
+1931
+148
+1700
+まて
+310
+reserved
+htm
+##han
+##57
+2d
+178
+mod
+##ise
+##tions
+152
+ti
+##shi
+doc
+1933
+icp
+055
+wang
+##ram
+shopping
+aug
+##pi
+##well
+now
+wam
+b2
+からお
+##hu
+236
+1928
+##gb
+266
+f2
+##93
+153
+mix
+##ef
+##uan
+bwl
+##plus
+##res
+core
+##ess
+tea
+5℃
+hktvmall
+nhk
+##ate
+list
+##ese
+301
+feb
+4m
+inn
+ての
+nov
+159
+12345
+daniel
+##ci
+pass
+##bet
+##nk
+coffee
+202
+ssl
+airbnb
+##ute
+fbi
+woshipm
+skype
+ea
+cg
+sp
+##fc
+##www
+yes
+edge
+alt
+007
+##94
+fpga
+##ght
+##gs
+iso9001
+さい
+##ile
+##wood
+##uo
+image
+lin
+icon
+american
+##em
+1932
+set
+says
+##king
+##tive
+blogger
+##74
+なと
+256
+147
+##ox
+##zy
+##red
+##ium
+##lf
+nokia
+claire
+##リ
+##ding
+november
+lohas
+##500
+##tic
+##マ
+##cs
+##ある
+##che
+##ire
+##gy
+##ult
+db
+january
+win
+##カ
+166
+road
+ptt
+##ま
+##つ
+198
+##fa
+##mer
+anna
+pchome
+はい
+udn
+ef
+420
+##time
+##tte
+2030
+##ア
+g20
+white
+かかります
+1929
+308
+garden
+eleven
+di
+##おります
+chen
+309b
+777
+172
+young
+cosplay
+ちてない
+4500
+bat
+##123
+##tra
+##ては
+kindle
+npc
+steve
+etc
+##ern
+##｜
+call
+xperia
+ces
+travel
+sk
+s7
+##ous
+1934
+##int
+みいたたけます
+183
+edu
+file
+cho
+qr
+##car
+##our
+186
+##ant
+##ｄ
+eric
+1914
+rends
+##jo
+##する
+mastercard
+##2000
+kb
+##min
+290
+##ino
+vista
+##ris
+##ud
+jack
+2400
+##set
+169
+pos
+1912
+##her
+##ou
+taipei
+しく
+205
+beta
+##ませんか
+232
+##fi
+express
+255
+body
+##ill
+aphojoy
+user
+december
+meiki
+##ick
+tweet
+richard
+##av
+##ᆫ
+iphone6
+##dd
+ちてすか
+views
+##mark
+321
+pd
+##００
+times
+##▲
+level
+##ash
+10g
+point
+5l
+##ome
+208
+koreanmall
+##ak
+george
+q2
+206
+wma
+tcp
+##200
+スタッフ
+full
+mlb
+##lle
+##watch
+tm
+run
+179
+911
+smith
+business
+##und
+1919
+color
+##tal
+222
+171
+##less
+moon
+4399
+##rl
+update
+pcb
+shop
+499
+157
+little
+なし
+end
+##mhz
+van
+dsp
+easy
+660
+##house
+##key
+history
+##ｏ
+oh
+##001
+##hy
+##web
+oem
+let
+was
+##2009
+##gg
+review
+##wan
+182
+##°c
+203
+uc
+title
+##val
+united
+233
+2021
+##ons
+doi
+trivago
+overdope
+sbs
+##ance
+##ち
+grand
+special
+573032185
+imf
+216
+wx17house
+##so
+##ーム
+audi
+##he
+london
+william
+##rp
+##ake
+science
+beach
+cfa
+amp
+ps4
+880
+##800
+##link
+##hp
+crm
+ferragamo
+bell
+make
+##eng
+195
+under
+zh
+photos
+2300
+##style
+##ント
+via
+176
+da
+##gi
+company
+i7
+##ray
+thomas
+370
+ufo
+i5
+##max
+plc
+ben
+back
+research
+8g
+173
+mike
+##pc
+##ッフ
+september
+189
+##ace
+vps
+february
+167
+pantos
+wp
+lisa
+1921
+★★
+jquery
+night
+long
+offer
+##berg
+##news
+1911
+##いて
+ray
+fks
+wto
+せます
+over
+164
+340
+##all
+##rus
+1924
+##888
+##works
+blogtitle
+loftpermalink
+##→
+187
+martin
+test
+ling
+km
+##め
+15000
+fda
+v3
+##ja
+##ロ
+ｗedding
+かある
+outlet
+family
+##ea
+をこ
+##top
+story
+##ness
+salvatore
+##lu
+204
+swift
+215
+room
+している
+oracle
+##ul
+1925
+sam
+b2c
+week
+pi
+rock
+##のは
+##ａ
+##けと
+##ean
+##300
+##gle
+cctv
+after
+chinese
+##back
+powered
+x2
+##tan
+1918
+##nes
+##イン
+canon
+only
+181
+##zi
+##las
+say
+##oe
+184
+##sd
+221
+##bot
+##world
+##zo
+sky
+made
+top100
+just
+1926
+pmi
+802
+234
+gap
+##vr
+177
+les
+174
+▲topoct
+ball
+vogue
+vi
+ing
+ofweek
+cos
+##list
+##ort
+▲topmay
+##なら
+##lon
+として
+last
+##tc
+##of
+##bus
+##gen
+real
+eva
+##コ
+a3
+nas
+##lie
+##ria
+##coin
+##bt
+▲topapr
+his
+212
+cat
+nata
+vive
+health
+⋯⋯
+drive
+sir
+▲topmar
+du
+cup
+##カー
+##ook
+##よう
+##sy
+alex
+msg
+tour
+しました
+3ce
+##word
+193
+ebooks
+r8
+block
+318
+##より
+2200
+nice
+pvp
+207
+months
+1905
+rewards
+##ther
+1917
+0800
+##xi
+##チ
+##sc
+micro
+850
+gg
+blogfp
+op
+1922
+daily
+m1
+264
+true
+##bb
+ml
+##tar
+##のお
+##ky
+anthony
+196
+253
+##yo
+state
+218
+##ara
+##aa
+##rc
+##tz
+##ston
+より
+gear
+##eo
+##ade
+ge
+see
+1923
+##win
+##ura
+ss
+heart
+##den
+##ita
+down
+##sm
+el
+png
+2100
+610
+rakuten
+whatsapp
+bay
+dream
+add
+##use
+680
+311
+pad
+gucci
+mpv
+##ode
+##fo
+island
+▲topjun
+##▼
+223
+jason
+214
+chicago
+##❤
+しの
+##hone
+io
+##れる
+##ことか
+sogo
+be2
+##ology
+990
+cloud
+vcd
+##con
+2～3
+##ford
+##joy
+##kb
+##こさいます
+##rade
+but
+##ach
+docker
+##ful
+rfid
+ul
+##ase
+hit
+ford
+##star
+580
+##○
+１１
+a2
+sdk
+reading
+edited
+##are
+cmos
+##mc
+238
+siri
+light
+##ella
+##ため
+bloomberg
+##read
+pizza
+##ison
+jimmy
+##vm
+college
+node
+journal
+ba
+18k
+##play
+245
+##cer
+２０
+magic
+##yu
+191
+jump
+288
+tt
+##ings
+asr
+##lia
+3200
+step5
+network
+##cd
+mc
+いします
+1234
+pixstyleme
+273
+##600
+2800
+money
+★★★★★
+1280
+１２
+430
+bl
+みの
+act
+##tus
+tokyo
+##rial
+##life
+emba
+##ae
+saas
+tcs
+##rk
+##wang
+summer
+##sp
+ko
+##ving
+390
+premium
+##その
+netflix
+##ヒ
+uk
+mt
+##lton
+right
+frank
+two
+209
+える
+##ple
+##cal
+021
+##んな
+##sen
+##ville
+hold
+nexus
+dd
+##ius
+てお
+##mah
+##なく
+tila
+zero
+820
+ce
+##tin
+resort
+##ws
+charles
+old
+p10
+5d
+report
+##360
+##ru
+##には
+bus
+vans
+lt
+##est
+pv
+##レ
+links
+rebecca
+##ツ
+##dm
+azure
+##365
+きな
+limited
+bit
+4gb
+##mon
+1910
+moto
+##eam
+213
+1913
+var
+eos
+なとの
+226
+blogspot
+された
+699
+e3
+dos
+dm
+fc
+##ments
+##ik
+##kw
+boy
+##bin
+##ata
+960
+er
+##せ
+219
+##vin
+##tu
+##ula
+194
+##∥
+station
+##ろ
+##ature
+835
+files
+zara
+hdr
+top10
+nature
+950
+magazine
+s6
+marriott
+##シ
+avira
+case
+##っと
+tab
+##ran
+tony
+##home
+oculus
+im
+##ral
+jean
+saint
+cry
+307
+rosie
+##force
+##ini
+ice
+##bert
+のある
+##nder
+##mber
+pet
+2600
+##◆
+plurk
+▲topdec
+##sis
+00kg
+▲topnov
+720
+##ence
+tim
+##ω
+##nc
+##ても
+##name
+log
+ips
+great
+ikea
+malaysia
+unix
+##イト
+3600
+##ncy
+##nie
+12000
+akb48
+##ye
+##oid
+404
+##chi
+##いた
+oa
+xuehai
+##1000
+##orm
+##rf
+275
+さん
+##ware
+##リー
+980
+ho
+##pro
+text
+##era
+560
+bob
+227
+##ub
+##2008
+8891
+scp
+avi
+##zen
+2022
+mi
+wu
+museum
+qvod
+apache
+lake
+jcb
+▲topaug
+★★★
+ni
+##hr
+hill
+302
+ne
+weibo
+490
+ruby
+##ーシ
+##ヶ
+##row
+4d
+▲topjul
+iv
+##ish
+github
+306
+mate
+312
+##スト
+##lot
+##ane
+andrew
+のハイト
+##tina
+t1
+rf
+ed2k
+##vel
+##900
+way
+final
+りの
+ns
+5a
+705
+197
+##メ
+sweet
+bytes
+##ene
+▲topjan
+231
+##cker
+##2007
+##px
+100g
+topapp
+229
+helpapp
+rs
+low
+14k
+g4g
+care
+630
+ldquo
+あり
+##fork
+leave
+rm
+edition
+##gan
+##zon
+##qq
+▲topsep
+##google
+##ism
+gold
+224
+explorer
+##zer
+toyota
+category
+select
+visual
+##labels
+restaurant
+##md
+posts
+s1
+##ico
+もっと
+angelababy
+123456
+217
+sports
+s3
+mbc
+1915
+してくたさい
+shell
+x86
+candy
+##new
+kbs
+face
+xl
+470
+##here
+4a
+swissinfo
+v8
+▲topfeb
+dram
+##ual
+##vice
+3a
+##wer
+sport
+q1
+ios10
+public
+int
+card
+##ｃ
+ep
+au
+rt
+##れた
+1080
+bill
+##mll
+kim
+３０
+460
+wan
+##uk
+##ミ
+x3
+298
+0t
+scott
+##ming
+239
+e5
+##3d
+h7n9
+worldcat
+brown
+##あります
+##vo
+##led
+##580
+##ax
+249
+410
+##ert
+paris
+##～6
+polo
+925
+##lr
+599
+##ナ
+capital
+##hing
+bank
+cv
+1g
+##chat
+##ｓ
+##たい
+adc
+##ule
+2m
+##ｅ
+digital
+hotmail
+268
+##pad
+870
+bbq
+quot
+##ring
+before
+wali
+##まて
+mcu
+2k
+2b
+という
+costco
+316
+north
+333
+switch
+##city
+##ｐ
+philips
+##mann
+management
+panasonic
+##cl
+##vd
+##ping
+##rge
+alice
+##lk
+##ましょう
+css3
+##ney
+vision
+alpha
+##ular
+##400
+##tter
+lz
+にお
+##ありません
+mode
+gre
+1916
+pci
+##tm
+237
+1～2
+##yan
+##そ
+について
+##let
+##キ
+work
+war
+coach
+ah
+mary
+##ᅵ
+huang
+##pt
+a8
+pt
+follow
+##berry
+1895
+##ew
+a5
+ghost
+##ション
+##wn
+##og
+south
+##code
+girls
+##rid
+action
+villa
+git
+r11
+table
+games
+##cket
+error
+##anonymoussaid
+##ag
+here
+##ame
+##gc
+qa
+##■
+##lis
+gmp
+##gin
+vmalife
+##cher
+yu
+wedding
+##tis
+demo
+dragon
+530
+soho
+social
+bye
+##rant
+river
+orz
+acer
+325
+##↑
+##ース
+##ats
+261
+del
+##ven
+440
+ups
+##ように
+##ター
+305
+value
+macd
+yougou
+##dn
+661
+##ano
+ll
+##urt
+##rent
+continue
+script
+##wen
+##ect
+paper
+263
+319
+shift
+##chel
+##フト
+##cat
+258
+x5
+fox
+243
+##さん
+car
+aaa
+##blog
+loading
+##yn
+##tp
+kuso
+799
+si
+sns
+イカせるテンマ
+ヒンクテンマ3
+rmb
+vdc
+forest
+central
+prime
+help
+ultra
+##rmb
+##ような
+241
+square
+688
+##しい
+のないフロクに
+##field
+##reen
+##ors
+##ju
+c1
+start
+510
+##air
+##map
+cdn
+##wo
+cba
+stephen
+m8
+100km
+##get
+opera
+##base
+##ood
+vsa
+com™
+##aw
+##ail
+251
+なのて
+count
+t2
+##ᅡ
+##een
+2700
+hop
+##gp
+vsc
+tree
+##eg
+##ose
+816
+285
+##ories
+##shop
+alphago
+v4
+1909
+simon
+##ᆼ
+fluke62max
+zip
+スホンサー
+##sta
+louis
+cr
+bas
+##～10
+bc
+##yer
+hadoop
+##ube
+##wi
+1906
+0755
+hola
+##low
+place
+centre
+5v
+d3
+##fer
+252
+##750
+##media
+281
+540
+0l
+exchange
+262
+series
+##ハー
+##san
+eb
+##bank
+##ｋ
+q3
+##nge
+##mail
+take
+##lp
+259
+1888
+client
+east
+cache
+event
+vincent
+##ールを
+きを
+##nse
+sui
+855
+adchoice
+##и
+##stry
+##なたの
+246
+##zone
+ga
+apps
+sea
+##ab
+248
+cisco
+##タ
+##rner
+kymco
+##care
+dha
+##pu
+##yi
+minkoff
+royal
+p1
+への
+annie
+269
+collection
+kpi
+playstation
+257
+になります
+866
+bh
+##bar
+queen
+505
+radio
+1904
+andy
+armani
+##xy
+manager
+iherb
+##ery
+##share
+spring
+raid
+johnson
+1908
+##ob
+volvo
+hall
+##ball
+v6
+our
+taylor
+##hk
+bi
+242
+##cp
+kate
+bo
+water
+technology
+##rie
+サイトは
+277
+##ona
+##sl
+hpv
+303
+gtx
+hip
+rdquo
+jayz
+stone
+##lex
+##rum
+namespace
+##やり
+620
+##ale
+##atic
+des
+##erson
+##ql
+##ves
+##type
+enter
+##この
+##てきます
+d2
+##168
+##mix
+##bian
+との
+a9
+jj
+ky
+##lc
+access
+movie
+##hc
+リストに
+tower
+##ration
+##mit
+ます
+##nch
+ua
+tel
+prefix
+##o2
+1907
+##point
+1901
+ott
+～10
+##http
+##ury
+baidu
+##ink
+member
+##logy
+bigbang
+nownews
+##js
+##shot
+##tb
+##こと
+247
+eba
+##tics
+##lus
+ける
+v5
+spark
+##ama
+there
+##ions
+god
+##lls
+##down
+hiv
+##ress
+burberry
+day2
+##kv
+◆◆
+jeff
+related
+film
+edit
+joseph
+283
+##ark
+cx
+32gb
+order
+g9
+30000
+##ans
+##tty
+s5
+##bee
+かあります
+thread
+xr
+buy
+sh
+005
+land
+spotify
+mx
+##ari
+276
+##verse
+×email
+sf
+why
+##ことて
+244
+7headlines
+nego
+sunny
+dom
+exo
+401
+666
+positioning
+fit
+rgb
+##tton
+278
+kiss
+alexa
+adam
+lp
+みリストを
+##ｇ
+mp
+##ties
+##llow
+amy
+##du
+np
+002
+institute
+271
+##rth
+##lar
+2345
+590
+##des
+sidebar
+１５
+imax
+site
+##cky
+##kit
+##ime
+##009
+season
+323
+##fun
+##ンター
+##ひ
+gogoro
+a7
+pu
+lily
+fire
+twd600
+##ッセーシを
+いて
+##vis
+30ml
+##cture
+##をお
+information
+##オ
+close
+friday
+##くれる
+yi
+nick
+てすか
+##tta
+##tel
+6500
+##lock
+cbd
+economy
+254
+かお
+267
+tinker
+double
+375
+8gb
+voice
+##app
+oops
+channel
+today
+985
+##right
+raw
+xyz
+##＋
+jim
+edm
+##cent
+7500
+supreme
+814
+ds
+##its
+##asia
+dropbox
+##てすか
+##tti
+books
+272
+100ml
+##tle
+##ller
+##ken
+##more
+##boy
+sex
+309
+##dom
+t3
+##ider
+##なります
+##unch
+1903
+810
+feel
+5500
+##かった
+##put
+により
+s2
+mo
+##gh
+men
+ka
+amoled
+div
+##tr
+##n1
+port
+howard
+##tags
+ken
+dnf
+##nus
+adsense
+##а
+ide
+##へ
+buff
+thunder
+##town
+##ique
+has
+##body
+auto
+pin
+##erry
+tee
+てした
+295
+number
+##the
+##013
+object
+psp
+cool
+udnbkk
+16gb
+##mic
+miui
+##tro
+most
+r2
+##alk
+##nity
+1880
+±0
+##いました
+428
+s4
+law
+version
+##oa
+n1
+sgs
+docomo
+##tf
+##ack
+henry
+fc2
+##ded
+##sco
+##014
+##rite
+286
+0mm
+linkedin
+##ada
+##now
+wii
+##ndy
+ucbug
+##◎
+sputniknews
+legalminer
+##ika
+##xp
+2gb
+##bu
+q10
+oo
+b6
+come
+##rman
+cheese
+ming
+maker
+##gm
+nikon
+##fig
+ppi
+kelly
+##ります
+jchere
+てきます
+ted
+md
+003
+fgo
+tech
+##tto
+dan
+soc
+##gl
+##len
+hair
+earth
+640
+521
+img
+##pper
+##a1
+##てきる
+##ロク
+acca
+##ition
+##ference
+suite
+##ig
+outlook
+##mond
+##cation
+398
+##pr
+279
+101vip
+358
+##999
+282
+64gb
+3800
+345
+airport
+##over
+284
+##おり
+jones
+##ith
+lab
+##su
+##いるのて
+co2
+town
+piece
+##llo
+no1
+vmware
+24h
+##qi
+focus
+reader
+##admin
+##ora
+tb
+false
+##log
+1898
+know
+lan
+838
+##ces
+f4
+##ume
+motel
+stop
+##oper
+na
+flickr
+netcomponents
+##af
+##─
+pose
+williams
+local
+##ound
+##cg
+##site
+##iko
+いお
+274
+5m
+gsm
+con
+##ath
+1902
+friends
+##hip
+cell
+317
+##rey
+780
+cream
+##cks
+012
+##dp
+facebooktwitterpinterestgoogle
+sso
+324
+shtml
+song
+swiss
+##mw
+##キンク
+lumia
+xdd
+string
+tiffany
+522
+marc
+られた
+insee
+russell
+sc
+dell
+##ations
+ｏｋ
+camera
+289
+##vs
+##flow
+##late
+classic
+287
+##nter
+stay
+g1
+mtv
+512
+##ever
+##lab
+##nger
+qe
+sata
+ryan
+d1
+50ml
+cms
+##cing
+su
+292
+3300
+editor
+296
+##nap
+security
+sunday
+association
+##ens
+##700
+##bra
+acg
+##かり
+sofascore
+とは
+mkv
+##ign
+jonathan
+gary
+build
+labels
+##oto
+tesla
+moba
+qi
+gohappy
+general
+ajax
+1024
+##かる
+サイト
+society
+##test
+##urs
+wps
+fedora
+##ich
+mozilla
+328
+##480
+##dr
+usa
+urn
+##lina
+##ｒ
+grace
+##die
+##try
+##ader
+1250
+##なり
+elle
+570
+##chen
+##ᆯ
+price
+##ten
+uhz
+##ough
+eq
+##hen
+states
+push
+session
+balance
+wow
+506
+##cus
+##py
+when
+##ward
+##ep
+34e
+wong
+library
+prada
+##サイト
+##cle
+running
+##ree
+313
+ck
+date
+q4
+##ctive
+##ool
+##＞
+mk
+##ira
+##163
+388
+die
+secret
+rq
+dota
+buffet
+は１ヶ
+e6
+##ez
+pan
+368
+ha
+##card
+##cha
+2a
+##さ
+alan
+day3
+eye
+f3
+##end
+france
+keep
+adi
+rna
+tvbs
+##ala
+solo
+nova
+##え
+##tail
+##ょう
+support
+##ries
+##なる
+##ved
+base
+copy
+iis
+fps
+##ways
+hero
+hgih
+profile
+fish
+mu
+ssh
+entertainment
+chang
+##wd
+click
+cake
+##ond
+pre
+##tom
+kic
+pixel
+##ov
+##fl
+product
+6a
+##pd
+dear
+##gate
+es
+yumi
+audio
+##²
+##sky
+echo
+bin
+where
+##ture
+329
+##ape
+find
+sap
+isis
+##なと
+nand
+##101
+##load
+##ream
+band
+a6
+525
+never
+##post
+festival
+50cm
+##we
+555
+guide
+314
+zenfone
+##ike
+335
+gd
+forum
+jessica
+strong
+alexander
+##ould
+software
+allen
+##ious
+program
+360°
+else
+lohasthree
+##gar
+することかてきます
+please
+##れます
+rc
+##ggle
+##ric
+bim
+50000
+##own
+eclipse
+355
+brian
+3ds
+##side
+061
+361
+##other
+##ける
+##tech
+##ator
+485
+engine
+##ged
+##ｔ
+plaza
+##fit
+cia
+ngo
+westbrook
+shi
+tbs
+50mm
+##みませんか
+sci
+291
+reuters
+##ily
+contextlink
+##hn
+af
+##cil
+bridge
+very
+##cel
+1890
+cambridge
+##ize
+15g
+##aid
+##data
+790
+frm
+##head
+award
+butler
+##sun
+meta
+##mar
+america
+ps3
+puma
+pmid
+##すか
+lc
+670
+kitchen
+##lic
+オーフン5
+きなしソフトサーヒス
+そして
+day1
+future
+★★★★
+##text
+##page
+##rris
+pm1
+##ket
+fans
+##っています
+1001
+christian
+bot
+kids
+trackback
+##hai
+c3
+display
+##hl
+n2
+1896
+idea
+さんも
+##sent
+airmail
+##ug
+##men
+pwm
+けます
+028
+##lution
+369
+852
+awards
+schemas
+354
+asics
+wikipedia
+font
+##tional
+##vy
+c2
+293
+##れている
+##dget
+##ein
+っている
+contact
+pepper
+スキル
+339
+##～5
+294
+##uel
+##ument
+730
+##hang
+みてす
+q5
+##sue
+rain
+##ndi
+wei
+swatch
+##cept
+わせ
+331
+popular
+##ste
+##tag
+p2
+501
+trc
+1899
+##west
+##live
+justin
+honda
+ping
+messenger
+##rap
+v9
+543
+##とは
+unity
+appqq
+はすへて
+025
+leo
+##tone
+##テ
+##ass
+uniqlo
+##010
+502
+her
+jane
+memory
+moneydj
+##tical
+human
+12306
+していると
+##m2
+coc
+miacare
+##mn
+tmt
+##core
+vim
+kk
+##may
+fan
+target
+use
+too
+338
+435
+2050
+867
+737
+fast
+##2c
+services
+##ope
+omega
+energy
+##わ
+pinkoi
+1a
+##なから
+##rain
+jackson
+##ement
+##シャンルの
+374
+366
+そんな
+p9
+rd
+##ᆨ
+1111
+##tier
+##vic
+zone
+##│
+385
+690
+dl
+isofix
+cpa
+m4
+322
+kimi
+めて
+davis
+##lay
+lulu
+##uck
+050
+weeks
+qs
+##hop
+920
+##ｎ
+ae
+##ear
+～5
+eia
+405
+##fly
+korea
+jpeg
+boost
+##ship
+small
+##リア
+1860
+eur
+297
+425
+valley
+##iel
+simple
+##ude
+rn
+k2
+##ena
+されます
+non
+patrick
+しているから
+##ナー
+feed
+5757
+30g
+process
+well
+qqmei
+##thing
+they
+aws
+lu
+pink
+##ters
+##kin
+または
+board
+##vertisement
+wine
+##ien
+unicode
+##dge
+r1
+359
+##tant
+いを
+##twitter
+##3c
+cool1
+される
+##れて
+##ｌ
+isp
+##012
+standard
+45㎡2
+402
+##150
+matt
+##fu
+326
+##iner
+googlemsn
+pixnetfacebookyahoo
+##ラン
+x7
+886
+##uce
+メーカー
+sao
+##ev
+##きました
+##file
+9678
+403
+xddd
+shirt
+6l
+##rio
+##hat
+3mm
+givenchy
+ya
+bang
+##lio
+monday
+crystal
+ロクイン
+##abc
+336
+head
+890
+ubuntuforumwikilinuxpastechat
+##vc
+##～20
+##rity
+cnc
+7866
+ipv6
+null
+1897
+##ost
+yang
+imsean
+tiger
+##fet
+##ンス
+352
+##＝
+dji
+327
+ji
+maria
+##come
+##んて
+foundation
+3100
+##beth
+##なった
+1m
+601
+active
+##aft
+##don
+3p
+sr
+349
+emma
+##khz
+living
+415
+353
+1889
+341
+709
+457
+sas
+x6
+##face
+pptv
+x4
+##mate
+han
+sophie
+##jing
+337
+fifa
+##mand
+other
+sale
+inwedding
+##gn
+てきちゃいます
+##mmy
+##pmlast
+bad
+nana
+nbc
+してみてくたさいね
+なとはお
+##wu
+##かあります
+##あ
+note7
+single
+##340
+せからこ
+してくたさい♪この
+しにはとんとんワークケートを
+するとあなたにもっとマッチした
+ならワークケートへ
+もみつかっちゃうかも
+ワークケートの
+##bel
+window
+##dio
+##ht
+union
+age
+382
+１４
+##ivity
+##ｙ
+コメント
+domain
+neo
+##isa
+##lter
+5k
+f5
+steven
+##cts
+powerpoint
+tft
+self
+g2
+ft
+##テル
+zol
+##act
+mwc
+381
+343
+もう
+nbapop
+408
+てある
+eds
+ace
+##room
+previous
+author
+tomtom
+il
+##ets
+hu
+financial
+☆☆☆
+っています
+bp
+5t
+chi
+1gb
+##hg
+fairmont
+cross
+008
+gay
+h2
+function
+##けて
+356
+also
+1b
+625
+##ータ
+##raph
+1894
+3～5
+##ils
+i3
+334
+avenue
+##host
+による
+##bon
+##tsu
+message
+navigation
+50g
+fintech
+h6
+##ことを
+8cm
+##ject
+##vas
+##firm
+credit
+##wf
+xxxx
+form
+##nor
+##space
+huawei
+plan
+json
+sbl
+##dc
+machine
+921
+392
+wish
+##120
+##sol
+windows7
+edward
+##ために
+development
+washington
+##nsis
+lo
+818
+##sio
+##ym
+##bor
+planet
+##～8
+##wt
+ieee
+gpa
+##めて
+camp
+ann
+gm
+##tw
+##oka
+connect
+##rss
+##work
+##atus
+wall
+chicken
+soul
+2mm
+##times
+fa
+##ather
+##cord
+009
+##eep
+hitachi
+gui
+harry
+##pan
+e1
+disney
+##press
+##ーション
+wind
+386
+frigidaire
+##tl
+liu
+hsu
+332
+basic
+von
+ev
+いた
+てきる
+スホンサーサイト
+learning
+##ull
+expedia
+archives
+change
+##wei
+santa
+cut
+ins
+6gb
+turbo
+brand
+cf1
+508
+004
+return
+747
+##rip
+h1
+##nis
+##をこ
+128gb
+##にお
+3t
+application
+しており
+emc
+rx
+##oon
+384
+quick
+412
+15058
+wilson
+wing
+chapter
+##bug
+beyond
+##cms
+##dar
+##oh
+zoom
+e2
+trip
+sb
+##nba
+rcep
+342
+aspx
+ci
+080
+gc
+gnu
+める
+##count
+advanced
+dance
+dv
+##url
+##ging
+367
+8591
+am09
+shadow
+battle
+346
+##ｉ
+##cia
+##という
+emily
+##のてす
+##tation
+host
+ff
+techorz
+sars
+##mini
+##mporary
+##ering
+nc
+4200
+798
+##next
+cma
+##mbps
+##gas
+##ift
+##dot
+##ィ
+455
+##～17
+amana
+##りの
+426
+##ros
+ir
+00㎡1
+##eet
+##ible
+##↓
+710
+ˋ▽ˊ
+##aka
+dcs
+iq
+##ｖ
+l1
+##lor
+maggie
+##011
+##iu
+588
+##～1
+830
+##gt
+1tb
+articles
+create
+##burg
+##iki
+database
+fantasy
+##rex
+##cam
+dlc
+dean
+##you
+hard
+path
+gaming
+victoria
+maps
+cb
+##lee
+##itor
+overchicstoretvhome
+systems
+##xt
+416
+p3
+sarah
+760
+##nan
+407
+486
+x9
+install
+second
+626
+##ann
+##ph
+##rcle
+##nic
+860
+##nar
+ec
+##とう
+768
+metro
+chocolate
+##rian
+～4
+##table
+##しています
+skin
+##sn
+395
+mountain
+##0mm
+inparadise
+6m
+7x24
+ib
+4800
+##jia
+eeworld
+creative
+g5
+g3
+357
+parker
+ecfa
+village
+からの
+18000
+sylvia
+サーヒス
+hbl
+##ques
+##onsored
+##x2
+##きます
+##v4
+##tein
+ie6
+383
+##stack
+389
+ver
+##ads
+##baby
+sound
+bbe
+##110
+##lone
+##uid
+ads
+022
+gundam
+351
+thinkpad
+006
+scrum
+match
+##ave
+mems
+##470
+##oy
+##なりました
+##talk
+glass
+lamigo
+span
+##eme
+job
+##a5
+jay
+wade
+kde
+498
+##lace
+ocean
+tvg
+##covery
+##r3
+##ners
+##rea
+junior
+think
+##aine
+cover
+##ision
+##sia
+↓↓
+##bow
+msi
+413
+458
+406
+##love
+711
+801
+soft
+z2
+##pl
+456
+1840
+mobil
+mind
+##uy
+427
+nginx
+##oi
+めた
+##rr
+6221
+##mple
+##sson
+##ーシてす
+371
+##nts
+91tv
+comhd
+crv3000
+##uard
+1868
+397
+deep
+lost
+field
+gallery
+##bia
+rate
+spf
+redis
+traction
+930
+icloud
+011
+なら
+fe
+jose
+372
+##tory
+into
+sohu
+fx
+899
+379
+kicstart2
+##hia
+すく
+##～3
+##sit
+ra
+２４
+##walk
+##xure
+500g
+##pact
+pacific
+xa
+natural
+carlo
+##250
+##walker
+1850
+##can
+cto
+gigi
+516
+##サー
+pen
+##hoo
+ob
+matlab
+##ｂ
+##yy
+13913459
+##iti
+mango
+##bbs
+sense
+c5
+oxford
+##ニア
+walker
+jennifer
+##ola
+course
+##bre
+701
+##pus
+##rder
+lucky
+075
+##ぁ
+ivy
+なお
+##nia
+sotheby
+side
+##ugh
+joy
+##orage
+##ush
+##bat
+##dt
+364
+r9
+##2d
+##gio
+511
+country
+wear
+##lax
+##～7
+##moon
+393
+seven
+study
+411
+348
+lonzo
+8k
+##ェ
+evolution
+##イフ
+##kk
+gs
+kd
+##レス
+arduino
+344
+b12
+##lux
+arpg
+##rdon
+cook
+##x5
+dark
+five
+##als
+##ida
+とても
+sign
+362
+##ちの
+something
+20mm
+##nda
+387
+##posted
+fresh
+tf
+1870
+422
+cam
+##mine
+##skip
+##form
+##ssion
+education
+394
+##tee
+dyson
+stage
+##jie
+want
+##night
+epson
+pack
+あります
+##ppy
+テリヘル
+##█
+wd
+##eh
+##rence
+left
+##lvin
+golden
+mhz
+discovery
+##trix
+##n2
+loft
+##uch
+##dra
+##sse
+speed
+～1
+1mdb
+sorry
+welcome
+##urn
+wave
+gaga
+##lmer
+teddy
+##160
+トラックハック
+せよ
+611
+##f2016
+378
+rp
+##sha
+rar
+##あなたに
+##きた
+840
+holiday
+##ュー
+373
+074
+##vg
+##nos
+##rail
+gartner
+gi
+6p
+##dium
+kit
+488
+b3
+eco
+##ろう
+20g
+sean
+##stone
+autocad
+nu
+##np
+f16
+write
+029
+m5
+##ias
+images
+atp
+##dk
+fsm
+504
+1350
+ve
+52kb
+##xxx
+##のに
+##cake
+414
+unit
+lim
+ru
+1v
+##ification
+published
+angela
+16g
+analytics
+ak
+##ｑ
+##nel
+gmt
+##icon
+again
+##₂
+##bby
+ios11
+445
+かこさいます
+waze
+いてす
+##ハ
+9985
+##ust
+##ティー
+framework
+##007
+iptv
+delete
+52sykb
+cl
+wwdc
+027
+30cm
+##fw
+##ての
+1389
+##xon
+brandt
+##ses
+##dragon
+tc
+vetements
+anne
+monte
+modern
+official
+##へて
+##ere
+##nne
+##oud
+もちろん
+５０
+etnews
+##a2
+##graphy
+421
+863
+##ちゃん
+444
+##rtex
+##てお
+l2
+##gma
+mount
+ccd
+たと
+archive
+morning
+tan
+ddos
+e7
+##ホ
+day4
+##ウ
+gis
+453
+its
+495
+factory
+bruce
+pg
+##ito
+ってくたさい
+guest
+cdma
+##lling
+536
+n3
+しかし
+3～4
+mega
+eyes
+ro
+１３
+women
+dac
+church
+##jun
+singapore
+##facebook
+6991
+starbucks
+##tos
+##stin
+##shine
+zen
+##mu
+tina
+20℃
+1893
+##たけて
+503
+465
+request
+##gence
+qt
+##っ
+1886
+347
+363
+q7
+##zzi
+diary
+##tore
+409
+##ead
+468
+cst
+##osa
+canada
+agent
+va
+##jiang
+##ちは
+##ーク
+##lam
+sg
+##nix
+##sday
+##よって
+g6
+##master
+bing
+##zl
+charlie
+１６
+8mm
+nb40
+##ーン
+thai
+##ルフ
+ln284ct
+##itz
+##2f
+bonnie
+##food
+##lent
+originals
+##stro
+##lts
+418
+∟∣
+##bscribe
+children
+ntd
+yesstyle
+##かも
+hmv
+##tment
+d5
+2cm
+arts
+sms
+##pn
+##я
+##いい
+topios9
+539
+lifestyle
+virtual
+##ague
+xz
+##deo
+muji
+024
+unt
+##nnis
+##ᅩ
+faq1
+1884
+396
+##ette
+fly
+64㎡
+はしめまして
+441
+curry
+##pop
+のこ
+release
+##←
+##◆◆
+##cast
+073
+ありな
+500ml
+##ews
+5c
+##stle
+ios7
+##ima
+787
+dog
+lenovo
+##r4
+roger
+013
+cbs
+vornado
+100m
+417
+##desk
+##クok
+##ald
+1867
+9595
+2900
+##van
+oil
+##ｘ
+some
+break
+common
+##jy
+##lines
+g7
+twice
+419
+ella
+nano
+belle
+にこ
+##mes
+##self
+##note
+jb
+##ことかてきます
+benz
+##との
+##ova
+451
+save
+##wing
+##ますのて
+kai
+りは
+##hua
+##rect
+rainer
+##unge
+448
+##0m
+adsl
+##かな
+guestname
+##uma
+##kins
+##zu
+tokichoi
+##price
+county
+##med
+##mus
+rmk
+391
+address
+vm
+えて
+openload
+##group
+##hin
+##iginal
+amg
+urban
+##oz
+jobs
+emi
+##public
+beautiful
+##sch
+album
+##dden
+##bell
+jerry
+works
+hostel
+miller
+##drive
+##rmin
+##１０
+376
+boot
+828
+##370
+##fx
+##cm～
+1885
+##nome
+##ctionary
+##oman
+##lish
+##cr
+##hm
+433
+##how
+432
+francis
+xi
+c919
+b5
+evernote
+##uc
+vga
+##3000
+coupe
+##urg
+##cca
+##uality
+019
+6g
+れる
+multi
+##また
+##ett
+em
+hey
+##ani
+##tax
+##rma
+inside
+than
+740
+leonnhurt
+##jin
+ict
+れた
+bird
+notes
+200mm
+くの
+##dical
+##lli
+result
+442
+iu
+ee
+438
+smap
+gopro
+##last
+yin
+pure
+998
+32g
+けた
+5kg
+##dan
+##rame
+mama
+##oot
+bean
+marketing
+##hur
+2l
+bella
+sync
+xuite
+##ground
+515
+discuz
+##getrelax
+##ince
+##bay
+##5s
+cj
+##イス
+gmat
+apt
+##pass
+jing
+##rix
+c4
+rich
+##とても
+niusnews
+##ello
+bag
+770
+##eting
+##mobile
+１８
+culture
+015
+##のてすか
+377
+1020
+area
+##ience
+616
+details
+gp
+universal
+silver
+dit
+はお
+private
+ddd
+u11
+kanshu
+##ified
+fung
+##nny
+dx
+##520
+tai
+475
+023
+##fr
+##lean
+3s
+##pin
+429
+##rin
+25000
+ly
+rick
+##bility
+usb3
+banner
+##baru
+##gion
+metal
+dt
+vdf
+1871
+karl
+qualcomm
+bear
+1010
+oldid
+ian
+jo
+##tors
+population
+##ernel
+1882
+mmorpg
+##mv
+##bike
+603
+##©
+ww
+friend
+##ager
+exhibition
+##del
+##pods
+fpx
+structure
+##free
+##tings
+kl
+##rley
+##copyright
+##mma
+california
+3400
+orange
+yoga
+4l
+canmake
+honey
+##anda
+##コメント
+595
+nikkie
+##ルハイト
+dhl
+publishing
+##mall
+##gnet
+20cm
+513
+##クセス
+##┅
+e88
+970
+##dog
+fishbase
+##!
+##"
+###
+##$
+##%
+##&
+##'
+##(
+##)
+##*
+##+
+##,
+##-
+##.
+##/
+##:
+##;
+##<
+##=
+##>
+##?
+##@
+##[
+##\
+##]
+##^
+##_
+##{
+##|
+##}
+##~
+##£
+##¤
+##¥
+##§
+##«
+##±
+##³
+##µ
+##·
+##¹
+##º
+##»
+##¼
+##ß
+##æ
+##÷
+##ø
+##đ
+##ŋ
+##ɔ
+##ə
+##ɡ
+##ʰ
+##ˇ
+##ˈ
+##ˊ
+##ˋ
+##ˍ
+##ː
+##˙
+##˚
+##ˢ
+##α
+##β
+##γ
+##δ
+##ε
+##η
+##θ
+##ι
+##κ
+##λ
+##μ
+##ν
+##ο
+##π
+##ρ
+##ς
+##σ
+##τ
+##υ
+##φ
+##χ
+##ψ
+##б
+##в
+##г
+##д
+##е
+##ж
+##з
+##к
+##л
+##м
+##н
+##о
+##п
+##р
+##с
+##т
+##у
+##ф
+##х
+##ц
+##ч
+##ш
+##ы
+##ь
+##і
+##ا
+##ب
+##ة
+##ت
+##د
+##ر
+##س
+##ع
+##ل
+##م
+##ن
+##ه
+##و
+##ي
+##۩
+##ก
+##ง
+##น
+##ม
+##ย
+##ร
+##อ
+##า
+##เ
+##๑
+##་
+##ღ
+##ᄀ
+##ᄁ
+##ᄂ
+##ᄃ
+##ᄅ
+##ᄆ
+##ᄇ
+##ᄈ
+##ᄉ
+##ᄋ
+##ᄌ
+##ᄎ
+##ᄏ
+##ᄐ
+##ᄑ
+##ᄒ
+##ᅢ
+##ᅣ
+##ᅥ
+##ᅦ
+##ᅧ
+##ᅨ
+##ᅪ
+##ᅬ
+##ᅭ
+##ᅮ
+##ᅯ
+##ᅲ
+##ᅳ
+##ᅴ
+##ᆷ
+##ᆸ
+##ᆺ
+##ᆻ
+##ᗜ
+##ᵃ
+##ᵉ
+##ᵍ
+##ᵏ
+##ᵐ
+##ᵒ
+##ᵘ
+##‖
+##„
+##†
+##•
+##‥
+##‧
+## 
+##‰
+##′
+##″
+##‹
+##›
+##※
+##‿
+##⁄
+##ⁱ
+##⁺
+##ⁿ
+##₁
+##₃
+##₄
+##€
+##№
+##ⅰ
+##ⅱ
+##ⅲ
+##ⅳ
+##ⅴ
+##↔
+##↗
+##↘
+##⇒
+##∀
+##−
+##∕
+##∙
+##√
+##∞
+##∟
+##∠
+##∣
+##∩
+##∮
+##∶
+##∼
+##∽
+##≈
+##≒
+##≡
+##≤
+##≥
+##≦
+##≧
+##≪
+##≫
+##⊙
+##⋅
+##⋈
+##⋯
+##⌒
+##①
+##②
+##③
+##④
+##⑤
+##⑥
+##⑦
+##⑧
+##⑨
+##⑩
+##⑴
+##⑵
+##⑶
+##⑷
+##⑸
+##⒈
+##⒉
+##⒊
+##⒋
+##ⓒ
+##ⓔ
+##ⓘ
+##━
+##┃
+##┆
+##┊
+##┌
+##└
+##├
+##┣
+##═
+##║
+##╚
+##╞
+##╠
+##╭
+##╮
+##╯
+##╰
+##╱
+##╳
+##▂
+##▃
+##▅
+##▇
+##▉
+##▋
+##▌
+##▍
+##▎
+##□
+##▪
+##▫
+##▬
+##△
+##▶
+##►
+##▽
+##◇
+##◕
+##◠
+##◢
+##◤
+##☀
+##☕
+##☞
+##☺
+##☼
+##♀
+##♂
+##♠
+##♡
+##♣
+##♦
+##♫
+##♬
+##✈
+##✔
+##✕
+##✖
+##✦
+##✨
+##✪
+##✰
+##✿
+##❀
+##➜
+##➤
+##⦿
+##、
+##。
+##〃
+##々
+##〇
+##〈
+##〉
+##《
+##》
+##「
+##」
+##『
+##』
+##【
+##】
+##〓
+##〔
+##〕
+##〖
+##〗
+##〜
+##〝
+##〞
+##ぃ
+##ぇ
+##ぬ
+##ふ
+##ほ
+##む
+##ゃ
+##ゅ
+##ゆ
+##ょ
+##゜
+##ゝ
+##ァ
+##ゥ
+##エ
+##ォ
+##ケ
+##サ
+##セ
+##ソ
+##ッ
+##ニ
+##ヌ
+##ネ
+##ノ
+##ヘ
+##モ
+##ャ
+##ヤ
+##ュ
+##ユ
+##ョ
+##ヨ
+##ワ
+##ヲ
+##・
+##ヽ
+##ㄅ
+##ㄆ
+##ㄇ
+##ㄉ
+##ㄋ
+##ㄌ
+##ㄍ
+##ㄎ
+##ㄏ
+##ㄒ
+##ㄚ
+##ㄛ
+##ㄞ
+##ㄟ
+##ㄢ
+##ㄤ
+##ㄥ
+##ㄧ
+##ㄨ
+##ㆍ
+##㈦
+##㊣
+##㗎
+##一
+##丁
+##七
+##万
+##丈
+##三
+##上
+##下
+##不
+##与
+##丐
+##丑
+##专
+##且
+##丕
+##世
+##丘
+##丙
+##业
+##丛
+##东
+##丝
+##丞
+##丟
+##両
+##丢
+##两
+##严
+##並
+##丧
+##丨
+##个
+##丫
+##中
+##丰
+##串
+##临
+##丶
+##丸
+##丹
+##为
+##主
+##丼
+##丽
+##举
+##丿
+##乂
+##乃
+##久
+##么
+##义
+##之
+##乌
+##乍
+##乎
+##乏
+##乐
+##乒
+##乓
+##乔
+##乖
+##乗
+##乘
+##乙
+##乜
+##九
+##乞
+##也
+##习
+##乡
+##书
+##乩
+##买
+##乱
+##乳
+##乾
+##亀
+##亂
+##了
+##予
+##争
+##事
+##二
+##于
+##亏
+##云
+##互
+##五
+##井
+##亘
+##亙
+##亚
+##些
+##亜
+##亞
+##亟
+##亡
+##亢
+##交
+##亥
+##亦
+##产
+##亨
+##亩
+##享
+##京
+##亭
+##亮
+##亲
+##亳
+##亵
+##人
+##亿
+##什
+##仁
+##仃
+##仄
+##仅
+##仆
+##仇
+##今
+##介
+##仍
+##从
+##仏
+##仑
+##仓
+##仔
+##仕
+##他
+##仗
+##付
+##仙
+##仝
+##仞
+##仟
+##代
+##令
+##以
+##仨
+##仪
+##们
+##仮
+##仰
+##仲
+##件
+##价
+##任
+##份
+##仿
+##企
+##伉
+##伊
+##伍
+##伎
+##伏
+##伐
+##休
+##伕
+##众
+##优
+##伙
+##会
+##伝
+##伞
+##伟
+##传
+##伢
+##伤
+##伦
+##伪
+##伫
+##伯
+##估
+##伴
+##伶
+##伸
+##伺
+##似
+##伽
+##佃
+##但
+##佇
+##佈
+##位
+##低
+##住
+##佐
+##佑
+##体
+##佔
+##何
+##佗
+##佘
+##余
+##佚
+##佛
+##作
+##佝
+##佞
+##佟
+##你
+##佢
+##佣
+##佤
+##佥
+##佩
+##佬
+##佯
+##佰
+##佳
+##併
+##佶
+##佻
+##佼
+##使
+##侃
+##侄
+##來
+##侈
+##例
+##侍
+##侏
+##侑
+##侖
+##侗
+##供
+##依
+##侠
+##価
+##侣
+##侥
+##侦
+##侧
+##侨
+##侬
+##侮
+##侯
+##侵
+##侶
+##侷
+##便
+##係
+##促
+##俄
+##俊
+##俎
+##俏
+##俐
+##俑
+##俗
+##俘
+##俚
+##保
+##俞
+##俟
+##俠
+##信
+##俨
+##俩
+##俪
+##俬
+##俭
+##修
+##俯
+##俱
+##俳
+##俸
+##俺
+##俾
+##倆
+##倉
+##個
+##倌
+##倍
+##倏
+##們
+##倒
+##倔
+##倖
+##倘
+##候
+##倚
+##倜
+##借
+##倡
+##値
+##倦
+##倩
+##倪
+##倫
+##倬
+##倭
+##倶
+##债
+##值
+##倾
+##偃
+##假
+##偈
+##偉
+##偌
+##偎
+##偏
+##偕
+##做
+##停
+##健
+##側
+##偵
+##偶
+##偷
+##偻
+##偽
+##偿
+##傀
+##傅
+##傍
+##傑
+##傘
+##備
+##傚
+##傢
+##傣
+##傥
+##储
+##傩
+##催
+##傭
+##傲
+##傳
+##債
+##傷
+##傻
+##傾
+##僅
+##働
+##像
+##僑
+##僕
+##僖
+##僚
+##僥
+##僧
+##僭
+##僮
+##僱
+##僵
+##價
+##僻
+##儀
+##儂
+##億
+##儆
+##儉
+##儋
+##儒
+##儕
+##儘
+##償
+##儡
+##優
+##儲
+##儷
+##儼
+##儿
+##兀
+##允
+##元
+##兄
+##充
+##兆
+##兇
+##先
+##光
+##克
+##兌
+##免
+##児
+##兑
+##兒
+##兔
+##兖
+##党
+##兜
+##兢
+##入
+##內
+##全
+##兩
+##八
+##公
+##六
+##兮
+##兰
+##共
+##兲
+##关
+##兴
+##兵
+##其
+##具
+##典
+##兹
+##养
+##兼
+##兽
+##冀
+##内
+##円
+##冇
+##冈
+##冉
+##冊
+##册
+##再
+##冏
+##冒
+##冕
+##冗
+##写
+##军
+##农
+##冠
+##冢
+##冤
+##冥
+##冨
+##冪
+##冬
+##冯
+##冰
+##冲
+##决
+##况
+##冶
+##冷
+##冻
+##冼
+##冽
+##冾
+##净
+##凄
+##准
+##凇
+##凈
+##凉
+##凋
+##凌
+##凍
+##减
+##凑
+##凛
+##凜
+##凝
+##几
+##凡
+##凤
+##処
+##凪
+##凭
+##凯
+##凰
+##凱
+##凳
+##凶
+##凸
+##凹
+##出
+##击
+##函
+##凿
+##刀
+##刁
+##刃
+##分
+##切
+##刈
+##刊
+##刍
+##刎
+##刑
+##划
+##列
+##刘
+##则
+##刚
+##创
+##初
+##删
+##判
+##別
+##刨
+##利
+##刪
+##别
+##刮
+##到
+##制
+##刷
+##券
+##刹
+##刺
+##刻
+##刽
+##剁
+##剂
+##剃
+##則
+##剉
+##削
+##剋
+##剌
+##前
+##剎
+##剐
+##剑
+##剔
+##剖
+##剛
+##剜
+##剝
+##剣
+##剤
+##剥
+##剧
+##剩
+##剪
+##副
+##割
+##創
+##剷
+##剽
+##剿
+##劃
+##劇
+##劈
+##劉
+##劊
+##劍
+##劏
+##劑
+##力
+##劝
+##办
+##功
+##加
+##务
+##劣
+##动
+##助
+##努
+##劫
+##劭
+##励
+##劲
+##劳
+##労
+##劵
+##効
+##劾
+##势
+##勁
+##勃
+##勇
+##勉
+##勋
+##勐
+##勒
+##動
+##勖
+##勘
+##務
+##勛
+##勝
+##勞
+##募
+##勢
+##勤
+##勧
+##勳
+##勵
+##勸
+##勺
+##勻
+##勾
+##勿
+##匀
+##包
+##匆
+##匈
+##匍
+##匐
+##匕
+##化
+##北
+##匙
+##匝
+##匠
+##匡
+##匣
+##匪
+##匮
+##匯
+##匱
+##匹
+##区
+##医
+##匾
+##匿
+##區
+##十
+##千
+##卅
+##升
+##午
+##卉
+##半
+##卍
+##华
+##协
+##卑
+##卒
+##卓
+##協
+##单
+##卖
+##南
+##単
+##博
+##卜
+##卞
+##卟
+##占
+##卡
+##卢
+##卤
+##卦
+##卧
+##卫
+##卮
+##卯
+##印
+##危
+##即
+##却
+##卵
+##卷
+##卸
+##卻
+##卿
+##厂
+##厄
+##厅
+##历
+##厉
+##压
+##厌
+##厕
+##厘
+##厚
+##厝
+##原
+##厢
+##厥
+##厦
+##厨
+##厩
+##厭
+##厮
+##厲
+##厳
+##去
+##县
+##叁
+##参
+##參
+##又
+##叉
+##及
+##友
+##双
+##反
+##収
+##发
+##叔
+##取
+##受
+##变
+##叙
+##叛
+##叟
+##叠
+##叡
+##叢
+##口
+##古
+##句
+##另
+##叨
+##叩
+##只
+##叫
+##召
+##叭
+##叮
+##可
+##台
+##叱
+##史
+##右
+##叵
+##叶
+##号
+##司
+##叹
+##叻
+##叼
+##叽
+##吁
+##吃
+##各
+##吆
+##合
+##吉
+##吊
+##吋
+##同
+##名
+##后
+##吏
+##吐
+##向
+##吒
+##吓
+##吕
+##吖
+##吗
+##君
+##吝
+##吞
+##吟
+##吠
+##吡
+##否
+##吧
+##吨
+##吩
+##含
+##听
+##吭
+##吮
+##启
+##吱
+##吳
+##吴
+##吵
+##吶
+##吸
+##吹
+##吻
+##吼
+##吽
+##吾
+##呀
+##呂
+##呃
+##呆
+##呈
+##告
+##呋
+##呎
+##呐
+##呓
+##呕
+##呗
+##员
+##呛
+##呜
+##呢
+##呤
+##呦
+##周
+##呱
+##呲
+##味
+##呵
+##呷
+##呸
+##呻
+##呼
+##命
+##咀
+##咁
+##咂
+##咄
+##咆
+##咋
+##和
+##咎
+##咏
+##咐
+##咒
+##咔
+##咕
+##咖
+##咗
+##咘
+##咙
+##咚
+##咛
+##咣
+##咤
+##咦
+##咧
+##咨
+##咩
+##咪
+##咫
+##咬
+##咭
+##咯
+##咱
+##咲
+##咳
+##咸
+##咻
+##咽
+##咿
+##哀
+##品
+##哂
+##哄
+##哆
+##哇
+##哈
+##哉
+##哋
+##哌
+##响
+##哎
+##哏
+##哐
+##哑
+##哒
+##哔
+##哗
+##哟
+##員
+##哥
+##哦
+##哧
+##哨
+##哩
+##哪
+##哭
+##哮
+##哲
+##哺
+##哼
+##哽
+##唁
+##唄
+##唆
+##唇
+##唉
+##唏
+##唐
+##唑
+##唔
+##唠
+##唤
+##唧
+##唬
+##售
+##唯
+##唰
+##唱
+##唳
+##唷
+##唸
+##唾
+##啃
+##啄
+##商
+##啉
+##啊
+##問
+##啓
+##啕
+##啖
+##啜
+##啞
+##啟
+##啡
+##啤
+##啥
+##啦
+##啧
+##啪
+##啫
+##啬
+##啮
+##啰
+##啱
+##啲
+##啵
+##啶
+##啷
+##啸
+##啻
+##啼
+##啾
+##喀
+##喂
+##喃
+##善
+##喆
+##喇
+##喉
+##喊
+##喋
+##喎
+##喏
+##喔
+##喘
+##喙
+##喚
+##喜
+##喝
+##喟
+##喧
+##喪
+##喫
+##喬
+##單
+##喰
+##喱
+##喲
+##喳
+##喵
+##営
+##喷
+##喹
+##喺
+##喻
+##喽
+##嗅
+##嗆
+##嗇
+##嗎
+##嗑
+##嗒
+##嗓
+##嗔
+##嗖
+##嗚
+##嗜
+##嗝
+##嗟
+##嗡
+##嗣
+##嗤
+##嗦
+##嗨
+##嗪
+##嗬
+##嗯
+##嗰
+##嗲
+##嗳
+##嗶
+##嗷
+##嗽
+##嘀
+##嘅
+##嘆
+##嘈
+##嘉
+##嘌
+##嘍
+##嘎
+##嘔
+##嘖
+##嘗
+##嘘
+##嘚
+##嘛
+##嘜
+##嘞
+##嘟
+##嘢
+##嘣
+##嘤
+##嘧
+##嘩
+##嘭
+##嘮
+##嘯
+##嘰
+##嘱
+##嘲
+##嘴
+##嘶
+##嘸
+##嘹
+##嘻
+##嘿
+##噁
+##噌
+##噎
+##噓
+##噔
+##噗
+##噙
+##噜
+##噠
+##噢
+##噤
+##器
+##噩
+##噪
+##噬
+##噱
+##噴
+##噶
+##噸
+##噹
+##噻
+##噼
+##嚀
+##嚇
+##嚎
+##嚏
+##嚐
+##嚓
+##嚕
+##嚟
+##嚣
+##嚥
+##嚨
+##嚮
+##嚴
+##嚷
+##嚼
+##囂
+##囉
+##囊
+##囍
+##囑
+##囔
+##囗
+##囚
+##四
+##囝
+##回
+##囟
+##因
+##囡
+##团
+##団
+##囤
+##囧
+##囪
+##囫
+##园
+##困
+##囱
+##囲
+##図
+##围
+##囹
+##固
+##国
+##图
+##囿
+##圃
+##圄
+##圆
+##圈
+##國
+##圍
+##圏
+##園
+##圓
+##圖
+##團
+##圜
+##土
+##圣
+##圧
+##在
+##圩
+##圭
+##地
+##圳
+##场
+##圻
+##圾
+##址
+##坂
+##均
+##坊
+##坍
+##坎
+##坏
+##坐
+##坑
+##块
+##坚
+##坛
+##坝
+##坞
+##坟
+##坠
+##坡
+##坤
+##坦
+##坨
+##坪
+##坯
+##坳
+##坵
+##坷
+##垂
+##垃
+##垄
+##型
+##垒
+##垚
+##垛
+##垠
+##垢
+##垣
+##垦
+##垩
+##垫
+##垭
+##垮
+##垵
+##埂
+##埃
+##埋
+##城
+##埔
+##埕
+##埗
+##域
+##埠
+##埤
+##埵
+##執
+##埸
+##培
+##基
+##埼
+##堀
+##堂
+##堃
+##堅
+##堆
+##堇
+##堑
+##堕
+##堙
+##堡
+##堤
+##堪
+##堯
+##堰
+##報
+##場
+##堵
+##堺
+##堿
+##塊
+##塌
+##塑
+##塔
+##塗
+##塘
+##塚
+##塞
+##塢
+##塩
+##填
+##塬
+##塭
+##塵
+##塾
+##墀
+##境
+##墅
+##墉
+##墊
+##墒
+##墓
+##増
+##墘
+##墙
+##墜
+##增
+##墟
+##墨
+##墩
+##墮
+##墳
+##墻
+##墾
+##壁
+##壅
+##壆
+##壇
+##壊
+##壑
+##壓
+##壕
+##壘
+##壞
+##壟
+##壢
+##壤
+##壩
+##士
+##壬
+##壮
+##壯
+##声
+##売
+##壳
+##壶
+##壹
+##壺
+##壽
+##处
+##备
+##変
+##复
+##夏
+##夔
+##夕
+##外
+##夙
+##多
+##夜
+##够
+##夠
+##夢
+##夥
+##大
+##天
+##太
+##夫
+##夭
+##央
+##夯
+##失
+##头
+##夷
+##夸
+##夹
+##夺
+##夾
+##奂
+##奄
+##奇
+##奈
+##奉
+##奋
+##奎
+##奏
+##奐
+##契
+##奔
+##奕
+##奖
+##套
+##奘
+##奚
+##奠
+##奢
+##奥
+##奧
+##奪
+##奬
+##奮
+##女
+##奴
+##奶
+##奸
+##她
+##好
+##如
+##妃
+##妄
+##妆
+##妇
+##妈
+##妊
+##妍
+##妒
+##妓
+##妖
+##妘
+##妙
+##妝
+##妞
+##妣
+##妤
+##妥
+##妨
+##妩
+##妪
+##妮
+##妲
+##妳
+##妹
+##妻
+##妾
+##姆
+##姉
+##姊
+##始
+##姍
+##姐
+##姑
+##姒
+##姓
+##委
+##姗
+##姚
+##姜
+##姝
+##姣
+##姥
+##姦
+##姨
+##姪
+##姫
+##姬
+##姹
+##姻
+##姿
+##威
+##娃
+##娄
+##娅
+##娆
+##娇
+##娉
+##娑
+##娓
+##娘
+##娛
+##娜
+##娟
+##娠
+##娣
+##娥
+##娩
+##娱
+##娲
+##娴
+##娶
+##娼
+##婀
+##婁
+##婆
+##婉
+##婊
+##婕
+##婚
+##婢
+##婦
+##婧
+##婪
+##婭
+##婴
+##婵
+##婶
+##婷
+##婺
+##婿
+##媒
+##媚
+##媛
+##媞
+##媧
+##媲
+##媳
+##媽
+##媾
+##嫁
+##嫂
+##嫉
+##嫌
+##嫑
+##嫔
+##嫖
+##嫘
+##嫚
+##嫡
+##嫣
+##嫦
+##嫩
+##嫲
+##嫵
+##嫻
+##嬅
+##嬉
+##嬌
+##嬗
+##嬛
+##嬢
+##嬤
+##嬪
+##嬰
+##嬴
+##嬷
+##嬸
+##嬿
+##孀
+##孃
+##子
+##孑
+##孔
+##孕
+##孖
+##字
+##存
+##孙
+##孚
+##孛
+##孜
+##孝
+##孟
+##孢
+##季
+##孤
+##学
+##孩
+##孪
+##孫
+##孬
+##孰
+##孱
+##孳
+##孵
+##學
+##孺
+##孽
+##孿
+##宁
+##它
+##宅
+##宇
+##守
+##安
+##宋
+##完
+##宏
+##宓
+##宕
+##宗
+##官
+##宙
+##定
+##宛
+##宜
+##宝
+##实
+##実
+##宠
+##审
+##客
+##宣
+##室
+##宥
+##宦
+##宪
+##宫
+##宮
+##宰
+##害
+##宴
+##宵
+##家
+##宸
+##容
+##宽
+##宾
+##宿
+##寂
+##寄
+##寅
+##密
+##寇
+##富
+##寐
+##寒
+##寓
+##寛
+##寝
+##寞
+##察
+##寡
+##寢
+##寥
+##實
+##寧
+##寨
+##審
+##寫
+##寬
+##寮
+##寰
+##寵
+##寶
+##寸
+##对
+##寺
+##寻
+##导
+##対
+##寿
+##封
+##専
+##射
+##将
+##將
+##專
+##尉
+##尊
+##尋
+##對
+##導
+##小
+##少
+##尔
+##尕
+##尖
+##尘
+##尚
+##尝
+##尤
+##尧
+##尬
+##就
+##尴
+##尷
+##尸
+##尹
+##尺
+##尻
+##尼
+##尽
+##尾
+##尿
+##局
+##屁
+##层
+##屄
+##居
+##屆
+##屈
+##屉
+##届
+##屋
+##屌
+##屍
+##屎
+##屏
+##屐
+##屑
+##展
+##屜
+##属
+##屠
+##屡
+##屢
+##層
+##履
+##屬
+##屯
+##山
+##屹
+##屿
+##岀
+##岁
+##岂
+##岌
+##岐
+##岑
+##岔
+##岖
+##岗
+##岘
+##岙
+##岚
+##岛
+##岡
+##岩
+##岫
+##岬
+##岭
+##岱
+##岳
+##岷
+##岸
+##峇
+##峋
+##峒
+##峙
+##峡
+##峤
+##峥
+##峦
+##峨
+##峪
+##峭
+##峯
+##峰
+##峴
+##島
+##峻
+##峽
+##崁
+##崂
+##崆
+##崇
+##崎
+##崑
+##崔
+##崖
+##崗
+##崙
+##崛
+##崧
+##崩
+##崭
+##崴
+##崽
+##嵇
+##嵊
+##嵋
+##嵌
+##嵐
+##嵘
+##嵩
+##嵬
+##嵯
+##嶂
+##嶄
+##嶇
+##嶋
+##嶙
+##嶺
+##嶼
+##嶽
+##巅
+##巍
+##巒
+##巔
+##巖
+##川
+##州
+##巡
+##巢
+##工
+##左
+##巧
+##巨
+##巩
+##巫
+##差
+##己
+##已
+##巳
+##巴
+##巷
+##巻
+##巽
+##巾
+##巿
+##币
+##市
+##布
+##帅
+##帆
+##师
+##希
+##帐
+##帑
+##帕
+##帖
+##帘
+##帚
+##帛
+##帜
+##帝
+##帥
+##带
+##帧
+##師
+##席
+##帮
+##帯
+##帰
+##帳
+##帶
+##帷
+##常
+##帼
+##帽
+##幀
+##幂
+##幄
+##幅
+##幌
+##幔
+##幕
+##幟
+##幡
+##幢
+##幣
+##幫
+##干
+##平
+##年
+##并
+##幸
+##幹
+##幺
+##幻
+##幼
+##幽
+##幾
+##广
+##庁
+##広
+##庄
+##庆
+##庇
+##床
+##序
+##庐
+##库
+##应
+##底
+##庖
+##店
+##庙
+##庚
+##府
+##庞
+##废
+##庠
+##度
+##座
+##庫
+##庭
+##庵
+##庶
+##康
+##庸
+##庹
+##庾
+##廁
+##廂
+##廃
+##廈
+##廉
+##廊
+##廓
+##廖
+##廚
+##廝
+##廟
+##廠
+##廢
+##廣
+##廬
+##廳
+##延
+##廷
+##建
+##廿
+##开
+##弁
+##异
+##弃
+##弄
+##弈
+##弊
+##弋
+##式
+##弑
+##弒
+##弓
+##弔
+##引
+##弗
+##弘
+##弛
+##弟
+##张
+##弥
+##弦
+##弧
+##弩
+##弭
+##弯
+##弱
+##張
+##強
+##弹
+##强
+##弼
+##弾
+##彅
+##彆
+##彈
+##彌
+##彎
+##归
+##当
+##录
+##彗
+##彙
+##彝
+##形
+##彤
+##彥
+##彦
+##彧
+##彩
+##彪
+##彫
+##彬
+##彭
+##彰
+##影
+##彷
+##役
+##彻
+##彼
+##彿
+##往
+##征
+##径
+##待
+##徇
+##很
+##徉
+##徊
+##律
+##後
+##徐
+##徑
+##徒
+##従
+##徕
+##得
+##徘
+##徙
+##徜
+##從
+##徠
+##御
+##徨
+##復
+##循
+##徬
+##微
+##徳
+##徴
+##徵
+##德
+##徹
+##徼
+##徽
+##心
+##必
+##忆
+##忌
+##忍
+##忏
+##忐
+##忑
+##忒
+##忖
+##志
+##忘
+##忙
+##応
+##忠
+##忡
+##忤
+##忧
+##忪
+##快
+##忱
+##念
+##忻
+##忽
+##忿
+##怀
+##态
+##怂
+##怅
+##怆
+##怎
+##怏
+##怒
+##怔
+##怕
+##怖
+##怙
+##怜
+##思
+##怠
+##怡
+##急
+##怦
+##性
+##怨
+##怪
+##怯
+##怵
+##总
+##怼
+##恁
+##恃
+##恆
+##恋
+##恍
+##恐
+##恒
+##恕
+##恙
+##恚
+##恢
+##恣
+##恤
+##恥
+##恨
+##恩
+##恪
+##恫
+##恬
+##恭
+##息
+##恰
+##恳
+##恵
+##恶
+##恸
+##恺
+##恻
+##恼
+##恿
+##悄
+##悅
+##悉
+##悌
+##悍
+##悔
+##悖
+##悚
+##悟
+##悠
+##患
+##悦
+##您
+##悩
+##悪
+##悬
+##悯
+##悱
+##悲
+##悴
+##悵
+##悶
+##悸
+##悻
+##悼
+##悽
+##情
+##惆
+##惇
+##惊
+##惋
+##惑
+##惕
+##惘
+##惚
+##惜
+##惟
+##惠
+##惡
+##惦
+##惧
+##惨
+##惩
+##惫
+##惬
+##惭
+##惮
+##惯
+##惰
+##惱
+##想
+##惴
+##惶
+##惹
+##惺
+##愁
+##愆
+##愈
+##愉
+##愍
+##意
+##愕
+##愚
+##愛
+##愜
+##感
+##愣
+##愤
+##愧
+##愫
+##愷
+##愿
+##慄
+##慈
+##態
+##慌
+##慎
+##慑
+##慕
+##慘
+##慚
+##慟
+##慢
+##慣
+##慧
+##慨
+##慫
+##慮
+##慰
+##慳
+##慵
+##慶
+##慷
+##慾
+##憂
+##憊
+##憋
+##憎
+##憐
+##憑
+##憔
+##憚
+##憤
+##憧
+##憨
+##憩
+##憫
+##憬
+##憲
+##憶
+##憾
+##懂
+##懇
+##懈
+##應
+##懊
+##懋
+##懑
+##懒
+##懦
+##懲
+##懵
+##懶
+##懷
+##懸
+##懺
+##懼
+##懾
+##懿
+##戀
+##戈
+##戊
+##戌
+##戍
+##戎
+##戏
+##成
+##我
+##戒
+##戕
+##或
+##战
+##戚
+##戛
+##戟
+##戡
+##戦
+##截
+##戬
+##戮
+##戰
+##戲
+##戳
+##戴
+##戶
+##户
+##戸
+##戻
+##戾
+##房
+##所
+##扁
+##扇
+##扈
+##扉
+##手
+##才
+##扎
+##扑
+##扒
+##打
+##扔
+##払
+##托
+##扛
+##扣
+##扦
+##执
+##扩
+##扪
+##扫
+##扬
+##扭
+##扮
+##扯
+##扰
+##扱
+##扳
+##扶
+##批
+##扼
+##找
+##承
+##技
+##抄
+##抉
+##把
+##抑
+##抒
+##抓
+##投
+##抖
+##抗
+##折
+##抚
+##抛
+##抜
+##択
+##抟
+##抠
+##抡
+##抢
+##护
+##报
+##抨
+##披
+##抬
+##抱
+##抵
+##抹
+##押
+##抽
+##抿
+##拂
+##拄
+##担
+##拆
+##拇
+##拈
+##拉
+##拋
+##拌
+##拍
+##拎
+##拐
+##拒
+##拓
+##拔
+##拖
+##拗
+##拘
+##拙
+##拚
+##招
+##拜
+##拟
+##拡
+##拢
+##拣
+##拥
+##拦
+##拧
+##拨
+##择
+##括
+##拭
+##拮
+##拯
+##拱
+##拳
+##拴
+##拷
+##拼
+##拽
+##拾
+##拿
+##持
+##挂
+##指
+##挈
+##按
+##挎
+##挑
+##挖
+##挙
+##挚
+##挛
+##挝
+##挞
+##挟
+##挠
+##挡
+##挣
+##挤
+##挥
+##挨
+##挪
+##挫
+##振
+##挲
+##挹
+##挺
+##挽
+##挾
+##捂
+##捅
+##捆
+##捉
+##捋
+##捌
+##捍
+##捎
+##捏
+##捐
+##捕
+##捞
+##损
+##捡
+##换
+##捣
+##捧
+##捨
+##捩
+##据
+##捱
+##捲
+##捶
+##捷
+##捺
+##捻
+##掀
+##掂
+##掃
+##掇
+##授
+##掉
+##掌
+##掏
+##掐
+##排
+##掖
+##掘
+##掙
+##掛
+##掠
+##採
+##探
+##掣
+##接
+##控
+##推
+##掩
+##措
+##掬
+##掰
+##掲
+##掳
+##掴
+##掷
+##掸
+##掺
+##揀
+##揃
+##揄
+##揆
+##揉
+##揍
+##描
+##提
+##插
+##揖
+##揚
+##換
+##握
+##揣
+##揩
+##揪
+##揭
+##揮
+##援
+##揶
+##揸
+##揹
+##揽
+##搀
+##搁
+##搂
+##搅
+##損
+##搏
+##搐
+##搓
+##搔
+##搖
+##搗
+##搜
+##搞
+##搡
+##搪
+##搬
+##搭
+##搵
+##搶
+##携
+##搽
+##摀
+##摁
+##摄
+##摆
+##摇
+##摈
+##摊
+##摒
+##摔
+##摘
+##摞
+##摟
+##摧
+##摩
+##摯
+##摳
+##摸
+##摹
+##摺
+##摻
+##撂
+##撃
+##撅
+##撇
+##撈
+##撐
+##撑
+##撒
+##撓
+##撕
+##撚
+##撞
+##撤
+##撥
+##撩
+##撫
+##撬
+##播
+##撮
+##撰
+##撲
+##撵
+##撷
+##撸
+##撻
+##撼
+##撿
+##擀
+##擁
+##擂
+##擄
+##擅
+##擇
+##擊
+##擋
+##操
+##擎
+##擒
+##擔
+##擘
+##據
+##擞
+##擠
+##擡
+##擢
+##擦
+##擬
+##擰
+##擱
+##擲
+##擴
+##擷
+##擺
+##擼
+##擾
+##攀
+##攏
+##攒
+##攔
+##攘
+##攙
+##攜
+##攝
+##攞
+##攢
+##攣
+##攤
+##攥
+##攪
+##攫
+##攬
+##支
+##收
+##攸
+##改
+##攻
+##放
+##政
+##故
+##效
+##敌
+##敍
+##敎
+##敏
+##救
+##敕
+##敖
+##敗
+##敘
+##教
+##敛
+##敝
+##敞
+##敢
+##散
+##敦
+##敬
+##数
+##敲
+##整
+##敵
+##敷
+##數
+##斂
+##斃
+##文
+##斋
+##斌
+##斎
+##斐
+##斑
+##斓
+##斗
+##料
+##斛
+##斜
+##斟
+##斡
+##斤
+##斥
+##斧
+##斩
+##斫
+##斬
+##断
+##斯
+##新
+##斷
+##方
+##於
+##施
+##旁
+##旃
+##旅
+##旋
+##旌
+##旎
+##族
+##旖
+##旗
+##无
+##既
+##日
+##旦
+##旧
+##旨
+##早
+##旬
+##旭
+##旮
+##旱
+##时
+##旷
+##旺
+##旻
+##昀
+##昂
+##昆
+##昇
+##昉
+##昊
+##昌
+##明
+##昏
+##易
+##昔
+##昕
+##昙
+##星
+##映
+##春
+##昧
+##昨
+##昭
+##是
+##昱
+##昴
+##昵
+##昶
+##昼
+##显
+##晁
+##時
+##晃
+##晉
+##晋
+##晌
+##晏
+##晒
+##晓
+##晔
+##晕
+##晖
+##晗
+##晚
+##晝
+##晞
+##晟
+##晤
+##晦
+##晨
+##晩
+##普
+##景
+##晰
+##晴
+##晶
+##晷
+##智
+##晾
+##暂
+##暄
+##暇
+##暈
+##暉
+##暌
+##暐
+##暑
+##暖
+##暗
+##暝
+##暢
+##暧
+##暨
+##暫
+##暮
+##暱
+##暴
+##暸
+##暹
+##曄
+##曆
+##曇
+##曉
+##曖
+##曙
+##曜
+##曝
+##曠
+##曦
+##曬
+##曰
+##曲
+##曳
+##更
+##書
+##曹
+##曼
+##曾
+##替
+##最
+##會
+##月
+##有
+##朋
+##服
+##朐
+##朔
+##朕
+##朗
+##望
+##朝
+##期
+##朦
+##朧
+##木
+##未
+##末
+##本
+##札
+##朮
+##术
+##朱
+##朴
+##朵
+##机
+##朽
+##杀
+##杂
+##权
+##杆
+##杈
+##杉
+##李
+##杏
+##材
+##村
+##杓
+##杖
+##杜
+##杞
+##束
+##杠
+##条
+##来
+##杨
+##杭
+##杯
+##杰
+##東
+##杳
+##杵
+##杷
+##杼
+##松
+##板
+##极
+##构
+##枇
+##枉
+##枋
+##析
+##枕
+##林
+##枚
+##果
+##枝
+##枢
+##枣
+##枪
+##枫
+##枭
+##枯
+##枰
+##枱
+##枳
+##架
+##枷
+##枸
+##柄
+##柏
+##某
+##柑
+##柒
+##染
+##柔
+##柘
+##柚
+##柜
+##柞
+##柠
+##柢
+##查
+##柩
+##柬
+##柯
+##柱
+##柳
+##柴
+##柵
+##査
+##柿
+##栀
+##栃
+##栄
+##栅
+##标
+##栈
+##栉
+##栋
+##栎
+##栏
+##树
+##栓
+##栖
+##栗
+##校
+##栩
+##株
+##样
+##核
+##根
+##格
+##栽
+##栾
+##桀
+##桁
+##桂
+##桃
+##桅
+##框
+##案
+##桉
+##桌
+##桎
+##桐
+##桑
+##桓
+##桔
+##桜
+##桠
+##桡
+##桢
+##档
+##桥
+##桦
+##桧
+##桨
+##桩
+##桶
+##桿
+##梁
+##梅
+##梆
+##梏
+##梓
+##梗
+##條
+##梟
+##梢
+##梦
+##梧
+##梨
+##梭
+##梯
+##械
+##梳
+##梵
+##梶
+##检
+##棂
+##棄
+##棉
+##棋
+##棍
+##棒
+##棕
+##棗
+##棘
+##棚
+##棟
+##棠
+##棣
+##棧
+##森
+##棱
+##棲
+##棵
+##棹
+##棺
+##椁
+##椅
+##椋
+##植
+##椎
+##椒
+##検
+##椪
+##椭
+##椰
+##椹
+##椽
+##椿
+##楂
+##楊
+##楓
+##楔
+##楚
+##楝
+##楞
+##楠
+##楣
+##楨
+##楫
+##業
+##楮
+##極
+##楷
+##楸
+##楹
+##楼
+##楽
+##概
+##榄
+##榆
+##榈
+##榉
+##榔
+##榕
+##榖
+##榛
+##榜
+##榨
+##榫
+##榭
+##榮
+##榱
+##榴
+##榷
+##榻
+##槁
+##槃
+##構
+##槌
+##槍
+##槎
+##槐
+##槓
+##様
+##槛
+##槟
+##槤
+##槭
+##槲
+##槳
+##槻
+##槽
+##槿
+##樁
+##樂
+##樊
+##樑
+##樓
+##標
+##樞
+##樟
+##模
+##樣
+##権
+##横
+##樫
+##樯
+##樱
+##樵
+##樸
+##樹
+##樺
+##樽
+##樾
+##橄
+##橇
+##橋
+##橐
+##橘
+##橙
+##機
+##橡
+##橢
+##橫
+##橱
+##橹
+##橼
+##檀
+##檄
+##檎
+##檐
+##檔
+##檗
+##檜
+##檢
+##檬
+##檯
+##檳
+##檸
+##檻
+##櫃
+##櫚
+##櫛
+##櫥
+##櫸
+##櫻
+##欄
+##權
+##欒
+##欖
+##欠
+##次
+##欢
+##欣
+##欧
+##欲
+##欸
+##欺
+##欽
+##款
+##歆
+##歇
+##歉
+##歌
+##歎
+##歐
+##歓
+##歙
+##歛
+##歡
+##止
+##正
+##此
+##步
+##武
+##歧
+##歩
+##歪
+##歯
+##歲
+##歳
+##歴
+##歷
+##歸
+##歹
+##死
+##歼
+##殁
+##殃
+##殆
+##殇
+##殉
+##殊
+##残
+##殒
+##殓
+##殖
+##殘
+##殞
+##殡
+##殤
+##殭
+##殯
+##殲
+##殴
+##段
+##殷
+##殺
+##殼
+##殿
+##毀
+##毁
+##毂
+##毅
+##毆
+##毋
+##母
+##毎
+##每
+##毒
+##毓
+##比
+##毕
+##毗
+##毘
+##毙
+##毛
+##毡
+##毫
+##毯
+##毽
+##氈
+##氏
+##氐
+##民
+##氓
+##气
+##氖
+##気
+##氙
+##氛
+##氟
+##氡
+##氢
+##氣
+##氤
+##氦
+##氧
+##氨
+##氪
+##氫
+##氮
+##氯
+##氰
+##氲
+##水
+##氷
+##永
+##氹
+##氾
+##汀
+##汁
+##求
+##汆
+##汇
+##汉
+##汎
+##汐
+##汕
+##汗
+##汙
+##汛
+##汝
+##汞
+##江
+##池
+##污
+##汤
+##汨
+##汩
+##汪
+##汰
+##汲
+##汴
+##汶
+##汹
+##決
+##汽
+##汾
+##沁
+##沂
+##沃
+##沅
+##沈
+##沉
+##沌
+##沏
+##沐
+##沒
+##沓
+##沖
+##沙
+##沛
+##沟
+##没
+##沢
+##沣
+##沥
+##沦
+##沧
+##沪
+##沫
+##沭
+##沮
+##沱
+##河
+##沸
+##油
+##治
+##沼
+##沽
+##沾
+##沿
+##況
+##泄
+##泉
+##泊
+##泌
+##泓
+##法
+##泗
+##泛
+##泞
+##泠
+##泡
+##波
+##泣
+##泥
+##注
+##泪
+##泫
+##泮
+##泯
+##泰
+##泱
+##泳
+##泵
+##泷
+##泸
+##泻
+##泼
+##泽
+##泾
+##洁
+##洄
+##洋
+##洒
+##洗
+##洙
+##洛
+##洞
+##津
+##洩
+##洪
+##洮
+##洱
+##洲
+##洵
+##洶
+##洸
+##洹
+##活
+##洼
+##洽
+##派
+##流
+##浃
+##浄
+##浅
+##浆
+##浇
+##浊
+##测
+##济
+##浏
+##浑
+##浒
+##浓
+##浔
+##浙
+##浚
+##浜
+##浣
+##浦
+##浩
+##浪
+##浬
+##浮
+##浯
+##浴
+##海
+##浸
+##涂
+##涅
+##涇
+##消
+##涉
+##涌
+##涎
+##涓
+##涔
+##涕
+##涙
+##涛
+##涝
+##涞
+##涟
+##涠
+##涡
+##涣
+##涤
+##润
+##涧
+##涨
+##涩
+##涪
+##涮
+##涯
+##液
+##涵
+##涸
+##涼
+##涿
+##淀
+##淄
+##淅
+##淆
+##淇
+##淋
+##淌
+##淑
+##淒
+##淖
+##淘
+##淙
+##淚
+##淞
+##淡
+##淤
+##淦
+##淨
+##淩
+##淪
+##淫
+##淬
+##淮
+##深
+##淳
+##淵
+##混
+##淹
+##淺
+##添
+##淼
+##清
+##済
+##渉
+##渊
+##渋
+##渍
+##渎
+##渐
+##渔
+##渗
+##渙
+##渚
+##減
+##渝
+##渠
+##渡
+##渣
+##渤
+##渥
+##渦
+##温
+##測
+##渭
+##港
+##渲
+##渴
+##游
+##渺
+##渾
+##湃
+##湄
+##湊
+##湍
+##湖
+##湘
+##湛
+##湟
+##湧
+##湫
+##湮
+##湯
+##湳
+##湾
+##湿
+##満
+##溃
+##溅
+##溉
+##溏
+##源
+##準
+##溜
+##溝
+##溟
+##溢
+##溥
+##溧
+##溪
+##溫
+##溯
+##溱
+##溴
+##溶
+##溺
+##溼
+##滁
+##滂
+##滄
+##滅
+##滇
+##滋
+##滌
+##滑
+##滓
+##滔
+##滕
+##滙
+##滚
+##滝
+##滞
+##滟
+##满
+##滢
+##滤
+##滥
+##滦
+##滨
+##滩
+##滬
+##滯
+##滲
+##滴
+##滷
+##滸
+##滾
+##滿
+##漁
+##漂
+##漆
+##漉
+##漏
+##漓
+##演
+##漕
+##漠
+##漢
+##漣
+##漩
+##漪
+##漫
+##漬
+##漯
+##漱
+##漲
+##漳
+##漸
+##漾
+##漿
+##潆
+##潇
+##潋
+##潍
+##潑
+##潔
+##潘
+##潛
+##潜
+##潞
+##潟
+##潢
+##潤
+##潦
+##潧
+##潭
+##潮
+##潰
+##潴
+##潸
+##潺
+##潼
+##澀
+##澄
+##澆
+##澈
+##澍
+##澎
+##澗
+##澜
+##澡
+##澤
+##澧
+##澱
+##澳
+##澹
+##激
+##濁
+##濂
+##濃
+##濑
+##濒
+##濕
+##濘
+##濛
+##濟
+##濠
+##濡
+##濤
+##濫
+##濬
+##濮
+##濯
+##濱
+##濺
+##濾
+##瀅
+##瀆
+##瀉
+##瀋
+##瀏
+##瀑
+##瀕
+##瀘
+##瀚
+##瀛
+##瀝
+##瀞
+##瀟
+##瀧
+##瀨
+##瀬
+##瀰
+##瀾
+##灌
+##灏
+##灑
+##灘
+##灝
+##灞
+##灣
+##火
+##灬
+##灭
+##灯
+##灰
+##灵
+##灶
+##灸
+##灼
+##災
+##灾
+##灿
+##炀
+##炁
+##炅
+##炉
+##炊
+##炎
+##炒
+##炔
+##炕
+##炖
+##炙
+##炜
+##炫
+##炬
+##炭
+##炮
+##炯
+##炳
+##炷
+##炸
+##点
+##為
+##炼
+##炽
+##烁
+##烂
+##烃
+##烈
+##烊
+##烏
+##烘
+##烙
+##烛
+##烟
+##烤
+##烦
+##烧
+##烨
+##烩
+##烫
+##烬
+##热
+##烯
+##烷
+##烹
+##烽
+##焉
+##焊
+##焕
+##焖
+##焗
+##焘
+##焙
+##焚
+##焜
+##無
+##焦
+##焯
+##焰
+##焱
+##然
+##焼
+##煅
+##煉
+##煊
+##煌
+##煎
+##煒
+##煖
+##煙
+##煜
+##煞
+##煤
+##煥
+##煦
+##照
+##煨
+##煩
+##煮
+##煲
+##煸
+##煽
+##熄
+##熊
+##熏
+##熒
+##熔
+##熙
+##熟
+##熠
+##熨
+##熬
+##熱
+##熵
+##熹
+##熾
+##燁
+##燃
+##燄
+##燈
+##燉
+##燊
+##燎
+##燒
+##燔
+##燕
+##燙
+##燜
+##營
+##燥
+##燦
+##燧
+##燭
+##燮
+##燴
+##燻
+##燼
+##燿
+##爆
+##爍
+##爐
+##爛
+##爪
+##爬
+##爭
+##爰
+##爱
+##爲
+##爵
+##父
+##爷
+##爸
+##爹
+##爺
+##爻
+##爽
+##爾
+##牆
+##片
+##版
+##牌
+##牍
+##牒
+##牙
+##牛
+##牝
+##牟
+##牠
+##牡
+##牢
+##牦
+##牧
+##物
+##牯
+##牲
+##牴
+##牵
+##特
+##牺
+##牽
+##犀
+##犁
+##犄
+##犊
+##犍
+##犒
+##犢
+##犧
+##犬
+##犯
+##状
+##犷
+##犸
+##犹
+##狀
+##狂
+##狄
+##狈
+##狎
+##狐
+##狒
+##狗
+##狙
+##狞
+##狠
+##狡
+##狩
+##独
+##狭
+##狮
+##狰
+##狱
+##狸
+##狹
+##狼
+##狽
+##猎
+##猕
+##猖
+##猗
+##猙
+##猛
+##猜
+##猝
+##猥
+##猩
+##猪
+##猫
+##猬
+##献
+##猴
+##猶
+##猷
+##猾
+##猿
+##獄
+##獅
+##獎
+##獐
+##獒
+##獗
+##獠
+##獣
+##獨
+##獭
+##獰
+##獲
+##獵
+##獷
+##獸
+##獺
+##獻
+##獼
+##獾
+##玄
+##率
+##玉
+##王
+##玑
+##玖
+##玛
+##玟
+##玠
+##玥
+##玩
+##玫
+##玮
+##环
+##现
+##玲
+##玳
+##玷
+##玺
+##玻
+##珀
+##珂
+##珅
+##珈
+##珉
+##珊
+##珍
+##珏
+##珐
+##珑
+##珙
+##珞
+##珠
+##珣
+##珥
+##珩
+##珪
+##班
+##珮
+##珲
+##珺
+##現
+##球
+##琅
+##理
+##琇
+##琉
+##琊
+##琍
+##琏
+##琐
+##琛
+##琢
+##琥
+##琦
+##琨
+##琪
+##琬
+##琮
+##琰
+##琲
+##琳
+##琴
+##琵
+##琶
+##琺
+##琼
+##瑀
+##瑁
+##瑄
+##瑋
+##瑕
+##瑗
+##瑙
+##瑚
+##瑛
+##瑜
+##瑞
+##瑟
+##瑠
+##瑣
+##瑤
+##瑩
+##瑪
+##瑯
+##瑰
+##瑶
+##瑾
+##璀
+##璁
+##璃
+##璇
+##璉
+##璋
+##璎
+##璐
+##璜
+##璞
+##璟
+##璧
+##璨
+##環
+##璽
+##璿
+##瓊
+##瓏
+##瓒
+##瓜
+##瓢
+##瓣
+##瓤
+##瓦
+##瓮
+##瓯
+##瓴
+##瓶
+##瓷
+##甄
+##甌
+##甕
+##甘
+##甙
+##甚
+##甜
+##生
+##產
+##産
+##甥
+##甦
+##用
+##甩
+##甫
+##甬
+##甭
+##甯
+##田
+##由
+##甲
+##申
+##电
+##男
+##甸
+##町
+##画
+##甾
+##畀
+##畅
+##界
+##畏
+##畑
+##畔
+##留
+##畜
+##畝
+##畢
+##略
+##畦
+##番
+##畫
+##異
+##畲
+##畳
+##畴
+##當
+##畸
+##畹
+##畿
+##疆
+##疇
+##疊
+##疏
+##疑
+##疔
+##疖
+##疗
+##疙
+##疚
+##疝
+##疟
+##疡
+##疣
+##疤
+##疥
+##疫
+##疮
+##疯
+##疱
+##疲
+##疳
+##疵
+##疸
+##疹
+##疼
+##疽
+##疾
+##痂
+##病
+##症
+##痈
+##痉
+##痊
+##痍
+##痒
+##痔
+##痕
+##痘
+##痙
+##痛
+##痞
+##痠
+##痢
+##痣
+##痤
+##痧
+##痨
+##痪
+##痫
+##痰
+##痱
+##痴
+##痹
+##痺
+##痼
+##痿
+##瘀
+##瘁
+##瘋
+##瘍
+##瘓
+##瘘
+##瘙
+##瘟
+##瘠
+##瘡
+##瘢
+##瘤
+##瘦
+##瘧
+##瘩
+##瘪
+##瘫
+##瘴
+##瘸
+##瘾
+##療
+##癇
+##癌
+##癒
+##癖
+##癜
+##癞
+##癡
+##癢
+##癣
+##癥
+##癫
+##癬
+##癮
+##癱
+##癲
+##癸
+##発
+##登
+##發
+##白
+##百
+##皂
+##的
+##皆
+##皇
+##皈
+##皋
+##皎
+##皑
+##皓
+##皖
+##皙
+##皚
+##皮
+##皰
+##皱
+##皴
+##皺
+##皿
+##盂
+##盃
+##盅
+##盆
+##盈
+##益
+##盎
+##盏
+##盐
+##监
+##盒
+##盔
+##盖
+##盗
+##盘
+##盛
+##盜
+##盞
+##盟
+##盡
+##監
+##盤
+##盥
+##盧
+##盪
+##目
+##盯
+##盱
+##盲
+##直
+##相
+##盹
+##盼
+##盾
+##省
+##眈
+##眉
+##看
+##県
+##眙
+##眞
+##真
+##眠
+##眦
+##眨
+##眩
+##眯
+##眶
+##眷
+##眸
+##眺
+##眼
+##眾
+##着
+##睁
+##睇
+##睏
+##睐
+##睑
+##睛
+##睜
+##睞
+##睡
+##睢
+##督
+##睥
+##睦
+##睨
+##睪
+##睫
+##睬
+##睹
+##睽
+##睾
+##睿
+##瞄
+##瞅
+##瞇
+##瞋
+##瞌
+##瞎
+##瞑
+##瞒
+##瞓
+##瞞
+##瞟
+##瞠
+##瞥
+##瞧
+##瞩
+##瞪
+##瞬
+##瞭
+##瞰
+##瞳
+##瞻
+##瞼
+##瞿
+##矇
+##矍
+##矗
+##矚
+##矛
+##矜
+##矢
+##矣
+##知
+##矩
+##矫
+##短
+##矮
+##矯
+##石
+##矶
+##矽
+##矾
+##矿
+##码
+##砂
+##砌
+##砍
+##砒
+##研
+##砖
+##砗
+##砚
+##砝
+##砣
+##砥
+##砧
+##砭
+##砰
+##砲
+##破
+##砷
+##砸
+##砺
+##砼
+##砾
+##础
+##硅
+##硐
+##硒
+##硕
+##硝
+##硫
+##硬
+##确
+##硯
+##硼
+##碁
+##碇
+##碉
+##碌
+##碍
+##碎
+##碑
+##碓
+##碗
+##碘
+##碚
+##碛
+##碟
+##碣
+##碧
+##碩
+##碰
+##碱
+##碳
+##碴
+##確
+##碼
+##碾
+##磁
+##磅
+##磊
+##磋
+##磐
+##磕
+##磚
+##磡
+##磨
+##磬
+##磯
+##磲
+##磷
+##磺
+##礁
+##礎
+##礙
+##礡
+##礦
+##礪
+##礫
+##礴
+##示
+##礼
+##社
+##祀
+##祁
+##祂
+##祇
+##祈
+##祉
+##祎
+##祐
+##祕
+##祖
+##祗
+##祚
+##祛
+##祜
+##祝
+##神
+##祟
+##祠
+##祢
+##祥
+##票
+##祭
+##祯
+##祷
+##祸
+##祺
+##祿
+##禀
+##禁
+##禄
+##禅
+##禍
+##禎
+##福
+##禛
+##禦
+##禧
+##禪
+##禮
+##禱
+##禹
+##禺
+##离
+##禽
+##禾
+##禿
+##秀
+##私
+##秃
+##秆
+##秉
+##秋
+##种
+##科
+##秒
+##秘
+##租
+##秣
+##秤
+##秦
+##秧
+##秩
+##秭
+##积
+##称
+##秸
+##移
+##秽
+##稀
+##稅
+##程
+##稍
+##税
+##稔
+##稗
+##稚
+##稜
+##稞
+##稟
+##稠
+##稣
+##種
+##稱
+##稲
+##稳
+##稷
+##稹
+##稻
+##稼
+##稽
+##稿
+##穀
+##穂
+##穆
+##穌
+##積
+##穎
+##穗
+##穢
+##穩
+##穫
+##穴
+##究
+##穷
+##穹
+##空
+##穿
+##突
+##窃
+##窄
+##窈
+##窍
+##窑
+##窒
+##窓
+##窕
+##窖
+##窗
+##窘
+##窜
+##窝
+##窟
+##窠
+##窥
+##窦
+##窨
+##窩
+##窪
+##窮
+##窯
+##窺
+##窿
+##竄
+##竅
+##竇
+##竊
+##立
+##竖
+##站
+##竜
+##竞
+##竟
+##章
+##竣
+##童
+##竭
+##端
+##競
+##竹
+##竺
+##竽
+##竿
+##笃
+##笆
+##笈
+##笋
+##笏
+##笑
+##笔
+##笙
+##笛
+##笞
+##笠
+##符
+##笨
+##第
+##笹
+##笺
+##笼
+##筆
+##等
+##筊
+##筋
+##筍
+##筏
+##筐
+##筑
+##筒
+##答
+##策
+##筛
+##筝
+##筠
+##筱
+##筲
+##筵
+##筷
+##筹
+##签
+##简
+##箇
+##箋
+##箍
+##箏
+##箐
+##箔
+##箕
+##算
+##箝
+##管
+##箩
+##箫
+##箭
+##箱
+##箴
+##箸
+##節
+##篁
+##範
+##篆
+##篇
+##築
+##篑
+##篓
+##篙
+##篝
+##篠
+##篡
+##篤
+##篩
+##篪
+##篮
+##篱
+##篷
+##簇
+##簌
+##簍
+##簡
+##簦
+##簧
+##簪
+##簫
+##簷
+##簸
+##簽
+##簾
+##簿
+##籁
+##籃
+##籌
+##籍
+##籐
+##籟
+##籠
+##籤
+##籬
+##籮
+##籲
+##米
+##类
+##籼
+##籽
+##粄
+##粉
+##粑
+##粒
+##粕
+##粗
+##粘
+##粟
+##粤
+##粥
+##粧
+##粪
+##粮
+##粱
+##粲
+##粳
+##粵
+##粹
+##粼
+##粽
+##精
+##粿
+##糅
+##糊
+##糍
+##糕
+##糖
+##糗
+##糙
+##糜
+##糞
+##糟
+##糠
+##糧
+##糬
+##糯
+##糰
+##糸
+##系
+##糾
+##紀
+##紂
+##約
+##紅
+##紉
+##紊
+##紋
+##納
+##紐
+##紓
+##純
+##紗
+##紘
+##紙
+##級
+##紛
+##紜
+##素
+##紡
+##索
+##紧
+##紫
+##紮
+##累
+##細
+##紳
+##紹
+##紺
+##終
+##絃
+##組
+##絆
+##経
+##結
+##絕
+##絞
+##絡
+##絢
+##給
+##絨
+##絮
+##統
+##絲
+##絳
+##絵
+##絶
+##絹
+##綁
+##綏
+##綑
+##經
+##継
+##続
+##綜
+##綠
+##綢
+##綦
+##綫
+##綬
+##維
+##綱
+##網
+##綴
+##綵
+##綸
+##綺
+##綻
+##綽
+##綾
+##綿
+##緊
+##緋
+##総
+##緑
+##緒
+##緘
+##線
+##緝
+##緞
+##締
+##緣
+##編
+##緩
+##緬
+##緯
+##練
+##緹
+##緻
+##縁
+##縄
+##縈
+##縛
+##縝
+##縣
+##縫
+##縮
+##縱
+##縴
+##縷
+##總
+##績
+##繁
+##繃
+##繆
+##繇
+##繋
+##織
+##繕
+##繚
+##繞
+##繡
+##繩
+##繪
+##繫
+##繭
+##繳
+##繹
+##繼
+##繽
+##纂
+##續
+##纍
+##纏
+##纓
+##纔
+##纖
+##纜
+##纠
+##红
+##纣
+##纤
+##约
+##级
+##纨
+##纪
+##纫
+##纬
+##纭
+##纯
+##纰
+##纱
+##纲
+##纳
+##纵
+##纶
+##纷
+##纸
+##纹
+##纺
+##纽
+##纾
+##线
+##绀
+##练
+##组
+##绅
+##细
+##织
+##终
+##绊
+##绍
+##绎
+##经
+##绑
+##绒
+##结
+##绔
+##绕
+##绘
+##给
+##绚
+##绛
+##络
+##绝
+##绞
+##统
+##绡
+##绢
+##绣
+##绥
+##绦
+##继
+##绩
+##绪
+##绫
+##续
+##绮
+##绯
+##绰
+##绳
+##维
+##绵
+##绶
+##绷
+##绸
+##绻
+##综
+##绽
+##绾
+##绿
+##缀
+##缄
+##缅
+##缆
+##缇
+##缈
+##缉
+##缎
+##缓
+##缔
+##缕
+##编
+##缘
+##缙
+##缚
+##缜
+##缝
+##缠
+##缢
+##缤
+##缥
+##缨
+##缩
+##缪
+##缭
+##缮
+##缰
+##缱
+##缴
+##缸
+##缺
+##缽
+##罂
+##罄
+##罌
+##罐
+##网
+##罔
+##罕
+##罗
+##罚
+##罡
+##罢
+##罩
+##罪
+##置
+##罰
+##署
+##罵
+##罷
+##罹
+##羁
+##羅
+##羈
+##羊
+##羌
+##美
+##羔
+##羚
+##羞
+##羟
+##羡
+##羣
+##群
+##羥
+##羧
+##羨
+##義
+##羯
+##羲
+##羸
+##羹
+##羽
+##羿
+##翁
+##翅
+##翊
+##翌
+##翎
+##習
+##翔
+##翘
+##翟
+##翠
+##翡
+##翦
+##翩
+##翰
+##翱
+##翳
+##翹
+##翻
+##翼
+##耀
+##老
+##考
+##耄
+##者
+##耆
+##耋
+##而
+##耍
+##耐
+##耒
+##耕
+##耗
+##耘
+##耙
+##耦
+##耨
+##耳
+##耶
+##耷
+##耸
+##耻
+##耽
+##耿
+##聂
+##聆
+##聊
+##聋
+##职
+##聒
+##联
+##聖
+##聘
+##聚
+##聞
+##聪
+##聯
+##聰
+##聲
+##聳
+##聴
+##聶
+##職
+##聽
+##聾
+##聿
+##肃
+##肄
+##肅
+##肆
+##肇
+##肉
+##肋
+##肌
+##肏
+##肓
+##肖
+##肘
+##肚
+##肛
+##肝
+##肠
+##股
+##肢
+##肤
+##肥
+##肩
+##肪
+##肮
+##肯
+##肱
+##育
+##肴
+##肺
+##肽
+##肾
+##肿
+##胀
+##胁
+##胃
+##胄
+##胆
+##背
+##胍
+##胎
+##胖
+##胚
+##胛
+##胜
+##胝
+##胞
+##胡
+##胤
+##胥
+##胧
+##胫
+##胭
+##胯
+##胰
+##胱
+##胳
+##胴
+##胶
+##胸
+##胺
+##能
+##脂
+##脅
+##脆
+##脇
+##脈
+##脉
+##脊
+##脍
+##脏
+##脐
+##脑
+##脓
+##脖
+##脘
+##脚
+##脛
+##脣
+##脩
+##脫
+##脯
+##脱
+##脲
+##脳
+##脸
+##脹
+##脾
+##腆
+##腈
+##腊
+##腋
+##腌
+##腎
+##腐
+##腑
+##腓
+##腔
+##腕
+##腥
+##腦
+##腩
+##腫
+##腭
+##腮
+##腰
+##腱
+##腳
+##腴
+##腸
+##腹
+##腺
+##腻
+##腼
+##腾
+##腿
+##膀
+##膈
+##膊
+##膏
+##膑
+##膘
+##膚
+##膛
+##膜
+##膝
+##膠
+##膦
+##膨
+##膩
+##膳
+##膺
+##膻
+##膽
+##膾
+##膿
+##臀
+##臂
+##臃
+##臆
+##臉
+##臊
+##臍
+##臓
+##臘
+##臟
+##臣
+##臥
+##臧
+##臨
+##自
+##臬
+##臭
+##至
+##致
+##臺
+##臻
+##臼
+##臾
+##舀
+##舂
+##舅
+##舆
+##與
+##興
+##舉
+##舊
+##舌
+##舍
+##舎
+##舐
+##舒
+##舔
+##舖
+##舗
+##舛
+##舜
+##舞
+##舟
+##航
+##舫
+##般
+##舰
+##舱
+##舵
+##舶
+##舷
+##舸
+##船
+##舺
+##舾
+##艇
+##艋
+##艘
+##艙
+##艦
+##艮
+##良
+##艰
+##艱
+##色
+##艳
+##艷
+##艹
+##艺
+##艾
+##节
+##芃
+##芈
+##芊
+##芋
+##芍
+##芎
+##芒
+##芙
+##芜
+##芝
+##芡
+##芥
+##芦
+##芩
+##芪
+##芫
+##芬
+##芭
+##芮
+##芯
+##花
+##芳
+##芷
+##芸
+##芹
+##芻
+##芽
+##芾
+##苁
+##苄
+##苇
+##苋
+##苍
+##苏
+##苑
+##苒
+##苓
+##苔
+##苕
+##苗
+##苛
+##苜
+##苞
+##苟
+##苡
+##苣
+##若
+##苦
+##苫
+##苯
+##英
+##苷
+##苹
+##苻
+##茁
+##茂
+##范
+##茄
+##茅
+##茉
+##茎
+##茏
+##茗
+##茜
+##茧
+##茨
+##茫
+##茬
+##茭
+##茯
+##茱
+##茲
+##茴
+##茵
+##茶
+##茸
+##茹
+##茼
+##荀
+##荃
+##荆
+##草
+##荊
+##荏
+##荐
+##荒
+##荔
+##荖
+##荘
+##荚
+##荞
+##荟
+##荠
+##荡
+##荣
+##荤
+##荥
+##荧
+##荨
+##荪
+##荫
+##药
+##荳
+##荷
+##荸
+##荻
+##荼
+##荽
+##莅
+##莆
+##莉
+##莊
+##莎
+##莒
+##莓
+##莖
+##莘
+##莞
+##莠
+##莢
+##莧
+##莪
+##莫
+##莱
+##莲
+##莴
+##获
+##莹
+##莺
+##莽
+##莿
+##菀
+##菁
+##菅
+##菇
+##菈
+##菊
+##菌
+##菏
+##菓
+##菖
+##菘
+##菜
+##菟
+##菠
+##菡
+##菩
+##華
+##菱
+##菲
+##菸
+##菽
+##萁
+##萃
+##萄
+##萊
+##萋
+##萌
+##萍
+##萎
+##萘
+##萝
+##萤
+##营
+##萦
+##萧
+##萨
+##萩
+##萬
+##萱
+##萵
+##萸
+##萼
+##落
+##葆
+##葉
+##著
+##葚
+##葛
+##葡
+##董
+##葦
+##葩
+##葫
+##葬
+##葭
+##葯
+##葱
+##葳
+##葵
+##葷
+##葺
+##蒂
+##蒋
+##蒐
+##蒔
+##蒙
+##蒜
+##蒞
+##蒟
+##蒡
+##蒨
+##蒲
+##蒸
+##蒹
+##蒻
+##蒼
+##蒿
+##蓁
+##蓄
+##蓆
+##蓉
+##蓋
+##蓑
+##蓓
+##蓖
+##蓝
+##蓟
+##蓦
+##蓬
+##蓮
+##蓼
+##蓿
+##蔑
+##蔓
+##蔔
+##蔗
+##蔘
+##蔚
+##蔡
+##蔣
+##蔥
+##蔫
+##蔬
+##蔭
+##蔵
+##蔷
+##蔺
+##蔻
+##蔼
+##蔽
+##蕁
+##蕃
+##蕈
+##蕉
+##蕊
+##蕎
+##蕙
+##蕤
+##蕨
+##蕩
+##蕪
+##蕭
+##蕲
+##蕴
+##蕻
+##蕾
+##薄
+##薅
+##薇
+##薈
+##薊
+##薏
+##薑
+##薔
+##薙
+##薛
+##薦
+##薨
+##薩
+##薪
+##薬
+##薯
+##薰
+##薹
+##藉
+##藍
+##藏
+##藐
+##藓
+##藕
+##藜
+##藝
+##藤
+##藥
+##藩
+##藹
+##藻
+##藿
+##蘆
+##蘇
+##蘊
+##蘋
+##蘑
+##蘚
+##蘭
+##蘸
+##蘼
+##蘿
+##虎
+##虏
+##虐
+##虑
+##虔
+##處
+##虚
+##虛
+##虜
+##虞
+##號
+##虢
+##虧
+##虫
+##虬
+##虱
+##虹
+##虻
+##虽
+##虾
+##蚀
+##蚁
+##蚂
+##蚊
+##蚌
+##蚓
+##蚕
+##蚜
+##蚝
+##蚣
+##蚤
+##蚩
+##蚪
+##蚯
+##蚱
+##蚵
+##蛀
+##蛆
+##蛇
+##蛊
+##蛋
+##蛎
+##蛐
+##蛔
+##蛙
+##蛛
+##蛟
+##蛤
+##蛭
+##蛮
+##蛰
+##蛳
+##蛹
+##蛻
+##蛾
+##蜀
+##蜂
+##蜃
+##蜆
+##蜇
+##蜈
+##蜊
+##蜍
+##蜒
+##蜓
+##蜕
+##蜗
+##蜘
+##蜚
+##蜜
+##蜡
+##蜢
+##蜥
+##蜱
+##蜴
+##蜷
+##蜻
+##蜿
+##蝇
+##蝈
+##蝉
+##蝌
+##蝎
+##蝕
+##蝗
+##蝙
+##蝟
+##蝠
+##蝦
+##蝨
+##蝴
+##蝶
+##蝸
+##蝼
+##螂
+##螃
+##融
+##螞
+##螢
+##螨
+##螯
+##螳
+##螺
+##蟀
+##蟄
+##蟆
+##蟋
+##蟎
+##蟑
+##蟒
+##蟠
+##蟬
+##蟲
+##蟹
+##蟻
+##蟾
+##蠅
+##蠍
+##蠔
+##蠕
+##蠛
+##蠟
+##蠡
+##蠢
+##蠣
+##蠱
+##蠶
+##蠹
+##蠻
+##血
+##衄
+##衅
+##衆
+##行
+##衍
+##術
+##衔
+##街
+##衙
+##衛
+##衝
+##衞
+##衡
+##衢
+##衣
+##补
+##表
+##衩
+##衫
+##衬
+##衮
+##衰
+##衲
+##衷
+##衹
+##衾
+##衿
+##袁
+##袂
+##袄
+##袅
+##袈
+##袋
+##袍
+##袒
+##袖
+##袜
+##袞
+##袤
+##袪
+##被
+##袭
+##袱
+##裁
+##裂
+##装
+##裆
+##裊
+##裏
+##裔
+##裕
+##裘
+##裙
+##補
+##裝
+##裟
+##裡
+##裤
+##裨
+##裱
+##裳
+##裴
+##裸
+##裹
+##製
+##裾
+##褂
+##複
+##褐
+##褒
+##褓
+##褔
+##褚
+##褥
+##褪
+##褫
+##褲
+##褶
+##褻
+##襁
+##襄
+##襟
+##襠
+##襪
+##襬
+##襯
+##襲
+##西
+##要
+##覃
+##覆
+##覇
+##見
+##規
+##覓
+##視
+##覚
+##覦
+##覧
+##親
+##覬
+##観
+##覷
+##覺
+##覽
+##觀
+##见
+##观
+##规
+##觅
+##视
+##览
+##觉
+##觊
+##觎
+##觐
+##觑
+##角
+##觞
+##解
+##觥
+##触
+##觸
+##言
+##訂
+##計
+##訊
+##討
+##訓
+##訕
+##訖
+##託
+##記
+##訛
+##訝
+##訟
+##訣
+##訥
+##訪
+##設
+##許
+##訳
+##訴
+##訶
+##診
+##註
+##証
+##詆
+##詐
+##詔
+##評
+##詛
+##詞
+##詠
+##詡
+##詢
+##詣
+##試
+##詩
+##詫
+##詬
+##詭
+##詮
+##詰
+##話
+##該
+##詳
+##詹
+##詼
+##誅
+##誇
+##誉
+##誌
+##認
+##誓
+##誕
+##誘
+##語
+##誠
+##誡
+##誣
+##誤
+##誥
+##誦
+##誨
+##說
+##説
+##読
+##誰
+##課
+##誹
+##誼
+##調
+##諄
+##談
+##請
+##諏
+##諒
+##論
+##諗
+##諜
+##諡
+##諦
+##諧
+##諫
+##諭
+##諮
+##諱
+##諳
+##諷
+##諸
+##諺
+##諾
+##謀
+##謁
+##謂
+##謄
+##謊
+##謎
+##謐
+##謔
+##謗
+##謙
+##講
+##謝
+##謠
+##謨
+##謬
+##謹
+##謾
+##譁
+##證
+##譎
+##譏
+##識
+##譙
+##譚
+##譜
+##警
+##譬
+##譯
+##議
+##譲
+##譴
+##護
+##譽
+##讀
+##變
+##讓
+##讚
+##讞
+##计
+##订
+##认
+##讥
+##讧
+##讨
+##让
+##讪
+##讫
+##训
+##议
+##讯
+##记
+##讲
+##讳
+##讴
+##讶
+##讷
+##许
+##讹
+##论
+##讼
+##讽
+##设
+##访
+##诀
+##证
+##诃
+##评
+##诅
+##识
+##诈
+##诉
+##诊
+##诋
+##词
+##诏
+##译
+##试
+##诗
+##诘
+##诙
+##诚
+##诛
+##话
+##诞
+##诟
+##诠
+##诡
+##询
+##诣
+##诤
+##该
+##详
+##诧
+##诩
+##诫
+##诬
+##语
+##误
+##诰
+##诱
+##诲
+##说
+##诵
+##诶
+##请
+##诸
+##诺
+##读
+##诽
+##课
+##诿
+##谀
+##谁
+##调
+##谄
+##谅
+##谆
+##谈
+##谊
+##谋
+##谌
+##谍
+##谎
+##谏
+##谐
+##谑
+##谒
+##谓
+##谔
+##谕
+##谗
+##谘
+##谙
+##谚
+##谛
+##谜
+##谟
+##谢
+##谣
+##谤
+##谥
+##谦
+##谧
+##谨
+##谩
+##谪
+##谬
+##谭
+##谯
+##谱
+##谲
+##谴
+##谶
+##谷
+##豁
+##豆
+##豇
+##豈
+##豉
+##豊
+##豌
+##豎
+##豐
+##豔
+##豚
+##象
+##豢
+##豪
+##豫
+##豬
+##豹
+##豺
+##貂
+##貅
+##貌
+##貓
+##貔
+##貘
+##貝
+##貞
+##負
+##財
+##貢
+##貧
+##貨
+##販
+##貪
+##貫
+##責
+##貯
+##貰
+##貳
+##貴
+##貶
+##買
+##貸
+##費
+##貼
+##貽
+##貿
+##賀
+##賁
+##賂
+##賃
+##賄
+##資
+##賈
+##賊
+##賑
+##賓
+##賜
+##賞
+##賠
+##賡
+##賢
+##賣
+##賤
+##賦
+##質
+##賬
+##賭
+##賴
+##賺
+##購
+##賽
+##贅
+##贈
+##贊
+##贍
+##贏
+##贓
+##贖
+##贛
+##贝
+##贞
+##负
+##贡
+##财
+##责
+##贤
+##败
+##账
+##货
+##质
+##贩
+##贪
+##贫
+##贬
+##购
+##贮
+##贯
+##贰
+##贱
+##贲
+##贴
+##贵
+##贷
+##贸
+##费
+##贺
+##贻
+##贼
+##贾
+##贿
+##赁
+##赂
+##赃
+##资
+##赅
+##赈
+##赊
+##赋
+##赌
+##赎
+##赏
+##赐
+##赓
+##赔
+##赖
+##赘
+##赚
+##赛
+##赝
+##赞
+##赠
+##赡
+##赢
+##赣
+##赤
+##赦
+##赧
+##赫
+##赭
+##走
+##赳
+##赴
+##赵
+##赶
+##起
+##趁
+##超
+##越
+##趋
+##趕
+##趙
+##趟
+##趣
+##趨
+##足
+##趴
+##趵
+##趸
+##趺
+##趾
+##跃
+##跄
+##跆
+##跋
+##跌
+##跎
+##跑
+##跖
+##跚
+##跛
+##距
+##跟
+##跡
+##跤
+##跨
+##跩
+##跪
+##路
+##跳
+##践
+##跷
+##跹
+##跺
+##跻
+##踉
+##踊
+##踌
+##踏
+##踐
+##踝
+##踞
+##踟
+##踢
+##踩
+##踪
+##踮
+##踱
+##踴
+##踵
+##踹
+##蹂
+##蹄
+##蹇
+##蹈
+##蹉
+##蹊
+##蹋
+##蹑
+##蹒
+##蹙
+##蹟
+##蹣
+##蹤
+##蹦
+##蹩
+##蹬
+##蹭
+##蹲
+##蹴
+##蹶
+##蹺
+##蹼
+##蹿
+##躁
+##躇
+##躉
+##躊
+##躋
+##躍
+##躏
+##躪
+##身
+##躬
+##躯
+##躲
+##躺
+##軀
+##車
+##軋
+##軌
+##軍
+##軒
+##軟
+##転
+##軸
+##軼
+##軽
+##軾
+##較
+##載
+##輒
+##輓
+##輔
+##輕
+##輛
+##輝
+##輟
+##輩
+##輪
+##輯
+##輸
+##輻
+##輾
+##輿
+##轄
+##轅
+##轆
+##轉
+##轍
+##轎
+##轟
+##车
+##轧
+##轨
+##轩
+##转
+##轭
+##轮
+##软
+##轰
+##轲
+##轴
+##轶
+##轻
+##轼
+##载
+##轿
+##较
+##辄
+##辅
+##辆
+##辇
+##辈
+##辉
+##辊
+##辍
+##辐
+##辑
+##输
+##辕
+##辖
+##辗
+##辘
+##辙
+##辛
+##辜
+##辞
+##辟
+##辣
+##辦
+##辨
+##辩
+##辫
+##辭
+##辮
+##辯
+##辰
+##辱
+##農
+##边
+##辺
+##辻
+##込
+##辽
+##达
+##迁
+##迂
+##迄
+##迅
+##过
+##迈
+##迎
+##运
+##近
+##返
+##还
+##这
+##进
+##远
+##违
+##连
+##迟
+##迢
+##迤
+##迥
+##迦
+##迩
+##迪
+##迫
+##迭
+##述
+##迴
+##迷
+##迸
+##迹
+##迺
+##追
+##退
+##送
+##适
+##逃
+##逅
+##逆
+##选
+##逊
+##逍
+##透
+##逐
+##递
+##途
+##逕
+##逗
+##這
+##通
+##逛
+##逝
+##逞
+##速
+##造
+##逢
+##連
+##逮
+##週
+##進
+##逵
+##逶
+##逸
+##逻
+##逼
+##逾
+##遁
+##遂
+##遅
+##遇
+##遊
+##運
+##遍
+##過
+##遏
+##遐
+##遑
+##遒
+##道
+##達
+##違
+##遗
+##遙
+##遛
+##遜
+##遞
+##遠
+##遢
+##遣
+##遥
+##遨
+##適
+##遭
+##遮
+##遲
+##遴
+##遵
+##遶
+##遷
+##選
+##遺
+##遼
+##遽
+##避
+##邀
+##邁
+##邂
+##邃
+##還
+##邇
+##邈
+##邊
+##邋
+##邏
+##邑
+##邓
+##邕
+##邛
+##邝
+##邢
+##那
+##邦
+##邨
+##邪
+##邬
+##邮
+##邯
+##邰
+##邱
+##邳
+##邵
+##邸
+##邹
+##邺
+##邻
+##郁
+##郅
+##郊
+##郎
+##郑
+##郜
+##郝
+##郡
+##郢
+##郤
+##郦
+##郧
+##部
+##郫
+##郭
+##郴
+##郵
+##郷
+##郸
+##都
+##鄂
+##鄉
+##鄒
+##鄔
+##鄙
+##鄞
+##鄢
+##鄧
+##鄭
+##鄰
+##鄱
+##鄲
+##鄺
+##酉
+##酊
+##酋
+##酌
+##配
+##酐
+##酒
+##酗
+##酚
+##酝
+##酢
+##酣
+##酥
+##酩
+##酪
+##酬
+##酮
+##酯
+##酰
+##酱
+##酵
+##酶
+##酷
+##酸
+##酿
+##醃
+##醇
+##醉
+##醋
+##醍
+##醐
+##醒
+##醚
+##醛
+##醜
+##醞
+##醣
+##醪
+##醫
+##醬
+##醮
+##醯
+##醴
+##醺
+##釀
+##釁
+##采
+##釉
+##释
+##釋
+##里
+##重
+##野
+##量
+##釐
+##金
+##釗
+##釘
+##釜
+##針
+##釣
+##釦
+##釧
+##釵
+##鈀
+##鈉
+##鈍
+##鈎
+##鈔
+##鈕
+##鈞
+##鈣
+##鈦
+##鈪
+##鈴
+##鈺
+##鈾
+##鉀
+##鉄
+##鉅
+##鉉
+##鉑
+##鉗
+##鉚
+##鉛
+##鉤
+##鉴
+##鉻
+##銀
+##銃
+##銅
+##銑
+##銓
+##銖
+##銘
+##銜
+##銬
+##銭
+##銮
+##銳
+##銷
+##銹
+##鋁
+##鋅
+##鋒
+##鋤
+##鋪
+##鋰
+##鋸
+##鋼
+##錄
+##錐
+##錘
+##錚
+##錠
+##錢
+##錦
+##錨
+##錫
+##錮
+##錯
+##録
+##錳
+##錶
+##鍊
+##鍋
+##鍍
+##鍛
+##鍥
+##鍰
+##鍵
+##鍺
+##鍾
+##鎂
+##鎊
+##鎌
+##鎏
+##鎔
+##鎖
+##鎗
+##鎚
+##鎧
+##鎬
+##鎮
+##鎳
+##鏈
+##鏖
+##鏗
+##鏘
+##鏞
+##鏟
+##鏡
+##鏢
+##鏤
+##鏽
+##鐘
+##鐮
+##鐲
+##鐳
+##鐵
+##鐸
+##鐺
+##鑄
+##鑊
+##鑑
+##鑒
+##鑣
+##鑫
+##鑰
+##鑲
+##鑼
+##鑽
+##鑾
+##鑿
+##针
+##钉
+##钊
+##钎
+##钏
+##钒
+##钓
+##钗
+##钙
+##钛
+##钜
+##钝
+##钞
+##钟
+##钠
+##钡
+##钢
+##钣
+##钤
+##钥
+##钦
+##钧
+##钨
+##钩
+##钮
+##钯
+##钰
+##钱
+##钳
+##钴
+##钵
+##钺
+##钻
+##钼
+##钾
+##钿
+##铀
+##铁
+##铂
+##铃
+##铄
+##铅
+##铆
+##铉
+##铎
+##铐
+##铛
+##铜
+##铝
+##铠
+##铡
+##铢
+##铣
+##铤
+##铨
+##铩
+##铬
+##铭
+##铮
+##铰
+##铲
+##铵
+##银
+##铸
+##铺
+##链
+##铿
+##销
+##锁
+##锂
+##锄
+##锅
+##锆
+##锈
+##锉
+##锋
+##锌
+##锏
+##锐
+##锑
+##错
+##锚
+##锟
+##锡
+##锢
+##锣
+##锤
+##锥
+##锦
+##锭
+##键
+##锯
+##锰
+##锲
+##锵
+##锹
+##锺
+##锻
+##镀
+##镁
+##镂
+##镇
+##镉
+##镌
+##镍
+##镐
+##镑
+##镕
+##镖
+##镗
+##镛
+##镜
+##镣
+##镭
+##镯
+##镰
+##镳
+##镶
+##長
+##长
+##門
+##閃
+##閉
+##開
+##閎
+##閏
+##閑
+##閒
+##間
+##閔
+##閘
+##閡
+##関
+##閣
+##閥
+##閨
+##閩
+##閱
+##閲
+##閹
+##閻
+##閾
+##闆
+##闇
+##闊
+##闌
+##闍
+##闔
+##闕
+##闖
+##闘
+##關
+##闡
+##闢
+##门
+##闪
+##闫
+##闭
+##问
+##闯
+##闰
+##闲
+##间
+##闵
+##闷
+##闸
+##闹
+##闺
+##闻
+##闽
+##闾
+##阀
+##阁
+##阂
+##阅
+##阆
+##阇
+##阈
+##阉
+##阎
+##阐
+##阑
+##阔
+##阕
+##阖
+##阙
+##阚
+##阜
+##队
+##阡
+##阪
+##阮
+##阱
+##防
+##阳
+##阴
+##阵
+##阶
+##阻
+##阿
+##陀
+##陂
+##附
+##际
+##陆
+##陇
+##陈
+##陋
+##陌
+##降
+##限
+##陕
+##陛
+##陝
+##陞
+##陟
+##陡
+##院
+##陣
+##除
+##陨
+##险
+##陪
+##陰
+##陲
+##陳
+##陵
+##陶
+##陷
+##陸
+##険
+##陽
+##隅
+##隆
+##隈
+##隊
+##隋
+##隍
+##階
+##随
+##隐
+##隔
+##隕
+##隘
+##隙
+##際
+##障
+##隠
+##隣
+##隧
+##隨
+##險
+##隱
+##隴
+##隶
+##隸
+##隻
+##隼
+##隽
+##难
+##雀
+##雁
+##雄
+##雅
+##集
+##雇
+##雉
+##雋
+##雌
+##雍
+##雎
+##雏
+##雑
+##雒
+##雕
+##雖
+##雙
+##雛
+##雜
+##雞
+##離
+##難
+##雨
+##雪
+##雯
+##雰
+##雲
+##雳
+##零
+##雷
+##雹
+##電
+##雾
+##需
+##霁
+##霄
+##霆
+##震
+##霈
+##霉
+##霊
+##霍
+##霎
+##霏
+##霑
+##霓
+##霖
+##霜
+##霞
+##霧
+##霭
+##霰
+##露
+##霸
+##霹
+##霽
+##霾
+##靂
+##靄
+##靈
+##青
+##靓
+##靖
+##静
+##靚
+##靛
+##靜
+##非
+##靠
+##靡
+##面
+##靥
+##靦
+##革
+##靳
+##靴
+##靶
+##靼
+##鞅
+##鞋
+##鞍
+##鞏
+##鞑
+##鞘
+##鞠
+##鞣
+##鞦
+##鞭
+##韆
+##韋
+##韌
+##韓
+##韜
+##韦
+##韧
+##韩
+##韬
+##韭
+##音
+##韵
+##韶
+##韻
+##響
+##頁
+##頂
+##頃
+##項
+##順
+##須
+##頌
+##預
+##頑
+##頒
+##頓
+##頗
+##領
+##頜
+##頡
+##頤
+##頫
+##頭
+##頰
+##頷
+##頸
+##頹
+##頻
+##頼
+##顆
+##題
+##額
+##顎
+##顏
+##顔
+##願
+##顛
+##類
+##顧
+##顫
+##顯
+##顱
+##顴
+##页
+##顶
+##顷
+##项
+##顺
+##须
+##顼
+##顽
+##顾
+##顿
+##颁
+##颂
+##预
+##颅
+##领
+##颇
+##颈
+##颉
+##颊
+##颌
+##颍
+##颐
+##频
+##颓
+##颔
+##颖
+##颗
+##题
+##颚
+##颛
+##颜
+##额
+##颞
+##颠
+##颡
+##颢
+##颤
+##颦
+##颧
+##風
+##颯
+##颱
+##颳
+##颶
+##颼
+##飄
+##飆
+##风
+##飒
+##飓
+##飕
+##飘
+##飙
+##飚
+##飛
+##飞
+##食
+##飢
+##飨
+##飩
+##飪
+##飯
+##飲
+##飼
+##飽
+##飾
+##餃
+##餅
+##餉
+##養
+##餌
+##餐
+##餒
+##餓
+##餘
+##餚
+##餛
+##餞
+##餡
+##館
+##餮
+##餵
+##餾
+##饅
+##饈
+##饋
+##饌
+##饍
+##饑
+##饒
+##饕
+##饗
+##饞
+##饥
+##饨
+##饪
+##饬
+##饭
+##饮
+##饯
+##饰
+##饱
+##饲
+##饴
+##饵
+##饶
+##饷
+##饺
+##饼
+##饽
+##饿
+##馀
+##馁
+##馄
+##馅
+##馆
+##馈
+##馋
+##馍
+##馏
+##馒
+##馔
+##首
+##馗
+##香
+##馥
+##馨
+##馬
+##馭
+##馮
+##馳
+##馴
+##駁
+##駄
+##駅
+##駆
+##駐
+##駒
+##駕
+##駛
+##駝
+##駭
+##駱
+##駿
+##騁
+##騎
+##騏
+##験
+##騙
+##騨
+##騰
+##騷
+##驀
+##驅
+##驊
+##驍
+##驒
+##驕
+##驗
+##驚
+##驛
+##驟
+##驢
+##驥
+##马
+##驭
+##驮
+##驯
+##驰
+##驱
+##驳
+##驴
+##驶
+##驷
+##驸
+##驹
+##驻
+##驼
+##驾
+##驿
+##骁
+##骂
+##骄
+##骅
+##骆
+##骇
+##骈
+##骊
+##骋
+##验
+##骏
+##骐
+##骑
+##骗
+##骚
+##骛
+##骜
+##骞
+##骠
+##骡
+##骤
+##骥
+##骧
+##骨
+##骯
+##骰
+##骶
+##骷
+##骸
+##骼
+##髂
+##髅
+##髋
+##髏
+##髒
+##髓
+##體
+##髖
+##高
+##髦
+##髪
+##髮
+##髯
+##髻
+##鬃
+##鬆
+##鬍
+##鬓
+##鬚
+##鬟
+##鬢
+##鬣
+##鬥
+##鬧
+##鬱
+##鬼
+##魁
+##魂
+##魄
+##魅
+##魇
+##魍
+##魏
+##魔
+##魘
+##魚
+##魯
+##魷
+##鮑
+##鮨
+##鮪
+##鮭
+##鮮
+##鯉
+##鯊
+##鯖
+##鯛
+##鯨
+##鯰
+##鯽
+##鰍
+##鰓
+##鰭
+##鰲
+##鰻
+##鰾
+##鱈
+##鱉
+##鱔
+##鱗
+##鱷
+##鱸
+##鱼
+##鱿
+##鲁
+##鲈
+##鲍
+##鲑
+##鲛
+##鲜
+##鲟
+##鲢
+##鲤
+##鲨
+##鲫
+##鲱
+##鲲
+##鲶
+##鲷
+##鲸
+##鳃
+##鳄
+##鳅
+##鳌
+##鳍
+##鳕
+##鳖
+##鳗
+##鳝
+##鳞
+##鳥
+##鳩
+##鳳
+##鳴
+##鳶
+##鴉
+##鴕
+##鴛
+##鴦
+##鴨
+##鴻
+##鴿
+##鵑
+##鵜
+##鵝
+##鵡
+##鵬
+##鵰
+##鵲
+##鶘
+##鶩
+##鶯
+##鶴
+##鷗
+##鷲
+##鷹
+##鷺
+##鸚
+##鸞
+##鸟
+##鸠
+##鸡
+##鸢
+##鸣
+##鸥
+##鸦
+##鸨
+##鸪
+##鸭
+##鸯
+##鸳
+##鸵
+##鸽
+##鸾
+##鸿
+##鹂
+##鹃
+##鹄
+##鹅
+##鹈
+##鹉
+##鹊
+##鹌
+##鹏
+##鹑
+##鹕
+##鹘
+##鹜
+##鹞
+##鹤
+##鹦
+##鹧
+##鹫
+##鹭
+##鹰
+##鹳
+##鹵
+##鹹
+##鹼
+##鹽
+##鹿
+##麂
+##麋
+##麒
+##麓
+##麗
+##麝
+##麟
+##麥
+##麦
+##麩
+##麴
+##麵
+##麸
+##麺
+##麻
+##麼
+##麽
+##麾
+##黃
+##黄
+##黍
+##黎
+##黏
+##黑
+##黒
+##黔
+##默
+##黛
+##黜
+##黝
+##點
+##黠
+##黨
+##黯
+##黴
+##鼋
+##鼎
+##鼐
+##鼓
+##鼠
+##鼬
+##鼹
+##鼻
+##鼾
+##齁
+##齊
+##齋
+##齐
+##齒
+##齡
+##齢
+##齣
+##齦
+##齿
+##龄
+##龅
+##龈
+##龊
+##龋
+##龌
+##龍
+##龐
+##龔
+##龕
+##龙
+##龚
+##龛
+##龜
+##龟
+##︰
+##︱
+##︶
+##︿
+##﹁
+##﹂
+##﹍
+##﹏
+##﹐
+##﹑
+##﹒
+##﹔
+##﹕
+##﹖
+##﹗
+##﹙
+##﹚
+##﹝
+##﹞
+##﹡
+##﹣
+##！
+##＂
+##＃
+##＄
+##％
+##＆
+##＇
+##（
+##）
+##＊
+##，
+##－
+##．
+##／
+##：
+##；
+##＜
+##？
+##＠
+##［
+##＼
+##］
+##＾
+##＿
+##｀
+##ｆ
+##ｈ
+##ｊ
+##ｕ
+##ｗ
+##ｚ
+##｛
+##｝
+##｡
+##｢
+##｣
+##､
+##･
+##ｯ
+##ｰ
+##ｲ
+##ｸ
+##ｼ
+##ｽ
+##ﾄ
+##ﾉ
+##ﾌ
+##ﾗ
+##ﾙ
+##ﾝ
+##ﾞ
+##ﾟ
+##￣
+##￥
+##👍
+##🔥
+##😂
+##😎

From cb8e732ece762c610f5c5d53868b149cf916df75 Mon Sep 17 00:00:00 2001
From: jiangjinsheng <jiangjinsheng@huawei.com>
Date: Tue, 28 Apr 2020 18:06:20 +0800
Subject: [PATCH 157/242] add examples for nn_ops.py

---
 mindspore/ops/operations/nn_ops.py | 52 +++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 66656b559e..39a90f9424 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -111,6 +111,12 @@ class Softmax(PrimitiveWithInfer):
 
     Outputs:
         Tensor, with the same type and shape as the logits.
+
+    Examples:
+        >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
+        >>> softmax = P.Softmax()
+        >>> softmax(input_x)
+        [0.01165623, 0.03168492, 0.08612854, 0.23412167, 0.6364086]
     """
 
     @prim_attr_register
@@ -155,6 +161,12 @@ class LogSoftmax(PrimitiveWithInfer):
 
     Outputs:
         Tensor, with the same type and shape as the logits.
+
+    Examples:
+        >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
+        >>> log_softmax = P.LogSoftmax()
+        >>> log_softmax(input_x)
+        [-4.4519143, -3.4519143, -2.4519143, -1.4519144, -0.4519144]
     """
 
     @prim_attr_register
@@ -375,6 +387,11 @@ class Sigmoid(PrimitiveWithInfer):
     Outputs:
         Tensor, with the same type and shape as the input_x.
 
+    Examples:
+        >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
+        >>> sigmoid = P.Sigmoid()
+        >>> sigmoid(input_x)
+        [0.73105866, 0.880797, 0.9525742, 0.98201376, 0.9933071]
     """
 
     @prim_attr_register
@@ -438,6 +455,12 @@ class Tanh(PrimitiveWithInfer):
 
     Outputs:
         Tensor, with the same type and shape as the input_x.
+
+    Examples:
+        >>> input_x = Tensor(np.array([1, 2, 3, 4, 5]), mindspore.float32)
+        >>> tanh = P.Tanh()
+        >>> tanh(input_x)
+        [0.7615941, 0.9640276, 0.9950548, 0.9993293, 0.99990916]
     """
 
     @prim_attr_register
@@ -547,6 +570,15 @@ class BatchNorm(PrimitiveWithInfer):
         - **reserve_space_1** (Tensor) - Tensor of shape :math:`(C,)`.
         - **reserve_space_2** (Tensor) - Tensor of shape :math:`(C,)`.
         - **reserve_space_3** (Tensor) - Tensor of shape :math:`(C,)`.
+
+    Examples:
+        >>> input_x = Tensor(np.ones([128, 64, 32, 64]), mindspore.float32)
+        >>> scale = Tensor(np.ones([64]), mindspore.float32)
+        >>> bias = Tensor(np.ones([64]), mindspore.float32)
+        >>> mean = Tensor(np.ones([64]), mindspore.float32)
+        >>> variance = Tensor(np.ones([64]), mindspore.float32)
+        >>> batch_norm = P.BatchNorm()
+        >>> output = batch_norm(input_x, scale, bias, mean, variance)
     """
 
     @prim_attr_register
@@ -1189,6 +1221,12 @@ class BiasAdd(PrimitiveWithInfer):
 
     Outputs:
         Tensor, with the same shape and type as `input_x`.
+
+    Examples:
+        >>> input_x = Tensor(np.arange(6).reshape((2, 3)), mindspore.float32)
+        >>> bias = Tensor(np.random.random(3).reshape((3,)), mindspore.float32)
+        >>> bias_add = P.BiasAdd()
+        >>> bias_add(input_x, bias)
     """
 
     @prim_attr_register
@@ -1277,7 +1315,12 @@ class SoftmaxCrossEntropyWithLogits(PrimitiveWithInfer):
         Tuple of 2 Tensor, the loss shape is `(N,)`, and the dlogits with the same shape as `logits`.
 
     Examples:
-        Please refer to the usage in nn.SoftmaxCrossEntropyWithLogits source code.
+        >>> logits = Tensor([[2, 4, 1, 4, 5], [2, 1, 2, 4, 3]], mindspore.float32)
+        >>> labels = Tensor([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0]], mindspore.float32)
+        >>> softmax_cross = P.SoftmaxCrossEntropyWithLogits()
+        >>> softmax_cross(logits, labels)
+        ([0.5899297, 0.52374405], [[0.02760027, 0.20393994, 0.01015357, 0.20393994, -0.44563377],
+        [0.08015892, 0.02948882, 0.08015892, -0.4077012, 0.21789455]])
     """
 
     @prim_attr_register
@@ -1421,6 +1464,13 @@ class SmoothL1Loss(PrimitiveWithInfer):
 
     Outputs:
         Tensor, with the same type and shape as `prediction`.
+
+    Examples:
+        >>> loss = P.SmoothL1Loss()
+        >>> input_data = Tensor(np.array([1, 2, 3]), mindspore.float32)
+        >>> target_data = Tensor(np.array([1, 2, 2]), mindspore.float32)
+        >>> loss(input_data, target_data)
+        [0, 0, 0.5]
     """
 
     @prim_attr_register

From f77de54aa4769cb021c79ac5701442c8993fb065 Mon Sep 17 00:00:00 2001
From: dinghao <dinghao7@huawei.com>
Date: Tue, 28 Apr 2020 15:44:45 +0800
Subject: [PATCH 158/242] fix tensor dirty

---
 mindspore/ccsrc/ir/meta_tensor.cc        |  4 +++-
 mindspore/ccsrc/session/session_basic.cc |  3 ++-
 tests/st/ops/gpu/test_assign_add_op.py   | 15 +++++++++------
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/ir/meta_tensor.cc b/mindspore/ccsrc/ir/meta_tensor.cc
index de59cb66d9..8718a82cbc 100644
--- a/mindspore/ccsrc/ir/meta_tensor.cc
+++ b/mindspore/ccsrc/ir/meta_tensor.cc
@@ -164,8 +164,9 @@ Tensor::Tensor(const py::float_ &input, const TypePtr &data_type) { init(py::arr
 Tensor::Tensor(const py::int_ &input, const TypePtr &data_type) { init(py::array(input), data_type); }
 
 Tensor::Tensor(const Tensor &tensor, const TypePtr &data_type)
-    : MetaTensor(tensor), dirty_(tensor.dirty_), device_address_(tensor.device_address_) {
+    : MetaTensor(tensor), device_address_(tensor.device_address_) {
   init(tensor.data_, data_type);
+  dirty_ = tensor.is_dirty();
 }
 
 Tensor &Tensor::operator=(const Tensor &tensor) {
@@ -291,6 +292,7 @@ void Tensor::init(const py::array &input, const TypeId &data_type) {
   } else {
     data_ = input;
   }
+  dirty_ = true;
 }
 
 void Tensor::init(TypeId data_type, const std::vector<int> &shape, py::array *const data) {
diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc
index cb9e5c4dc9..223cf6963c 100755
--- a/mindspore/ccsrc/session/session_basic.cc
+++ b/mindspore/ccsrc/session/session_basic.cc
@@ -127,6 +127,7 @@ BaseRef CreateOneTensor(const AnfNodePtr &node, size_t output_index, const Kerne
   MS_EXCEPTION_IF_NULL(ms_context);
   if (ms_context->enable_pynative_infer()) {
     tensor->set_device_address(AnfAlgo::GetMutableOutputAddr(node, output_index));
+    tensor->set_dirty(false);
   } else if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, output_index),
                                         LongToSize(tensor->data().nbytes()), tensor->data_type(),
                                         tensor->data_c(true))) {
@@ -491,7 +492,7 @@ void SessionBasic::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_grap
           need_sync = true;
         }
       } else {
-        if (tensor->is_dirty() || !AnfAlgo::IsParameterWeight(pk_node)) {
+        if (tensor->is_dirty()) {
           need_sync = true;
         } else if (tensor->device_address() != device_address) {
           (void)tensor->data_sync();
diff --git a/tests/st/ops/gpu/test_assign_add_op.py b/tests/st/ops/gpu/test_assign_add_op.py
index f8faf2be64..b021a32f32 100644
--- a/tests/st/ops/gpu/test_assign_add_op.py
+++ b/tests/st/ops/gpu/test_assign_add_op.py
@@ -51,19 +51,22 @@ def test_assign_add():
                          [[54, 57, 60],
                           [63, 66, 69],
                           [72, 75, 78]]]])
-    x = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
-    y = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
+    x1 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
+    y1 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
+
+    x2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
+    y2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
 
     context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
     add = AssignAdd()
-    output1 = add(x, y)
+    output1 = add(x1, y1)
     assert (output1.asnumpy() == expect1).all()
-    output2 = add(output1, y)
+    output2 = add(output1, y1)
     assert (output2.asnumpy() == expect2).all()
 
     context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
     add = AssignAdd()
-    output1 = add(x, y)
+    output1 = add(x2, y2)
     assert (output1.asnumpy() == expect1).all()
-    output2 = add(output1, y)
+    output2 = add(output1, y2)
     assert (output2.asnumpy() == expect2).all()

From 0f89cc1da44428ee63afa9886c4c3db8d75e952c Mon Sep 17 00:00:00 2001
From: buxue <yiren19920727@163.com>
Date: Mon, 27 Apr 2020 14:42:30 +0800
Subject: [PATCH 159/242] dock AcoshGrad for GE and AvgPool AvgPoolGrad for Vm

---
 mindspore/ccsrc/kernel/tbe/tbe_adapter.cc     |  1 +
 mindspore/ccsrc/operator/ops.cc               |  1 +
 mindspore/ccsrc/operator/ops.h                |  1 +
 .../pass/const_input_to_attr_registry.cc      |  1 +
 mindspore/ccsrc/transform/convert.cc          |  2 +
 mindspore/ccsrc/transform/op_declare.cc       |  5 +
 mindspore/ccsrc/transform/op_declare.h        |  4 +-
 mindspore/nn/layer/basic.py                   |  3 +-
 mindspore/ops/_grad/grad_nn_ops.py            | 45 +++++++--
 mindspore/ops/_op_impl/tbe/__init__.py        |  2 +
 mindspore/ops/_op_impl/tbe/avg_pool.py        | 39 ++++++++
 mindspore/ops/_op_impl/tbe/avg_pool_grad.py   | 42 ++++++++
 mindspore/ops/operations/__init__.py          |  3 +-
 mindspore/ops/operations/_grad_ops.py         | 17 ++++
 mindspore/ops/operations/_inner_ops.py        | 98 +++++++++++++++++++
 mindspore/ops/operations/nn_ops.py            | 76 --------------
 tests/ut/python/ops/test_ops.py               |  4 +-
 17 files changed, 256 insertions(+), 88 deletions(-)
 create mode 100644 mindspore/ops/_op_impl/tbe/avg_pool.py
 create mode 100644 mindspore/ops/_op_impl/tbe/avg_pool_grad.py
 create mode 100644 mindspore/ops/operations/_inner_ops.py

diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
index 44750fab4f..d5be2cbd29 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
@@ -38,6 +38,7 @@ static std::map<string, string> tbe_func_adapter_map = {
   {"reduce_mean", "reduce_mean_d"},
   {"reduce_max", "reduce_max_d"},
   {"reduce_min", "reduce_min_d"},
+  {"avg_pool_grad", "avg_pool_grad_d"},
   {"conv2d_backprop_filter", "conv2d_backprop_filter_d"},
   {"conv2d_backprop_input", "conv2d_backprop_input_d"},
   {"depthwise_conv2d_native", "depthwise_conv2d"},
diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/operator/ops.cc
index 407efe5689..6510ef79ea 100755
--- a/mindspore/ccsrc/operator/ops.cc
+++ b/mindspore/ccsrc/operator/ops.cc
@@ -170,6 +170,7 @@ const PrimitivePtr kPrimPooling = std::make_shared<Primitive>("Pooling");
 const PrimitivePtr kPrimPoolingGrad = std::make_shared<Primitive>("PoolingGrad");
 const PrimitivePtr kPrimMaxPool = std::make_shared<Primitive>("MaxPool");
 const PrimitivePtr kPrimMaxPoolGrad = std::make_shared<Primitive>("MaxPoolGrad");
+const PrimitivePtr kPrimAvgPoolGrad = std::make_shared<Primitive>("AvgPoolGrad");
 const PrimitivePtr kPrimFusedBatchNorm = std::make_shared<Primitive>("FusedBatchNorm");
 const PrimitivePtr kPrimConv2D = std::make_shared<Primitive>("Conv2D");
 const PrimitivePtr kPrimFusedBatchNormGrad = std::make_shared<Primitive>("FusedBatchNormGrad");
diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h
index e938e5c64e..b37d068d94 100755
--- a/mindspore/ccsrc/operator/ops.h
+++ b/mindspore/ccsrc/operator/ops.h
@@ -178,6 +178,7 @@ extern const PrimitivePtr kPrimFusedBatchNorm;
 extern const PrimitivePtr kPrimConv2D;
 extern const PrimitivePtr kPrimMaxPool;
 extern const PrimitivePtr kPrimMaxPoolGrad;
+extern const PrimitivePtr kPrimAvgPoolGrad;
 extern const PrimitivePtr kPrimFusedBatchNormGrad;
 extern const PrimitivePtr kPrimReluGrad;
 extern const PrimitivePtr kPrimConv2DBackpropInput;
diff --git a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc b/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
index 0b4263685b..3153a3bef9 100644
--- a/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
+++ b/mindspore/ccsrc/pre_activate/pass/const_input_to_attr_registry.cc
@@ -25,6 +25,7 @@ namespace mindspore {
 namespace opt {
 ConstInputToAttrInfoRegistry::ConstInputToAttrInfoRegistry() {
   Register(prim::kPrimCast->name(), {1});
+  Register(prim::kPrimAvgPoolGrad->name(), {0});
   Register(prim::kPrimConv2DBackpropInput->name(), {2});
   Register(prim::kPrimConv2DBackpropFilter->name(), {2});
   Register(prim::kPrimDepthwiseConv2dNativeBackpropFilter->name(), {1});
diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc
index 177f939f37..e7ea44b555 100644
--- a/mindspore/ccsrc/transform/convert.cc
+++ b/mindspore/ccsrc/transform/convert.cc
@@ -178,6 +178,7 @@ const char kNameBinaryCrossEntropy[] = "BinaryCrossEntropy";
 const char kNameBinaryCrossEntropyGrad[] = "BinaryCrossEntropyGrad";
 const char kNameSparseApplyAdagrad[] = "SparseApplyAdagrad";
 const char kNameAcosh[] = "Acosh";
+const char kNameAcoshGrad[] = "AcoshGrad";
 const char kNameFloorMod[] = "FloorMod";
 const char kNameSpaceToDepth[] = "SpaceToDepth";
 const char kNameDepthToSpace[] = "DepthToSpace";
@@ -375,6 +376,7 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
     {string(kNameBinaryCrossEntropyGrad), ADPT_DESC(BinaryCrossEntropyGrad)},
     {string(kNameSparseApplyAdagrad), ADPT_DESC(SparseApplyAdagradD)},
     {string(kNameAcosh), ADPT_DESC(Acosh)},
+    {string(kNameAcoshGrad), ADPT_DESC(AcoshGrad)},
     {string(kNameFloorMod), ADPT_DESC(FloorMod)},
     {string(kNameSpaceToDepth), ADPT_DESC(SpaceToDepth)},
     {string(kNameDepthToSpace), ADPT_DESC(DepthToSpace)},
diff --git a/mindspore/ccsrc/transform/op_declare.cc b/mindspore/ccsrc/transform/op_declare.cc
index 477c915b15..b1195cfb1c 100644
--- a/mindspore/ccsrc/transform/op_declare.cc
+++ b/mindspore/ccsrc/transform/op_declare.cc
@@ -357,6 +357,11 @@ INPUT_MAP(Acosh) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(Acosh) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(Acosh) = {{0, OUTPUT_DESC(y)}};
 
+// AcoshGrad
+INPUT_MAP(AcoshGrad) = {{1, INPUT_DESC(y)}, {2, INPUT_DESC(dy)}};
+ATTR_MAP(AcoshGrad) = EMPTY_ATTR_MAP;
+OUTPUT_MAP(AcoshGrad) = {{0, OUTPUT_DESC(z)}};
+
 // Floor
 INPUT_MAP(Floor) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(Floor) = EMPTY_ATTR_MAP;
diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h
index 3be3546455..a2dc16c285 100755
--- a/mindspore/ccsrc/transform/op_declare.h
+++ b/mindspore/ccsrc/transform/op_declare.h
@@ -327,13 +327,15 @@ DECLARE_OP_ADAPTER(Const)
 DECLARE_OP_USE_OUTPUT(Const)
 DECLARE_OP_ADAPTER(Cos)
 DECLARE_OP_USE_OUTPUT(Cos)
+
 DECLARE_OP_ADAPTER(Acos)
 DECLARE_OP_USE_OUTPUT(Acos)
-
 DECLARE_OP_ADAPTER(AcosGrad)
 DECLARE_OP_USE_OUTPUT(AcosGrad)
 DECLARE_OP_ADAPTER(Acosh)
 DECLARE_OP_USE_OUTPUT(Acosh)
+DECLARE_OP_ADAPTER(AcoshGrad)
+DECLARE_OP_USE_OUTPUT(AcoshGrad)
 
 DECLARE_OP_ADAPTER(Floor)
 DECLARE_OP_USE_OUTPUT(Floor)
diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py
index 2f8b38e818..9c8de85a68 100644
--- a/mindspore/nn/layer/basic.py
+++ b/mindspore/nn/layer/basic.py
@@ -21,6 +21,7 @@ from mindspore._checkparam import check_int_positive, check_bool
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
 from mindspore.ops.functional import identity
+from mindspore.ops.operations import _inner_ops as inner
 from mindspore.common.parameter import Parameter
 from mindspore._extends import cell_attr_register
 from mindspore.common.api import ms_function
@@ -480,7 +481,7 @@ class Unfold(Cell):
     """
     def __init__(self, ksizes, strides, rates, padding="valid"):
         super(Unfold, self).__init__()
-        self.extract_image_patches = P.ExtractImagePatches(ksizes, strides, rates, padding)
+        self.extract_image_patches = inner.ExtractImagePatches(ksizes, strides, rates, padding)
         self.transpose = P.Transpose()
         self.format_NHWC = (0, 2, 3, 1)
         self.format_NCHW = (0, 3, 1, 2)
diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py
index baccdbbbb2..fc94544176 100755
--- a/mindspore/ops/_grad/grad_nn_ops.py
+++ b/mindspore/ops/_grad/grad_nn_ops.py
@@ -18,6 +18,7 @@ from mindspore.common import dtype as mstype
 from .. import functional as F
 from .. import operations as P
 from ..operations import _grad_ops as G
+from ..operations import _inner_ops as inner
 from ..composite.multitype_ops.zeros_like_impl import zeros_like
 from .grad_base import bprop_getters
 
@@ -29,6 +30,7 @@ def get_bprop_bias_add(self):
 
     def bprop(x, w, out, dout):
         return dout, bias_grad(dout)
+
     return bprop
 
 
@@ -49,18 +51,19 @@ def get_bprop_conv2d(self):
         dx = input_grad(dout, w, get_shape(x))
         dw = filter_grad(dout, x, get_shape(w))
         return dx, dw
+
     return bprop
 
 
-@bprop_getters.register(P.ExtractImagePatches)
+@bprop_getters.register(inner.ExtractImagePatches)
 def get_bprop_extract_image_patches(self):
     """Grad definition for `ExtractImagePatches` operation."""
     get_shape = P.Shape()
     reshape = P.Reshape()
-    extract_image_patches = P.ExtractImagePatches(ksizes=self.ksizes,
-                                                  strides=self.strides,
-                                                  rates=self.rates,
-                                                  padding=self.padding)
+    extract_image_patches = inner.ExtractImagePatches(ksizes=self.ksizes,
+                                                      strides=self.strides,
+                                                      rates=self.rates,
+                                                      padding=self.padding)
     concat = P.Concat(axis=-1)
     expand_dims = P.ExpandDims()
     scatter_nd = P.ScatterNd()
@@ -104,6 +107,7 @@ def get_bprop_extract_image_patches(self):
         dx = transpose(dx, (2, 0, 1, 3))
 
         return (dx,)
+
     return bprop
 
 
@@ -124,6 +128,7 @@ def get_bprop_depthwise_conv2d_native(self):
         dx = input_grad(get_shape(x), w, dout)
         dw = filter_grad(x, get_shape(w), dout)
         return dx, dw
+
     return bprop
 
 
@@ -133,11 +138,12 @@ def get_bprop_max_pool_with_argmax(self):
     maxpool_grad = G.MaxPoolGradWithArgmax(
         ksize=self.ksize,
         strides=self.strides,
-        padding=self.padding,)
+        padding=self.padding)
 
     def bprop(x, out, dout):
         dx = maxpool_grad(x, dout[0], out[1])
         return (dx,)
+
     return bprop
 
 
@@ -152,6 +158,7 @@ def get_bprop_max_pool_grad(self):
     def bprop(x, out, dout):
         dx = maxpool_grad(x, out, dout)
         return (dx,)
+
     return bprop
 
 
@@ -192,6 +199,7 @@ def get_bprop_dropout_gen_mask(self):
 
     def bprop(shape, keep_prob, out, dout):
         return (zeros_like(shape), zeros_like(keep_prob))
+
     return bprop
 
 
@@ -202,6 +210,7 @@ def get_bprop_dropout_do_mask(self):
 
     def bprop(x, y, keep_prob, out, dout):
         return (do_mask(dout, y, keep_prob), zeros_like(y), zeros_like(keep_prob))
+
     return bprop
 
 
@@ -213,6 +222,7 @@ def get_bprop_relu(self):
     def bprop(x, out, dout):
         dx = input_grad(dout, out)
         return (dx,)
+
     return bprop
 
 
@@ -224,6 +234,7 @@ def get_bprop_relu6(self):
     def bprop(x, out, dout):
         dx = input_grad(dout, x)
         return (dx,)
+
     return bprop
 
 
@@ -236,6 +247,7 @@ def get_bprop_relu_v2(self):
         mask = out[1]
         dx = input_grad(dout[0], mask)
         return (dx,)
+
     return bprop
 
 
@@ -247,6 +259,7 @@ def get_bprop_hswish(self):
     def bprop(x, out, dout):
         dx = input_grad(dout, x)
         return (dx,)
+
     return bprop
 
 
@@ -258,6 +271,7 @@ def get_bprop_hsigmoid(self):
     def bprop(x, out, dout):
         dx = input_grad(dout, x)
         return (dx,)
+
     return bprop
 
 
@@ -269,6 +283,7 @@ def get_bprop_elu(self):
     def bprop(x, out, dout):
         dx = input_grad(dout, x)
         return (dx,)
+
     return bprop
 
 
@@ -280,6 +295,7 @@ def get_bprop_sigmoid(self):
     def bprop(x, out, dout):
         dx = input_grad(out, dout)
         return (dx,)
+
     return bprop
 
 
@@ -294,6 +310,7 @@ def get_bprop_softmax(self):
     def bprop(x, out, dout):
         dx = mul(sub(dout, sum_func(mul(dout, out), axis)), out)
         return (dx,)
+
     return bprop
 
 
@@ -305,6 +322,7 @@ def get_bprop_log_softmax(self):
     def bprop(x, out, dout):
         dx = logsoftmax_grad(out, dout)
         return (dx,)
+
     return bprop
 
 
@@ -316,6 +334,7 @@ def get_bprop_tanh(self):
     def bprop(x, out, dout):
         dx = logsoftmax_grad(out, dout)
         return (dx,)
+
     return bprop
 
 
@@ -327,6 +346,7 @@ def get_bprop_gelu(self):
     def bprop(x, out, dout):
         dx = input_grad(dout, x, out)
         return (dx,)
+
     return bprop
 
 
@@ -343,6 +363,7 @@ def get_bprop_fused_batch_norm(self):
         dscale = out[1]
         dbias = out[2]
         return dx, dscale, dbias, zeros_like(mean), zeros_like(variance)
+
     return bprop
 
 
@@ -366,6 +387,7 @@ def get_bprop_batch_norm(self):
         dscale = out[1]
         dbias = out[2]
         return dx, dscale, dbias, zeros_like(mean), zeros_like(variance)
+
     return bprop
 
 
@@ -377,6 +399,7 @@ def get_bprop_layer_norm(self):
     def bprop(x, gamma, beta, out, dout):
         dx, d_gamma, d_beta = layer_norm_grad(x, dout[0], out[2], out[1], gamma)
         return dx, d_gamma, d_beta
+
     return bprop
 
 
@@ -388,6 +411,7 @@ def get_bprop_l2normalize(self):
     def bprop(x, out, dout):
         dx = input_grad(x, out, dout)
         return (dx,)
+
     return bprop
 
 
@@ -400,6 +424,7 @@ def get_bprop_softmax_cross_entropy_with_logits(self):
         grad = out[1]
         grad = grad * expand(dout[0], -1)
         return grad, zeros_like(labels)
+
     return bprop
 
 
@@ -417,6 +442,7 @@ def get_bprop_sparse_softmax_cross_entropy_with_logits(self):
             grad = F.depend(grad, out)
             grad = grad * dout
         return grad, zeros_like(labels)
+
     return bprop
 
 
@@ -428,6 +454,7 @@ def get_bprop_resize_bilinear(self):
     def bprop(x, out, dout):
         dx = resize_grad(dout, x)
         return (dx,)
+
     return bprop
 
 
@@ -437,6 +464,7 @@ def get_bprop_onehot(self):
 
     def bprop(indices, depth, on_value, off_value, out, dout):
         return zeros_like(indices), zeros_like(depth), zeros_like(on_value), zeros_like(off_value)
+
     return bprop
 
 
@@ -453,6 +481,7 @@ def get_bprop_top_kv2(self):
         updates = dout[0]
         shapes = shape_op(input_x)
         return scatter(indices, updates, shapes), zeros_like(k)
+
     return bprop
 
 
@@ -518,6 +547,7 @@ def get_bprop_lstm(self):
         dx, dhx, dcx = lstm_grad_data(y, dy, dhy, dcy, w, hx, cx, reserve, state)
         dw = lstm_grad_weight(F.depend(x, dx), hx, y, reserve, state)
         return dx, dhx, dcx, dw
+
     return bprop
 
 
@@ -529,6 +559,7 @@ def get_bprop_sigmoid_crossentropy_with_logits(self):
     def bprop(x, y, out, dout):
         dx = op(x, y, dout)
         return (dx, zeros_like(y))
+
     return bprop
 
 
@@ -545,6 +576,7 @@ def get_bprop_pad(self):
         shp = shape_op(x)
         dx = P.Slice()(dout, begin, shp)
         return (dx,)
+
     return bprop
 
 
@@ -556,6 +588,7 @@ def get_bprop_mirror_pad(self):
     def bprop(x, paddings, out, dout):
         dx = mirror_pad_grad(dout, paddings, x)
         return (dx, zeros_like(paddings))
+
     return bprop
 
 
diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py
index f9240ee325..ce1e02e915 100644
--- a/mindspore/ops/_op_impl/tbe/__init__.py
+++ b/mindspore/ops/_op_impl/tbe/__init__.py
@@ -151,3 +151,5 @@ from .greater_equal import _greater_equal_tbe
 from .not_equal import _not_equal_tbe
 from .floor_mod import _floor_mod_tbe
 from .scatter_nd_update import _scatter_nd_update_tbe
+from .avg_pool import _avg_pool_tbe
+from .avg_pool_grad import _avg_pool_grad_tbe
diff --git a/mindspore/ops/_op_impl/tbe/avg_pool.py b/mindspore/ops/_op_impl/tbe/avg_pool.py
new file mode 100644
index 0000000000..5db5947b01
--- /dev/null
+++ b/mindspore/ops/_op_impl/tbe/avg_pool.py
@@ -0,0 +1,39 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""AvgPool op"""
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+
+avg_pool_op_info = TBERegOp("AvgPool") \
+    .fusion_type("OPAQUE") \
+    .async_flag(False) \
+    .binfile_name("avg_pool.so") \
+    .compute_cost(10) \
+    .kernel_name("avg_pool") \
+    .partial_flag(True) \
+    .attr("ksize", "required", "listInt", "all") \
+    .attr("strides", "required", "listInt", "all") \
+    .attr("padding", "required", "str", "all") \
+    .attr("data_format", "optional", "str", "all") \
+    .input(0, "x", False, "required", "all") \
+    .output(0, "y", False, "required", "all") \
+    .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \
+    .get_op_info()
+
+
+@op_info_register(avg_pool_op_info)
+def _avg_pool_tbe():
+    """AvgPool TBE register"""
+    return
diff --git a/mindspore/ops/_op_impl/tbe/avg_pool_grad.py b/mindspore/ops/_op_impl/tbe/avg_pool_grad.py
new file mode 100644
index 0000000000..693636edcd
--- /dev/null
+++ b/mindspore/ops/_op_impl/tbe/avg_pool_grad.py
@@ -0,0 +1,42 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""AvgPoolGrad op"""
+from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
+
+avg_pool_grad_op_info = TBERegOp("AvgPoolGrad") \
+    .fusion_type("OPAQUE") \
+    .async_flag(False) \
+    .binfile_name("avg_pool_grad_d.so") \
+    .compute_cost(10) \
+    .kernel_name("avg_pool_grad_d") \
+    .partial_flag(True) \
+    .attr("x_origin", "required", "listInt", "all") \
+    .attr("ksize", "required", "listInt", "all") \
+    .attr("strides", "required", "listInt", "all") \
+    .attr("padding", "required", "str", "all") \
+    .attr("data_format", "optional", "str", "all") \
+    .input(0, "input_grad", False, "required", "all") \
+    .input(1, "mean_matrix", False, "optional", "all") \
+    .input(2, "kernel_matrix", False, "optional", "all") \
+    .output(0, "out_grad", True, "required", "all") \
+    .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_C1HWNCoC0, DataType.F16_5HD) \
+    .get_op_info()
+
+
+@op_info_register(avg_pool_grad_op_info)
+def _avg_pool_grad_tbe():
+    """AvgPoolGrad TBE register: stub whose decorator is handed avg_pool_grad_op_info; the body does nothing."""
+    return
diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py
index 868d3b359e..d83f5accd0 100644
--- a/mindspore/ops/operations/__init__.py
+++ b/mindspore/ops/operations/__init__.py
@@ -57,7 +57,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
                      Gelu, Elu,
                      GetNext, L2Normalize, LayerNorm, L2Loss,
                      LogSoftmax,
-                     MaxPool, ExtractImagePatches,
+                     MaxPool,
                      AvgPool, Conv2DBackpropInput, ConfusionMulGrad,
                      MaxPoolWithArgmax, OneHot, Pad, MirrorPad, PReLU, ReLU, ReLU6, ReLUV2, HSwish, HSigmoid,
                      ResizeBilinear, Sigmoid,
@@ -89,7 +89,6 @@ __all__ = [
     'Sqrt',
     'Square',
     'Conv2D',
-    'ExtractImagePatches',
     'Flatten',
     'MaxPoolWithArgmax',
     'FusedBatchNorm',
diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py
index e130dcc382..747caa7a96 100644
--- a/mindspore/ops/operations/_grad_ops.py
+++ b/mindspore/ops/operations/_grad_ops.py
@@ -59,6 +59,23 @@ class ACosGrad(PrimitiveWithInfer):
         return x
 
 
+class AcoshGrad(PrimitiveWithInfer):
+    """Performs grad of Acosh operation. Takes x and dout; the inferred shape and dtype are those of x."""
+
+    @prim_attr_register
+    def __init__(self):
+        """init AcoshGrad (takes no arguments)"""
+
+    def infer_shape(self, x, dout):
+        validator.check("x shape", x, "dout shape", dout, Rel.EQ, self.name)  # x and dout compared with Rel.EQ
+        return x
+
+    def infer_dtype(self, x, dout):
+        args = {"x": x, "dout": dout}
+        validator.check_tensor_type_same(args, mstype.number_type, self.name)  # validated against number_type
+        return x
+
+
 class BatchNormGrad(PrimitiveWithInfer):
     """Performs grad of BatchNorm operation."""
 
diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py
new file mode 100644
index 0000000000..632f9c0a20
--- /dev/null
+++ b/mindspore/ops/operations/_inner_ops.py
@@ -0,0 +1,98 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Inner operators."""
+
+from ..._checkparam import Validator as validator
+from ...common import dtype as mstype
+from ..primitive import  PrimitiveWithInfer, prim_attr_register
+
+
+class ExtractImagePatches(PrimitiveWithInfer):
+    """
+    Extract patches from images.
+    The input tensor must be a 4-D tensor and the data format is NHWC.
+
+    Args:
+        ksizes (Union[tuple[int], list[int]]): The size of sliding window, should be a tuple or list of int,
+            and the format is [1, ksize_row, ksize_col, 1].
+        strides (Union[tuple[int], list[int]]): Distance between the centers of the two consecutive patches,
+            should be a tuple or list of int, and the format is [1, stride_row, stride_col, 1].
+        rates (Union[tuple[int], list[int]]): In each extracted patch, the gap between the corresponding dim
+            pixel positions, should be a tuple or list of int, and the format is [1, rate_row, rate_col, 1].
+        padding (str): The type of padding algorithm, is a string whose value is "same" or "valid",
+            not case sensitive. Default: "valid".
+
+            - same: Means that the patch can take the part beyond the original image, and this part is filled with 0.
+
+            - valid: Means that the patch area taken must be completely contained in the original image.
+
+    Inputs:
+        - **input_x** (Tensor) - A 4-D tensor whose shape is [in_batch, in_row, in_col, in_depth] and
+          data type is int8, float16, uint8.
+
+    Outputs:
+        Tensor, a 4-D tensor whose data type is same as 'input_x',
+        and the shape is [out_batch, out_row, out_col, out_depth], the out_batch is same as the in_batch.
+    """
+
+    @prim_attr_register
+    def __init__(self, ksizes, strides, rates, padding="valid"):
+        """Validate ksizes, strides and rates, then store the upper-cased padding as a primitive attribute."""
+        def _check_tuple_or_list(arg_name, arg_val, prim_name):
+            validator.check_value_type(f"{arg_name}s", arg_val, [tuple, list], self.name)
+            if len(arg_val) != 4 or arg_val[0] != 1 or arg_val[3] != 1:
+                raise ValueError(f"For \'{prim_name}\' the format of {arg_name}s should be [1, {arg_name}_row, "
+                                 f"{arg_name}_col, 1], but got {arg_val}.")
+            if not isinstance(arg_val[1], int) or not isinstance(arg_val[2], int) or arg_val[1] < 1 or arg_val[2] < 1:
+                raise ValueError(f"For '{prim_name}' the {arg_name}_row and {arg_name}_col in {arg_name}s should be an "
+                                 f"positive integer number, but got {arg_name}_row is {arg_val[1]}, {arg_name}_col "
+                                 f"is {arg_val[2]}")
+
+        _check_tuple_or_list("ksize", ksizes, self.name)
+        _check_tuple_or_list("stride", strides, self.name)
+        _check_tuple_or_list("rate", rates, self.name)
+        self.padding = validator.check_string('padding', padding.upper(), ['VALID', 'SAME'], self.name)
+        self.add_prim_attr("padding", self.padding)
+
+    def infer_shape(self, input_x):
+        """Infer [out_batch, out_row, out_col, out_depth]; raises ValueError when input_x is not a 4-D shape."""
+        if len(input_x) != 4:
+            raise ValueError("The `input_x` should be a 4-D tensor, "
+                             f"but got a {len(input_x)}-D tensor whose shape is {input_x}")
+        in_batch, in_row, in_col, in_depth = input_x
+        _, ksize_row, ksize_col, _ = self.ksizes
+        _, stride_row, stride_col, _ = self.strides
+        _, rate_row, rate_col, _ = self.rates
+
+        out_batch = in_batch
+        out_depth = ksize_row * ksize_col * in_depth
+
+        if self.padding == "VALID":
+            out_row = \
+                (in_row - (ksize_row + (ksize_row - 1) * (rate_row - 1))) // stride_row + 1
+            out_col = \
+                (in_col - (ksize_col + (ksize_col - 1) * (rate_col - 1))) // stride_col + 1
+        else:
+            out_row = (in_row - 1) // stride_row + 1
+            out_col = (in_col - 1) // stride_col + 1
+
+        out_shape = [out_batch, out_row, out_col, out_depth]
+        return out_shape
+
+    def infer_dtype(self, input_x):
+        """Type-check input_x and return it (NOTE(review): float32 is checked where the class doc says uint8)."""
+        validator.check_tensor_type_same({"input_x": input_x}, (mstype.int8, mstype.float16, mstype.float32), self.name)
+        return input_x
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 66656b559e..c355707242 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2654,82 +2654,6 @@ class ApplyFtrl(PrimitiveWithInfer):
         return var_type
 
 
-class ExtractImagePatches(PrimitiveWithInfer):
-    """
-    Extract patches from images.
-    The input tensor must be a 4-D tensor and the data format is NHWC.
-
-    Args:
-        ksizes (Union[tuple[int], list[int]]): The size of sliding window, should be a tuple or list of int,
-            and the format is [1, ksize_row, ksize_col, 1].
-        strides (Union[tuple[int], list[int]]): Distance between the centers of the two consecutive patches,
-            should be a tuple or list of int, and the format is [1, stride_row, stride_col, 1].
-        rates (Union[tuple[int], list[int]]): In each extracted patch, the gap between the corresponding dim
-            pixel positions, should be a tuple or list of int, and the format is [1, rate_row, rate_col, 1].
-        padding (str): The type of padding algorithm, is a string whose value is "same" or "valid",
-            not case sensitive. Default: "valid".
-
-            - same: Means that the patch can take the part beyond the original image, and this part is filled with 0.
-
-            - valid: Means that the patch area taken must be completely contained in the original image.
-
-    Inputs:
-        - **input_x** (Tensor) - A 4-D tensor whose shape is [in_batch, in_row, in_col, in_depth] and
-          data type is int8, float16, uint8.
-
-    Outputs:
-        Tensor, a 4-D tensor whose data type is same as 'input_x',
-        and the shape is [out_batch, out_row, out_col, out_depth], the out_batch is same as the in_batch.
-    """
-
-    @prim_attr_register
-    def __init__(self, ksizes, strides, rates, padding="valid"):
-        """init"""
-        def _check_tuple_or_list(arg_name, arg_val, prim_name):
-            validator.check_value_type(f"{arg_name}s", ksizes, [tuple, list], self.name)
-            if len(arg_val) != 4 or arg_val[0] != 1 or arg_val[3] != 1:
-                raise ValueError(f"For \'{prim_name}\' the format of {arg_name}s should be [1, {arg_name}_row, "
-                                 f"{arg_name}_col, 1], but got {arg_val}.")
-            if not isinstance(arg_val[1], int) or not isinstance(arg_val[2], int) or arg_val[1] < 1 or arg_val[2] < 1:
-                raise ValueError(f"For '{prim_name}' the {arg_name}_row and {arg_name}_col in {arg_name}s should be an "
-                                 f"positive integer number, but got {arg_name}_row is {arg_val[1]}, {arg_name}_col "
-                                 f"is {arg_val[2]}")
-
-        _check_tuple_or_list("ksize", ksizes, self.name)
-        _check_tuple_or_list("stride", strides, self.name)
-        _check_tuple_or_list("rate", rates, self.name)
-        self.padding = validator.check_string('padding', padding.upper(), ['VALID', 'SAME'], self.name)
-        self.add_prim_attr("padding", self.padding)
-
-    def infer_shape(self, input_x):
-        in_batch, in_row, in_col, in_depth = input_x
-        _, ksize_row, ksize_col, _ = self.ksizes
-        _, stride_row, stride_col, _ = self.strides
-        _, rate_row, rate_col, _ = self.rates
-        if len(input_x) != 4:
-            raise ValueError("The `input_x` should be a 4-D tensor, "
-                             f"but got a {len(input_x)}-D tensor whose shape is {input_x}")
-
-        out_batch = in_batch
-        out_depth = ksize_row * ksize_col * in_depth
-
-        if self.padding == "VALID":
-            out_row = \
-                (in_row - (ksize_row + (ksize_row - 1) * (rate_row - 1))) // stride_row + 1
-            out_col = \
-                (in_col - (ksize_col + (ksize_col - 1) * (rate_col - 1))) // stride_col + 1
-        else:
-            out_row = (in_row - 1) // stride_row + 1
-            out_col = (in_col - 1) // stride_col + 1
-
-        out_shape = [out_batch, out_row, out_col, out_depth]
-        return out_shape
-
-    def infer_dtype(self, input_x):
-        validator.check_tensor_type_same({"input_x": input_x}, (mstype.int8, mstype.float16, mstype.float32), self.name)
-        return input_x
-
-
 class ConfusionMulGrad(PrimitiveWithInfer):
     """
     `output0` is the result of which input0 dot multily input1.
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index 3b8c50533b..68ff816fb3 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -265,8 +265,8 @@ test_case_math_ops = [
         'desc_bprop': [[2, 3]]}),
     ('Acosh', {
         'block': P.Acosh(),
-        'desc_inputs': [Tensor(np.random.rand(4).astype(np.float16))],
-        'skip': ['backward']}),
+        'desc_inputs': [[3, 4, 5]],
+        'desc_bprop': [[3, 4, 5]]}),
     ('Sin', {
         'block': P.Sin(),
         'desc_inputs': [[2, 3]],

From 3a4c28fa33c1c7c67148c878eb0826888821be8c Mon Sep 17 00:00:00 2001
From: lizhenyu <lizhenyu13@huawei.com>
Date: Tue, 28 Apr 2020 17:38:45 +0800
Subject: [PATCH 160/242] change directory of akg cuda kernel

---
 mindspore/_akg/op_build.py                    |  9 +-
 .../ccsrc/device/gpu/gpu_kernel_build.cc      |  6 +-
 .../ccsrc/device/gpu/gpu_kernel_runtime.cc    |  2 +
 mindspore/ccsrc/kernel/common_utils.cc        | 96 ++++++++-----------
 mindspore/ccsrc/kernel/common_utils.h         |  9 +-
 5 files changed, 53 insertions(+), 69 deletions(-)

diff --git a/mindspore/_akg/op_build.py b/mindspore/_akg/op_build.py
index 44a250bd9e..aa6a65cff1 100644
--- a/mindspore/_akg/op_build.py
+++ b/mindspore/_akg/op_build.py
@@ -24,13 +24,13 @@ import _akg
 from _akg import save_gpu_param as gpu_utils
 from _akg.utils import validation_check as vc_util
 
-MS_CUDA_KERNEL_PATH = "/tmp/cuda_meta/"
 
 @vc_util.check_input_type(list, (list, tuple), (list, tuple), (types.FunctionType, type(None)), str, str, dict)
 def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attrs):
     """op_build"""
+    kernel_meta_path = "./cuda_meta_" + str(os.getpid()) + "/"
     if device == "cuda":
-        cuda_path = os.path.realpath(MS_CUDA_KERNEL_PATH)
+        cuda_path = os.path.realpath(kernel_meta_path)
         if not os.path.isdir(cuda_path):
             os.makedirs(cuda_path)
         if not opnames:
@@ -43,7 +43,7 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr
             logging.error("no schedule func found %s", str(schedule_name))
             return None
 
-        ptx_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".ptx")
+        ptx_file = os.path.realpath(kernel_meta_path + kernel_name + ".ptx")
         if os.path.exists(ptx_file):
             os.chmod(ptx_file, 0o600)
         try:
@@ -55,7 +55,8 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr
                     foo = _akg.tvm.build(s, args, device, name=kernel_name)
                     ptx_code = foo.imported_modules[0].get_source("ptx")
                     file.write(ptx_code)
-                    json_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".json")
+                    json_file = os.path.realpath(
+                        kernel_meta_path + kernel_name + ".json")
                     kernel_info = (ptx_code, json_file, kernel_name)
                     gpu_utils.save_gpu_params(s, args, kernel_info)
             os.chmod(ptx_file, 0o400)
diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc
index 2a2a2be065..f9d2cb878f 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_build.cc
@@ -28,11 +28,7 @@ namespace gpu {
 namespace py = pybind11;
 void GpuBuild(const KernelGraphPtr &kernel_graph) {
   kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
-  if (!bin_map->ReadIndex(kernel::kGpuKernelMeta)) {
-    MS_LOG(INFO) << "kernel cache miss, cache directory will be created later.";
-  } else {
-    MS_LOG(INFO) << "cache initialize to[" << kernel::kGpuKernelMeta << "].";
-  }
+  bin_map->Initialize();
   MS_EXCEPTION_IF_NULL(kernel_graph);
   auto kernels = kernel_graph->execution_order();
   for (const auto &kernel : kernels) {
diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
index 7eea5501d5..2d53097dd8 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
@@ -27,6 +27,7 @@
 #include "device/gpu/gpu_common.h"
 #include "common/utils.h"
 #include "device/gpu/gpu_memory_manager.h"
+#include "kernel/common_utils.h"
 
 namespace mindspore {
 namespace device {
@@ -104,6 +105,7 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
   if (mem_manager_ != nullptr) {
     mem_manager_->FreeDeviceMemory();
   }
+  kernel::KernelMeta::GetInstance()->RemoveKernelCache();
 }
 
 void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc
index 5abaff412e..8316116486 100644
--- a/mindspore/ccsrc/kernel/common_utils.cc
+++ b/mindspore/ccsrc/kernel/common_utils.cc
@@ -114,53 +114,35 @@ bool IsAtomicNode(const CNodePtr &kernel_node) {
   return atomic_flag;
 }
 
-bool KernelMeta::ReadIndex(const std::string &bin_dir) {
-  DIR *dir = opendir(bin_dir.c_str());
-  if (dir == nullptr) {
+void KernelMeta::Initialize() {
+  kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/";
+  // remove old kernel cache
+  RemoveKernelCache();
+
 #if defined(_WIN32) || defined(_WIN64)
-    auto ret = mkdir(bin_dir.c_str());
+  auto ret = mkdir(kernel_meta_path_.c_str());
 #else
-    auto ret = mkdir(bin_dir.c_str(), S_IRWXG | S_IRWXU);
+  auto ret = mkdir(kernel_meta_path_.c_str(), S_IRWXG | S_IRWXU);
 #endif
-    if (ret != 0) {
-      MS_LOG(INFO) << "kernel dir not exist[" << bin_dir << "].";
-      return false;
-    }
-    dir = opendir(bin_dir.c_str());
+  if (ret != 0) {
+    MS_LOG(INFO) << "kernel dir [" << kernel_meta_path_ << "], will be created later";
   }
+  initialized_ = true;
+}
 
-  struct dirent *entry;
-  while ((entry = readdir(dir)) != nullptr) {
-    string bin_dir_tmp = bin_dir;
-    std::string cce_json = entry->d_name;
-    if (cce_json.length() <= 5) {
-      continue;
-    }
-
-    std::string suffix = cce_json.substr(cce_json.length() - 5);
-    if (suffix != kJsonSuffix) {
-      continue;
-    }
-
-    auto sp = cce_json.rfind('/');
-    if (sp != std::string::npos) {
-      continue;
-    }
-
-    sp = cce_json.rfind('.');
-    if (sp == std::string::npos) {
-      continue;
+void KernelMeta::RemoveKernelCache() {
+  if (access(kernel_meta_path_.c_str(), 0) == 0) {
+    DIR *dir = opendir(kernel_meta_path_.c_str());
+    MS_EXCEPTION_IF_NULL(dir);
+    struct dirent *entry;
+    while ((entry = readdir(dir)) != nullptr) {
+      std::string kernel_file = entry->d_name;
+      std::string kernel_file_realpath = kernel_meta_path_ + kernel_file;
+      (void)remove(kernel_file_realpath.c_str());
     }
-    auto kernel_name = cce_json.substr(0, sp);
-    (void)bin_dir_tmp.append("/");
-    (void)bin_dir_tmp.append(cce_json);
-    kernel_meta_map_[kernel_name] = bin_dir_tmp;
+    (void)closedir(dir);
+    (void)rmdir(kernel_meta_path_.c_str());
   }
-  (void)closedir(dir);
-
-  MS_LOG(INFO) << "Cache kernel initialized, kernel size[" << kernel_meta_map_.size() << "].";
-  initialized_ = true;
-  return true;
 }
 
 std::string KernelMeta::Search(const std::string &kernel_name) const {
@@ -176,11 +158,11 @@ std::string KernelMeta::Search(const std::string &kernel_name) const {
   }
 }
 
-bool KernelMeta::Insert(const std::string &kernel_name, const std::string &cce_json) {
+bool KernelMeta::Insert(const std::string &kernel_name, const std::string &kernel_json) {
   if (!initialized_) {
     return false;
   }
-  kernel_meta_map_[kernel_name] = cce_json;
+  kernel_meta_map_[kernel_name] = kernel_json;
   return true;
 }
 
@@ -191,8 +173,8 @@ bool CheckCache(const std::string &kernel_name) {
     MS_LOG(DEBUG) << "kernel cache is invalid.";
     return false;
   }
-  std::string cce_json = bin_map->Search(kernel_name);
-  bool ret = (!cce_json.empty());
+  std::string kernel_json = bin_map->Search(kernel_name);
+  bool ret = (!kernel_json.empty());
   if (ret) {
     MS_LOG(INFO) << "Kernel name:" << kernel_name << " has registed.";
   } else {
@@ -209,12 +191,12 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro
     return nullptr;
   }
 
-  std::string cce_json = bin_map->Search(kernel_name);
-  if (!cce_json.empty()) {
+  std::string kernel_json = bin_map->Search(kernel_name);
+  if (!kernel_json.empty()) {
     KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
     // just a tmp solution.
-    if (!kernel_pack->ReadFromJsonFile(cce_json, processor)) {
-      MS_LOG(DEBUG) << "Read cache json and bin file failed[" << cce_json << "].";
+    if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
+      MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "].";
       return nullptr;
     } else {
       return kernel_pack;
@@ -227,26 +209,26 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro
 
 KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor) {
   MS_LOG(INFO) << "kernel name:" << kernel_name << ", processr:" << processor;
-  std::string cce_json;
+  KernelMeta *bin_map = KernelMeta::GetInstance();
+  std::string kernel_json;
   if (processor == kProcessorAiCore || processor == kProcessorAiCpu) {
-    cce_json = kCceKernelMeta;
+    kernel_json = kCceKernelMeta;
   } else {
-    cce_json = kGpuKernelMeta;
+    kernel_json = bin_map->GetKernelMetaPath();
   }
-  (void)cce_json.append(kernel_name).append(kJsonSuffix);
+  (void)kernel_json.append(kernel_name).append(kJsonSuffix);
   KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
-  if (!kernel_pack->ReadFromJsonFile(cce_json, processor)) {
-    MS_LOG(DEBUG) << "Read json and bin file failed[" << cce_json << "].";
+  if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
+    MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "].";
     return nullptr;
   }
 
-  KernelMeta *bin_map = KernelMeta::GetInstance();
   if (bin_map == nullptr) {
     MS_LOG(DEBUG) << "kernel cache is invalid.";
     return nullptr;
   }
-  if (bin_map->Insert(kernel_name, cce_json)) {
-    MS_LOG(INFO) << "Insert to cache success[" << cce_json << "], kernelname[" << kernel_name << "].";
+  if (bin_map->Insert(kernel_name, kernel_json)) {
+    MS_LOG(INFO) << "Insert to cache success[" << kernel_json << "], kernelname[" << kernel_name << "].";
   }
   return kernel_pack;
 }
diff --git a/mindspore/ccsrc/kernel/common_utils.h b/mindspore/ccsrc/kernel/common_utils.h
index 07f191cc7b..47fe96c4c9 100644
--- a/mindspore/ccsrc/kernel/common_utils.h
+++ b/mindspore/ccsrc/kernel/common_utils.h
@@ -30,7 +30,7 @@
 namespace mindspore {
 namespace kernel {
 constexpr auto kCceKernelMeta = "./kernel_meta/";
-constexpr auto kGpuKernelMeta = "/tmp/cuda_meta/";
+constexpr auto kGpuKernelMeta = "./cuda_meta";
 constexpr auto kProcessorAiCore = "aicore";
 constexpr auto kProcessorAiCpu = "aicpu";
 constexpr auto kProcessorCuda = "cuda";
@@ -51,9 +51,11 @@ using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
 class KernelMeta {
  public:
   KernelMeta() = default;
-  bool ReadIndex(const std::string &bin_dir);
+  void Initialize();
+  void RemoveKernelCache();
   std::string Search(const std::string &kernel_name) const;
-  bool Insert(const std::string &kernel_name, const std::string &cce_json);
+  bool Insert(const std::string &kernel_name, const std::string &kernel_json);
+  std::string GetKernelMetaPath() { return kernel_meta_path_; }
 
   static KernelMeta *GetInstance() {
     static KernelMeta kernel_meta;
@@ -63,6 +65,7 @@ class KernelMeta {
 
  private:
   bool initialized_ = false;
+  std::string kernel_meta_path_;
   std::unordered_map<std::string, std::string> kernel_meta_map_;
 };
 

From d03e04b828f010f3584311611d3374757be2e524 Mon Sep 17 00:00:00 2001
From: jjfeing <jiangjianfei3@huawei.com>
Date: Tue, 28 Apr 2020 20:25:15 +0800
Subject: [PATCH 161/242] support buffer fusion

---
 .../parallel_compile/tbe_compiler/common.py   |  10 +-
 mindspore/ccsrc/kernel/kernel_fusion.cc       |   3 +-
 mindspore/ccsrc/kernel/tbe/tbe_adapter.cc     |  46 +++++
 mindspore/ccsrc/kernel/tbe/tbe_adapter.h      |  13 +-
 .../ccsrc/kernel/tbe/tbe_kernel_build.cc      | 157 ++++++++++++------
 mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h |  15 +-
 mindspore/ccsrc/utils/utils.h                 |   2 +
 mindspore/ops/_op_impl/tbe/reduce_mean.py     |   1 +
 mindspore/ops/operations/nn_ops.py            |   1 +
 9 files changed, 182 insertions(+), 66 deletions(-)

diff --git a/mindspore/_extends/parallel_compile/tbe_compiler/common.py b/mindspore/_extends/parallel_compile/tbe_compiler/common.py
index 6258cf8d45..39866d2bac 100644
--- a/mindspore/_extends/parallel_compile/tbe_compiler/common.py
+++ b/mindspore/_extends/parallel_compile/tbe_compiler/common.py
@@ -122,10 +122,12 @@ def get_args(op_info, arg_type):
 
     elif arg_type == 'attrs':
         for item in op_info[arg_type]:
-            if 'value' not in item:
-                raise ValueError("Json string Errors, attr key:value not found.")
-            if item["name"] != "isRef":
-                args.append(item['value'])
+            if item["valid"]:
+                if 'value' not in item:
+                    raise ValueError("Json string Errors, attr key:value not found.")
+                if item["name"] != "isRef":
+                    args.append(item['value'])
+
     return args
 
 
diff --git a/mindspore/ccsrc/kernel/kernel_fusion.cc b/mindspore/ccsrc/kernel/kernel_fusion.cc
index cd8936f218..4e1ad97e23 100644
--- a/mindspore/ccsrc/kernel/kernel_fusion.cc
+++ b/mindspore/ccsrc/kernel/kernel_fusion.cc
@@ -108,7 +108,8 @@ std::map<int32_t, KernelModPtr> KernelFusion(const std::vector<FusionScopeInfo>
     }
 
     if ((task_result != nullptr) && (strcmp(task_result, "Success") != 0)) {
-      MS_LOG(DEBUG) << "fuison op build failed, err log: " << task_result << "  change to single op build.";
+      MS_LOG(INFO) << "Fusion warning: Fuison op build failed, err log: " << task_result
+                   << "  change to single op build.";
       build_failed_num++;
     }
     auto kernel_mod_item = build_manger->TaskFinishProcess(task_id, false);
diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
index 44750fab4f..2e2e27cbce 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
@@ -153,6 +153,52 @@ void TbeAdapter::InputOrderPass(const std::string &op_name, std::vector<std::vec
   }
 }
 
+void TbeAdapter::FusionInputOrderPass(const std::string &op_name, const std::vector<nlohmann::json> &inputs_list,
+                                      std::vector<nlohmann::json> *inputs_json) {
+  // Appends inputs_list to *inputs_json, reordered when op_name is in input_order_adjusted_ops.
+  MS_EXCEPTION_IF_NULL(inputs_json);
+  if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) {
+    (void)std::copy(inputs_list.begin(), inputs_list.end(), std::back_inserter((*inputs_json)));
+    return;
+  }
+  // MinimumGrad/MaximumGrad emit inputs as 2, 0, 1, 3...; every other adjusted op emits 1, 0, 2...
+  // In both cases one input moves to the front and the rest keep their relative order.
+  const size_t front = (op_name == "MinimumGrad" || op_name == "MaximumGrad") ? 2 : 1;
+  if (inputs_list.size() <= front) {
+    MS_LOG(EXCEPTION) << "Op " << op_name << " needs more than " << front << " inputs to reorder, but got "
+                      << inputs_list.size();
+  }
+  inputs_json->emplace_back(inputs_list[front]);
+  for (size_t i = 0; i < inputs_list.size(); ++i) {
+    if (i != front) {
+      inputs_json->emplace_back(inputs_list[i]);
+    }
+  }
+}
+
+void TbeAdapter::FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer,
+                                     std::vector<AnfNodePtr> *reorder_data_layer) {
+  // Appends data_layer to *reorder_data_layer, reordered when op_name is in input_order_adjusted_ops.
+  MS_EXCEPTION_IF_NULL(reorder_data_layer);
+  if (input_order_adjusted_ops.find(op_name) == input_order_adjusted_ops.end()) {
+    (void)std::copy(data_layer.begin(), data_layer.end(), std::back_inserter((*reorder_data_layer)));
+    return;
+  }
+  // MinimumGrad/MaximumGrad emit nodes as 2, 0, 1, 3...; every other adjusted op emits 1, 0, 2...
+  // In both cases one node moves to the front and the rest keep their relative order.
+  const size_t front = (op_name == "MinimumGrad" || op_name == "MaximumGrad") ? 2 : 1;
+  if (data_layer.size() <= front) {
+    MS_LOG(EXCEPTION) << "Op " << op_name << " needs more than " << front << " data nodes to reorder, but got "
+                      << data_layer.size();
+  }
+  reorder_data_layer->emplace_back(data_layer[front]);
+  for (size_t i = 0; i < data_layer.size(); ++i) {
+    if (i != front) {
+      reorder_data_layer->emplace_back(data_layer[i]);
+    }
+  }
+}
+
 std::map<std::string, FAttrsPass> TbeAdapter::build_json_attr_pass_map_ = {
   {"MaximumGrad", TbeAdapter::MaximumGradAttrJsonPass},
   {"MinimumGrad", TbeAdapter::MinimumGradAttrJsonPass},
diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h b/mindspore/ccsrc/kernel/tbe/tbe_adapter.h
index 27f6d315f6..0208d6c6a6 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.h
+++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.h
@@ -44,15 +44,12 @@ class TbeAdapter {
   static void GenTopKV2IndicesTensorInfo(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
                                          std::vector<nlohmann::json> *input_list, kCreaterType creater_type);
 
+  static void FusionInputOrderPass(const std::string &op_name, const std::vector<nlohmann::json> &inputs_list,
+                                   std::vector<nlohmann::json> *inputs_json);
+  static void FusionDataOrderPass(const std::string &op_name, const std::vector<AnfNodePtr> &data_layer,
+                                  std::vector<AnfNodePtr> *reorder_data_layer);
+
  private:
-  static void Conv2DAttrJsonPass(const AnfNodePtr &anf_node, const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
-                                 nlohmann::json *attrs_json);
-  static void Conv2DBackpropFilterAttrJsonPass(const AnfNodePtr &anf_node,
-                                               const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
-                                               nlohmann::json *attrs_json);
-  static void Conv2DBackpropInputAttrJsonPass(const AnfNodePtr &anf_node,
-                                              const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
-                                              nlohmann::json *attrs_json);
   static void MaximumGradAttrJsonPass(const AnfNodePtr &anf_node,
                                       const std::vector<std::shared_ptr<OpAttr>> &op_info_attrs,
                                       nlohmann::json *attrs_json);
diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
index 939e7146e6..24823b9275 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc
@@ -375,20 +375,26 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no
   MS_EXCEPTION_IF_NULL(primitive);
   for (const auto &attr_ptr : attrs_ptr) {
     std::string attr_name = attr_ptr->name();
+    nlohmann::json attr_obj;
+    attr_obj["name"] = attr_name;
     if (primitive->GetAttr(attr_name) != nullptr) {
-      nlohmann::json attr_obj;
       auto value = primitive->GetAttr(attr_name);
       std::string type = attr_ptr->type();
       ParseAttrValue(type, value, &attr_obj);
-      attr_obj["name"] = attr_name;
       attr_obj["valid"] = true;
-      (*attrs_json).push_back(attr_obj);
     } else {
-      if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD && op_info->impl_path() != "") {
-        MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name
-                          << " is required, but not set.";
+      if (op_info->impl_path().empty()) {
+        attr_obj["valid"] = false;
+      } else {
+        if (attr_ptr->param_type() == "required" && creater_type_ == SINGLE_BUILD) {
+          MS_LOG(EXCEPTION) << "op name: " << op_info->op_name() << " attr: " << attr_name
+                            << " is required, but not set.";
+        } else {
+          attr_obj["valid"] = false;
+        }
       }
     }
+    (*attrs_json).push_back(attr_obj);
   }
   return true;
 }
@@ -484,7 +490,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
   MS_EXCEPTION_IF_NULL(fusion_kernel);
   // get input layer info
   std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
-  if (!GetInputLayers(input_nodes, compute_nodes, &input_layers)) {
+  std::map<const AnfNodePtr, FusionDataType> spec_data_input;
+  if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
     return false;
   }
   // gen fusion scopre_op jsom
@@ -505,8 +512,8 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
   for (const auto &layer : input_layers) {
     for (const auto &data_input : layer) {
       nlohmann::json data_str;
-      if (!GenFusionDataInputJson(data_input, &data_str, &index)) {
-        MS_LOG(DEBUG) << "GenFusionDataInputJson faild.";
+      if (!GenFusionDataInputJson(data_input, spec_data_input, &data_str, &index)) {
+        MS_LOG(INFO) << "Fusion error: gen fusion datainput json faild.";
         return false;
       }
       data_list.push_back(data_str);
@@ -519,7 +526,7 @@ bool TbeKernelBuild::GenFusionScopeJson(const vector<mindspore::AnfNodePtr> &inp
 }
 
 void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
-                                 size_t desc_output_idx, nlohmann::json *output_desc) {
+                                 size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
   std::string output_desc_name = anf_node->fullname_with_scope();
   if (node_out_idx > 0) {
     output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
@@ -539,58 +546,109 @@ void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_
   (*output_desc)["shape"] = shape;
   auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
   if (format == kOpFormat_DEFAULT) {
-    if (ori_shape.size() == 4) {
-      format = kOpFormat_NCHW;
-    } else {
-      format = kOpFormat_ND;
-    }
+    format = ori_shape.size() == 4 ? kOpFormat_NCHW : kOpFormat_ND;
   }
   (*output_desc)["format"] = format;
   (*output_desc)["ori_format"] = kOpFormat_NCHW;
   (*output_desc)["output_index"] = desc_output_idx;
+  if (fusion_data_type == kFusionAddN && format == kOpFormat_NC1HWC0) {
+    std::vector<size_t> spec_shape = {};
+    spec_shape.emplace_back(shape[0]);
+    spec_shape.emplace_back(shape[1]);
+    spec_shape.emplace_back(shape[2] * shape[3]);
+    spec_shape.emplace_back(shape[4]);
+    (*output_desc)["shape"] = spec_shape;
+  } else if (fusion_data_type == kFusionReLUGradV2 && (*output_desc)["data_type"] == "uint8") {
+    std::vector<size_t> spec_shape = {};
+    spec_shape.emplace_back(shape[0]);
+    spec_shape.emplace_back(shape[1]);
+    spec_shape.emplace_back(shape[2] * shape[3]);
+    spec_shape.emplace_back(16);
+    (*output_desc)["shape"] = spec_shape;
+    (*output_desc)["data_type"] = "bool";
+  }
 }
 
 void TbeKernelBuild::GenReusedOutputDesc(const shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
                                          size_t output_index, nlohmann::json *output_desc) {
   std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
   (*output_desc)["name"] = NormalizeFullScopeName(output_desc_name);
-  (*output_desc)["data_type"] = tbe::TypeIdToString(kNumberTypeFloat32);
   (*output_desc)["output_index"] = output_index;
   std::vector<size_t> shape;
   (*output_desc)["shape"] = shape;
 }
 
-bool TbeKernelBuild::GetInputLayers(const vector<mindspore::AnfNodePtr> &input_nodes,
-                                    const vector<mindspore::AnfNodePtr> &compute_nodes,
-                                    std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers) {
+bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
+                                        const std::vector<mindspore::AnfNodePtr> &reorder_layer,
+                                        std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
+  if ((op_name == kReluGradV2OpName || op_name == kAddNOpName) && reorder_layer.empty()) {
+    MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. ";
+    return false;
+  }
+  MS_LOG(INFO) << "Fusion info: op_name: " << op_name << "input layer size: " << reorder_layer.size();
+  if (op_name == kReluGradV2OpName) {
+    (*spec_data_input)[reorder_layer[0]] = kFusionReLUGradV2;
+  } else if (op_name == kAddNOpName) {
+    for (const auto &it : reorder_layer) {
+      (*spec_data_input)[it] = kFusionAddN;
+    }
+  }
+  return true;
+}
+
+bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
+                                    const std::vector<mindspore::AnfNodePtr> &compute_nodes,
+                                    std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
+                                    std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
+  auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) {
+    auto op_name = AnfAlgo::GetCNodeName(it);
+    return op_name == kConv2DBackpropInputOpName;
+  });
+  bool need_spec = (result != compute_nodes.end());
   size_t input_size = 0;
   for (const auto &compute_node : compute_nodes) {
-    std::vector<mindspore::AnfNodePtr> layer;
+    std::vector<mindspore::AnfNodePtr> layer = {};
+    std::vector<mindspore::AnfNodePtr> reorder_layer = {};
     MS_EXCEPTION_IF_NULL(compute_node);
+    auto op_name = AnfAlgo::GetCNodeName(compute_node);
     auto ccompute_node = compute_node->cast<CNodePtr>();
     if (ccompute_node == nullptr) {
-      MS_LOG(DEBUG) << "fusion compute node must be cnode";
+      MS_LOG(INFO) << "Fusion error: fusion compute node must be cnode";
       return false;
     }
+    MS_LOG(INFO) << "Fusion info: compute name: " << compute_node->fullname_with_scope();
     for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) {
       auto input = ccompute_node->input(i);
       auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input);
       if (find_iter != input_nodes.end()) {
+        MS_LOG(INFO) << "Fusion info: add compute node's [" << i << "] input: " << input->fullname_with_scope();
         layer.emplace_back((*find_iter));
+      } else {
+        MS_LOG(INFO) << "Fusion warnig: this input [" << i << "] may be pre compute(" << input->fullname_with_scope()
+                     << ") node's output.";
+      }
+    }
+    TbeAdapter::FusionDataOrderPass(op_name, layer, &reorder_layer);
+    if (need_spec) {
+      MS_LOG(INFO) << "Fusion info: match conv2d backprop input + ... patten.";
+      if (!GetSpecInputLayers(op_name, reorder_layer, spec_data_input)) {
+        return false;
       }
     }
-    input_size += layer.size();
-    input_layers->emplace_back(layer);
+    input_size += reorder_layer.size();
+    input_layers->emplace_back(reorder_layer);
   }
   if (input_nodes.size() != input_size) {
-    MS_LOG(DEBUG) << "fusion scope error, layer input:" << input_size << ", input_node:" << input_nodes.size();
+    MS_LOG(INFO) << "Fusion error: fusion scope error, layer input:" << input_size
+                 << ", input_node:" << input_nodes.size();
     return false;
   }
   return true;
 }
 
-bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str,
-                                            size_t *index) {
+bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input,
+                                            const std::map<const AnfNodePtr, FusionDataType> &spec_data_input,
+                                            nlohmann::json *data_str, size_t *index) {
   MS_EXCEPTION_IF_NULL(data_str);
   MS_EXCEPTION_IF_NULL(index);
   std::vector<nlohmann::json> output_desc_list;
@@ -604,13 +662,17 @@ bool TbeKernelBuild::GenFusionDataInputJson(const shared_ptr<mindspore::AnfNode>
     output_desc_list.push_back(output_desc);
     (*index)++;
   } else {
+    FusionDataType fusion_data_type = kFusionNormal;
+    if (spec_data_input.find(data_input) != spec_data_input.end()) {
+      fusion_data_type = spec_data_input.at(data_input);
+    }
     auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
     auto real_node = kernel_idx.first;
     size_t real_idx = kernel_idx.second;
     MS_LOG(INFO) << "real name " << real_node->fullname_with_scope() << " index:" << real_idx;
     // "output_desc"
     nlohmann::json output_desc;
-    GenDescJson(real_node, real_idx, real_idx, &output_desc);
+    GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
     output_desc_list.push_back(output_desc);
     (*data_str)["name"] = NormalizeFullScopeName(real_node->fullname_with_scope());
   }
@@ -632,11 +694,12 @@ bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
     auto real_input_size = cnode->inputs().size() - 1;
     auto dyn_input_size = dyn_input_sizes.size();
     if (dyn_input_size != 1) {
-      MS_LOG(DEBUG) << "fusion build not support dyn_input_sizes > 1";
+      MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1";
       return ret;
     }
     if (IntToSize(dyn_input_sizes[0]) != real_input_size) {
-      MS_LOG(DEBUG) << " dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size" << real_input_size;
+      MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size"
+                   << real_input_size;
       return ret;
     }
     ret = true;
@@ -663,6 +726,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
                                                std::vector<nlohmann::json> *input_desc_list, size_t *index) {
   MS_EXCEPTION_IF_NULL(cnode);
   MS_EXCEPTION_IF_NULL(input_desc_list);
+  std::vector<nlohmann::json> input_desc_list_tmp = {};
   bool is_dynamic_input = IsDynamicInput(cnode);
   for (size_t i = 1; i < cnode->inputs().size(); ++i) {
     auto input = cnode->input(i);
@@ -676,7 +740,7 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
       MS_LOG(INFO) << "node has dynamic input.";
       input_desc["dyn_index"] = (i - 1);
     }
-    (*input_desc_list).emplace_back(input_desc);
+    input_desc_list_tmp.emplace_back(input_desc);
   }
   size_t optional_num = GetOptionalInput(cnode, is_dynamic_input);
   if (optional_num > 0) {
@@ -686,35 +750,24 @@ bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
       optional_input_desc["name"] = std::string(kOptional) + std::to_string(*index);
       (*index)++;
       (*layer_iter)->emplace_back(nullptr);
-      (*input_desc_list).emplace_back(optional_input_desc);
+      input_desc_list_tmp.emplace_back(optional_input_desc);
     }
   }
+  auto op_name = AnfAlgo::GetCNodeName(cnode);
+  TbeAdapter::FusionInputOrderPass(op_name, input_desc_list_tmp, input_desc_list);
   return true;
 }
 
 std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int> &output_used_nums) {
   std::vector<size_t> desc_output_index = {};
-  bool find_reused = false;
-  size_t reused_num = 0;
   for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
     auto output_use_num_item = output_used_nums[idx];
     MS_LOG(INFO) << "output used num[" << idx << "] = " << output_use_num_item;
-    if (output_use_num_item == 1 || output_use_num_item == 0) {
+    desc_output_index.emplace_back(idx);
+    if (output_use_num_item > 1) {
       desc_output_index.emplace_back(idx);
-    } else {
-      if (!find_reused) {
-        desc_output_index.emplace_back(idx);
-      } else {
-        desc_output_index.emplace_back(desc_output_index[idx - 1]);
-      }
-      reused_num += (output_use_num_item - 1);
-      find_reused = true;
     }
   }
-  auto pad_value = output_used_nums.size() == 1 ? 0 : desc_output_index[desc_output_index.size() - 1] + 1;
-  for (size_t i = 0; i < reused_num; ++i) {
-    desc_output_index.emplace_back(pad_value);
-  }
   return desc_output_index;
 }
 
@@ -811,6 +864,7 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto
         }
         auto ret = GetIOSizeImpl(data_output);
         input_size_list->push_back(ret);
+        MS_LOG(INFO) << "Fusion info: scope input name： " << op["name"] << ", size: " << ret;
       }
     }
   }
@@ -819,26 +873,31 @@ bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list, const vecto
     auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0);
     auto real_node = kernel_idx.first;
     size_t real_idx = kernel_idx.second;
+    auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope());
+    MS_LOG(INFO) << "Fusion info: real node name: " << normal_name << ", real output index: " << real_idx;
     for (const auto &op : fusion_op_list) {
-      auto normal_name = NormalizeFullScopeName(real_node->fullname_with_scope());
       if (op["name"] == normal_name) {
         auto op_output_desces = op["output_desc"];
         if (output_node != real_node) {
           // tuple_get item
-          MS_LOG(DEBUG) << "output is a tuple getitem node";
+          MS_LOG(INFO) << "output is a tuple getitem node";
           auto output_desc = op_output_desces[real_idx];
           if (output_desc["shape"].empty()) {
-            continue;
+            MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx;
+            return false;
           }
           auto ret = GetIOSizeImpl(output_desc);
           output_size_list->push_back(ret);
+          MS_LOG(INFO) << "Fusion info: scope output index： " << real_idx << ", size: " << ret;
         } else {
           for (const auto &output_desc : op_output_desces) {
             if (output_desc["shape"].empty()) {
+              MS_LOG(INFO) << "Fusion info: output_desc's shape is empty, may be this node output";
               continue;
             }
             auto ret = GetIOSizeImpl(output_desc);
             output_size_list->push_back(ret);
+            MS_LOG(INFO) << "Fusion info: scope output size: " << ret;
           }
         }
       }
diff --git a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
index 1a3eee7fd9..f6e28327d4 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
+++ b/mindspore/ccsrc/kernel/tbe/tbe_kernel_build.h
@@ -35,6 +35,8 @@ namespace kernel {
 // kernel operate type used for generate json
 
 class TbeKernelBuild {
+  enum FusionDataType { kFusionNormal = 0, kFusionAddN, kFusionReLUGradV2 };
+
  public:
   static bool GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
                         std::vector<size_t> *output_size_list);
@@ -48,8 +50,9 @@ class TbeKernelBuild {
  private:
   TbeKernelBuild() = default;
   ~TbeKernelBuild() = default;
-  static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input, nlohmann::json *data_str,
-                                     size_t *index);
+  static bool GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input,
+                                     const std::map<const AnfNodePtr, FusionDataType> &spec_data_input,
+                                     nlohmann::json *data_str, size_t *index);
   static bool GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node,
                                    std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
                                    nlohmann::json *compute_op_str, std::string *fusion_kernel_name, size_t *index);
@@ -60,13 +63,17 @@ class TbeKernelBuild {
   static bool GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
                                          std::vector<nlohmann::json> *output_desc_list);
   static void GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
-                          size_t desc_output_idx, nlohmann::json *output_desc);
+                          size_t desc_output_idx, nlohmann::json *output_desc,
+                          FusionDataType fusion_data_type = kFusionNormal);
   static void GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
                                   size_t output_index, nlohmann::json *output_desc);
   static size_t GetIOSizeImpl(const nlohmann::json &desc);
+  static bool GetSpecInputLayers(const std::string &op_name, const std::vector<mindspore::AnfNodePtr> &reorder_layer,
+                                 std::map<const AnfNodePtr, FusionDataType> *spec_data_input);
   static bool GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
                              const std::vector<mindspore::AnfNodePtr> &compute_nodes,
-                             std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers);
+                             std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
+                             std::map<const AnfNodePtr, FusionDataType> *spec_data_input);
   static bool IsDynamicInput(const CNodePtr &cnode);
   static size_t GetOptionalInput(const CNodePtr &cnode, bool is_dynamic_input);
 };
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 904acff975..59d7f27c11 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -122,6 +122,8 @@ constexpr auto kSendOpName = "Send";
 constexpr auto kRecvOpName = "Recv";
 constexpr auto kReluV2OpName = "ReLUV2";
 constexpr auto kReluGradV2OpName = "ReluGradV2";
+constexpr auto kAddNOpName = "AddN";
+constexpr auto kConv2DBackpropInputOpName = "Conv2DBackpropInput";
 constexpr auto kFusionOpConv2DBackpropInputReluGradV2Name = "FusionOp_Conv2DBackpropInput_ReluGradV2";
 constexpr auto kFusionOpConv2DBackpropInputAddNReluGradV2Name = "FusionOp_Conv2DBackpropInput_AddN_ReluGradV2";
 
diff --git a/mindspore/ops/_op_impl/tbe/reduce_mean.py b/mindspore/ops/_op_impl/tbe/reduce_mean.py
index 47548e9036..67b96933a1 100644
--- a/mindspore/ops/_op_impl/tbe/reduce_mean.py
+++ b/mindspore/ops/_op_impl/tbe/reduce_mean.py
@@ -31,6 +31,7 @@ reduce_mean_op_info = TBERegOp("ReduceMean") \
     .dtype_format(DataType.U8_Default, DataType.U8_Default) \
     .dtype_format(DataType.F16_Default, DataType.F16_Default) \
     .dtype_format(DataType.F32_Default, DataType.F32_Default) \
+    .dtype_format(DataType.F16_5HD, DataType.F16_5HD) \
     .get_op_info()
 
 
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index a3edd8f826..102b5b50e4 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -701,6 +701,7 @@ class Conv2D(PrimitiveWithInfer):
         self.add_prim_attr('data_format', "NCHW")
         self.out_channel = validator.check_integer('out_channel', out_channel, 0, Rel.GT, self.name)
         self.group = validator.check_integer('group', group, 0, Rel.GT, self.name)
+        self.add_prim_attr('offset_a', 0)
 
     def infer_shape(self, x_shape, w_shape):
         validator.check_integer("weight rank", len(w_shape), 4, Rel.EQ, self.name)

From 23a586b61da7ba6099f5c41ca37c88c620c98683 Mon Sep 17 00:00:00 2001
From: "Etone.Chan" <etone.chan@huawei.com>
Date: Tue, 28 Apr 2020 20:25:15 +0800
Subject: [PATCH 162/242] set RefInfo of Buffer Fusion kernel

---
 .../ascend/buffer_fusion/buffer_fusion.cc     | 33 ++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
index 8581f1165d..851831383b 100644
--- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
@@ -24,6 +24,7 @@
 #include <memory>
 #include <string>
 #include <algorithm>
+#include <iterator>
 
 #include "kernel/kernel_fusion.h"
 #include "debug/anf_ir_dump.h"
@@ -461,6 +462,36 @@ void GetFusionScopeOutputNodeList(session::KernelGraph *kernel_graph,
   }
 }
 
+void SetFusionOpRefInfos(session::KernelGraph *kernel_graph, const std::vector<AnfNodePtr> &outputs_list,
+                         const AnfNodePtr &fusion_kernel) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  auto manager = kernel_graph->manager();
+  MS_EXCEPTION_IF_NULL(manager);
+  for (size_t idx = 0; idx < outputs_list.size(); ++idx) {
+    auto output = outputs_list[idx];
+    if (output->isa<CNode>() && AnfAlgo::GetCNodeName(output) == prim::kPrimTupleGetItem->name()) {
+      auto real_output = AnfAlgo::VisitKernel(output, 0);
+      auto output_cnode = output->cast<CNodePtr>();
+      MS_EXCEPTION_IF_NULL(output_cnode);
+      auto input2 = output_cnode->input(2);
+      auto output_idx = GetValue<int>(GetValueNode(input2));
+      session::AnfWithOutIndex out_pair(real_output.first, output_idx);
+      if (kernel_graph->IsInRefOutputMap(out_pair)) {
+        auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair);
+        session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx);
+        kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair);
+      }
+    } else {
+      session::AnfWithOutIndex out_pair(output, 0);
+      if (kernel_graph->IsInRefOutputMap(out_pair)) {
+        auto origin_pair = kernel_graph->GetRefCorrespondOutput(out_pair);
+        session::AnfWithOutIndex fusion_final_pair(fusion_kernel, idx);
+        kernel_graph->AddRefCorrespondPairs(fusion_final_pair, origin_pair);
+      }
+    }
+  }
+}
+
 void MatchConvBnreduce(const CNodePtr &cnode, const session::KernelGraph &kernel_graph,
                        std::unordered_set<AnfNodePtr> *fused_set, FusedNodeRecord *candidate_fusion) {
   MS_EXCEPTION_IF_NULL(cnode);
@@ -708,7 +739,7 @@ bool BufferFusion::ReplaceFusionOp(std::unordered_map<int32_t, BufferFusionInfo_
   }
   AnfAlgo::SetOutputInferTypeAndShape(types, shapes, buffer_fusion.get());
   AnfAlgo::SetKernelMod(kernel_ptr, buffer_fusion.get());
-  // replace node
+  SetFusionOpRefInfos(kernel_graph, buffer_fusion_info.outputs_list, buffer_fusion);
   ReplaceOldNode(buffer_fusion_infos, fusion_id, buffer_fusion, kernel_graph);
   return true;
 }

From 20c79c3f7c1233b1a4009da6d5b1674dc5d59fb4 Mon Sep 17 00:00:00 2001
From: wandongdong <wandongdong1@huawei.com>
Date: Tue, 28 Apr 2020 20:07:42 +0800
Subject: [PATCH 163/242] fix launch bug and add RANK_TABLE_FILE and remove
 hccl context

---
 example/mobilenetv2_imagenet2012/launch.py | 31 +++++++++-------------
 example/mobilenetv2_imagenet2012/train.py  | 11 ++++----
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/example/mobilenetv2_imagenet2012/launch.py b/example/mobilenetv2_imagenet2012/launch.py
index 5a8977c64b..bd28e20149 100644
--- a/example/mobilenetv2_imagenet2012/launch.py
+++ b/example/mobilenetv2_imagenet2012/launch.py
@@ -15,7 +15,6 @@
 """launch train script"""
 import os
 import sys
-import subprocess
 import json
 from argparse import ArgumentParser
 
@@ -125,25 +124,19 @@ def main():
     sys.stdout.flush()
 
     # spawn the processes
-    current_env = os.environ.copy()
-    current_env["RANK_SIZE"] = str(args.nproc_per_node)
-    if args.nproc_per_node > 1:
-        current_env["MINDSPORE_HCCL_CONFIG_PATH"] = table_fn
-    processes = []
-    cmds = []
     for rank_id in range(0, args.nproc_per_node):
-        current_env["RANK_ID"] = str(rank_id)
-        current_env["DEVICE_ID"] = visible_devices[rank_id]
-        cmd = [sys.executable, "-u"]
-        cmd.append(args.training_script)
-        cmd.extend(args.training_script_args)
-        process = subprocess.Popen(cmd, env=current_env)
-        processes.append(process)
-        cmds.append(cmd)
-    for process, cmd in zip(processes, cmds):
-        process.wait()
-        if process.returncode != 0:
-            raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
+        device_id = visible_devices[rank_id]
+        device_dir = os.path.join(os.getcwd(), 'device{}'.format(rank_id))
+        rank_process = 'export RANK_SIZE={} && export RANK_ID={} && export DEVICE_ID={} && '.format(args.nproc_per_node,
+                                                                                                    rank_id, device_id)
+        if args.nproc_per_node > 1:
+            rank_process += 'export MINDSPORE_HCCL_CONFIG_PATH={} && '.format(table_fn)
+            rank_process += 'export RANK_TABLE_FILE={} && '.format(table_fn)
+        rank_process += 'rm -rf {dir} && mkdir {dir} && cd {dir} && python {script} '.format(dir=device_dir,
+                                                                                             script=args.training_script
+                                                                                             )
+        rank_process += ' '.join(args.training_script_args) + ' > log{}.log 2>&1 &'.format(rank_id)
+        os.system(rank_process)
 
 
 if __name__ == "__main__":
diff --git a/example/mobilenetv2_imagenet2012/train.py b/example/mobilenetv2_imagenet2012/train.py
index 584e89fe43..d97eab5f04 100644
--- a/example/mobilenetv2_imagenet2012/train.py
+++ b/example/mobilenetv2_imagenet2012/train.py
@@ -23,6 +23,7 @@ from lr_generator import get_lr
 from config import config
 from mindspore import context
 from mindspore import Tensor
+from mindspore import nn
 from mindspore.model_zoo.mobilenet import mobilenet_v2
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.nn.optim.momentum import Momentum
@@ -110,16 +111,17 @@ class Monitor(Callback):
 
 if __name__ == '__main__':
     if run_distribute:
-        context.set_context(enable_hccl=True)
         context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                           parameter_broadcast=True, mirror_mean=True)
         auto_parallel_context().set_all_reduce_fusion_split_indices([140])
         init()
-    else:
-        context.set_context(enable_hccl=False)
 
     epoch_size = config.epoch_size
     net = mobilenet_v2(num_classes=config.num_classes)
+    net.add_flags_recursive(fp16=True)
+    for _, cell in net.cells_and_names():
+        if isinstance(cell, nn.Dense):
+            cell.add_flags_recursive(fp32=True)
     loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
 
     print("train args: ", args_opt, "\ncfg: ", config,
@@ -135,8 +137,7 @@ if __name__ == '__main__':
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                    config.weight_decay, config.loss_scale)
 
-    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, amp_level='O0',
-                  keep_batchnorm_fp32=False)
+    model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)
 
     cb = None
     if rank_id == 0:

From d428fc1166fb7942275a7e28b25700a3074d0fb6 Mon Sep 17 00:00:00 2001
From: wsc <wangshaocong1@huawei.com>
Date: Tue, 28 Apr 2020 10:31:33 +0800
Subject: [PATCH 164/242] Modify the ST test script and example script of bert
 model

---
 example/bert_clue/README.md                   |   2 +-
 example/bert_clue/config.py                   |  24 ++-
 example/bert_clue/dataset.py                  |   2 +-
 example/bert_clue/run_distribute_pretrain.sh  |   6 +-
 example/bert_clue/run_standalone_pretrain.sh  |   6 +-
 .../models/bert/bert_tdt_lossscale.py         |   9 +-
 .../models/bert/bert_tdt_no_lossscale.py      | 190 ------------------
 7 files changed, 28 insertions(+), 211 deletions(-)
 delete mode 100644 tests/st/networks/models/bert/bert_tdt_no_lossscale.py

diff --git a/example/bert_clue/README.md b/example/bert_clue/README.md
index 55b200e941..3c66816ff3 100644
--- a/example/bert_clue/README.md
+++ b/example/bert_clue/README.md
@@ -78,7 +78,7 @@ It contains of parameters of BERT model and options for training, which is set i
 ### Options:
 ```
 Pre-Training:
-    bert_network                    version of BERT model: base | large, default is base
+    bert_network                    version of BERT model: base | nezha, default is base
     loss_scale_value                initial value of loss scale: N, default is 2^32
     scale_factor                    factor used to update loss scale: N, default is 2
     scale_window                    steps for once updatation of loss scale: N, default is 1000   
diff --git a/example/bert_clue/config.py b/example/bert_clue/config.py
index 2d49121c50..a16dba83c7 100644
--- a/example/bert_clue/config.py
+++ b/example/bert_clue/config.py
@@ -26,30 +26,36 @@ cfg = edict({
     'optimizer': 'Lamb',
     'AdamWeightDecayDynamicLR': edict({
         'learning_rate': 3e-5,
-        'end_learning_rate': 0.0,
+        'end_learning_rate': 1e-7,
         'power': 5.0,
         'weight_decay': 1e-5,
         'eps': 1e-6,
     }),
     'Lamb': edict({
         'start_learning_rate': 3e-5,
-        'end_learning_rate': 0.0,
+        'end_learning_rate': 1e-7,
         'power': 10.0,
         'warmup_steps': 10000,
         'weight_decay': 0.01,
         'eps': 1e-6,
-        'decay_filter': lambda x: False,
     }),
     'Momentum': edict({
         'learning_rate': 2e-5,
         'momentum': 0.9,
     }),
 })
+
+'''
+Including two kinds of network: \
+base: Google BERT-base(the base version of BERT model).
+nezha: BERT-NEZHA(a Chinese pretrained language model developed by Huawei, which introduced an improvement of \
+       Functional Relative Positional Encoding as an effective positional encoding scheme).
+'''
 if cfg.bert_network == 'base':
     bert_net_cfg = BertConfig(
-        batch_size=16,
+        batch_size=32,
         seq_length=128,
-        vocab_size=21136,
+        vocab_size=21128,
         hidden_size=768,
         num_hidden_layers=12,
         num_attention_heads=12,
@@ -66,13 +72,13 @@ if cfg.bert_network == 'base':
         dtype=mstype.float32,
         compute_type=mstype.float16,
     )
-else:
+if cfg.bert_network == 'nezha':
     bert_net_cfg = BertConfig(
-        batch_size=16,
+        batch_size=32,
         seq_length=128,
-        vocab_size=21136,
+        vocab_size=21128,
         hidden_size=1024,
-        num_hidden_layers=12,
+        num_hidden_layers=24,
         num_attention_heads=16,
         intermediate_size=4096,
         hidden_act="gelu",
diff --git a/example/bert_clue/dataset.py b/example/bert_clue/dataset.py
index 671f0dca0f..d54f2a6660 100644
--- a/example/bert_clue/dataset.py
+++ b/example/bert_clue/dataset.py
@@ -31,7 +31,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e
     files = os.listdir(data_dir)
     data_files = []
     for file_name in files:
-        data_files.append(data_dir+file_name)
+        data_files.append(os.path.join(data_dir, file_name))
     ds = de.TFRecordDataset(data_files, schema_dir,
                             columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
                                           "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
diff --git a/example/bert_clue/run_distribute_pretrain.sh b/example/bert_clue/run_distribute_pretrain.sh
index 93d68d8e9d..86d3747e0b 100644
--- a/example/bert_clue/run_distribute_pretrain.sh
+++ b/example/bert_clue/run_distribute_pretrain.sh
@@ -16,17 +16,15 @@
 
 echo "=============================================================================================================="
 echo "Please run the scipt as: "
-echo "sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH MINDSPORE_PATH"
-echo "for example: sh run_distribute_pretrain.sh 8 40 /path/zh-wiki/ /path/Schema.json /path/hccl.json /path/mindspore"
+echo "sh run_distribute_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_HCCL_CONFIG_PATH"
+echo "for example: sh run_distribute_pretrain.sh 8 40 /path/zh-wiki/ /path/Schema.json /path/hccl.json"
 echo "It is better to use absolute path."
 echo "=============================================================================================================="
 
 EPOCH_SIZE=$2
 DATA_DIR=$3
 SCHEMA_DIR=$4
-MINDSPORE_PATH=$6
 
-export PYTHONPATH=$MINDSPORE_PATH/build/package:$PYTHONPATH
 export MINDSPORE_HCCL_CONFIG_PATH=$5
 export RANK_SIZE=$1
 
diff --git a/example/bert_clue/run_standalone_pretrain.sh b/example/bert_clue/run_standalone_pretrain.sh
index aeffca7b04..bc4bcb5420 100644
--- a/example/bert_clue/run_standalone_pretrain.sh
+++ b/example/bert_clue/run_standalone_pretrain.sh
@@ -16,16 +16,14 @@
 
 echo "=============================================================================================================="
 echo "Please run the scipt as: "
-echo "sh run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR MINDSPORE_PATH"
-echo "for example: sh run_standalone_pretrain.sh 0 40 /path/zh-wiki/ /path/Schema.json /path/mindspore"
+echo "sh run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR"
+echo "for example: sh run_standalone_pretrain.sh 0 40 /path/zh-wiki/ /path/Schema.json"
 echo "=============================================================================================================="
 
 DEVICE_ID=$1
 EPOCH_SIZE=$2
 DATA_DIR=$3
 SCHEMA_DIR=$4
-MINDSPORE_PATH=$5
-export PYTHONPATH=$MINDSPORE_PATH/build/package:$PYTHONPATH
 
 python run_pretrain.py  \
     --distribute="false" \
diff --git a/tests/st/networks/models/bert/bert_tdt_lossscale.py b/tests/st/networks/models/bert/bert_tdt_lossscale.py
index cfd0b55697..ec46633657 100644
--- a/tests/st/networks/models/bert/bert_tdt_lossscale.py
+++ b/tests/st/networks/models/bert/bert_tdt_lossscale.py
@@ -135,9 +135,10 @@ class ModelCallback(Callback):
 
     def step_end(self, run_context):
         cb_params = run_context.original_args()
-        self.loss_list.append(cb_params.net_outputs[0])
+        self.loss_list.append(cb_params.net_outputs[0].asnumpy()[0])
         self.overflow_list.append(cb_params.net_outputs[1])
         self.lossscale_list.append(cb_params.net_outputs[2])
+        print("epoch: {}, outputs are: {}".format(cb_params.cur_epoch_num, str(cb_params.net_outputs)))
 
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
@@ -192,7 +193,11 @@ def test_bert_tdt():
             if count == scale_window:
                 count = 0
                 assert callback.lossscale_list[i] == callback.lossscale_list[i - 1] * Tensor(2.0, mstype.float32)
-
+    # assertion occurs while the loss value is wrong
+    loss_value = np.array(callback.loss_list)
+    expect_value = [12.1918125, 11.966035, 11.972114, 11.982671, 11.976399, 12.616986, 12.180658, 12.850562, 12.415608, 12.640145]
+    print("loss value: {}".format(loss_value))
+    assert np.allclose(loss_value, expect_value, 0.00001, 0.00001)
 
 if __name__ == '__main__':
     test_bert_tdt()
diff --git a/tests/st/networks/models/bert/bert_tdt_no_lossscale.py b/tests/st/networks/models/bert/bert_tdt_no_lossscale.py
deleted file mode 100644
index 7d30592044..0000000000
--- a/tests/st/networks/models/bert/bert_tdt_no_lossscale.py
+++ /dev/null
@@ -1,190 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-"""train bert network without lossscale"""
-
-import os
-import pytest
-import numpy as np
-import mindspore.context as context
-import mindspore.common.dtype as mstype
-import mindspore.dataset.engine.datasets as de
-import mindspore.dataset.transforms.c_transforms as C
-from mindspore import Tensor
-from mindspore.train.model import Model
-from mindspore.train.callback import Callback
-from mindspore.model_zoo.Bert_NEZHA import BertConfig, BertNetworkWithLoss, BertTrainOneStepCell
-from mindspore.nn.optim import Momentum
-from mindspore import log as logger
-
-_current_dir = os.path.dirname(os.path.realpath(__file__))
-DATA_DIR = ["/home/workspace/mindspore_dataset/bert/example/examples.tfrecord"]
-SCHEMA_DIR = "/home/workspace/mindspore_dataset/bert/example/datasetSchema.json"
-
-
-def get_config(version='base', batch_size=1):
-    """get config"""
-    if version == 'base':
-        bert_config = BertConfig(
-            batch_size=batch_size,
-            seq_length=128,
-            vocab_size=21136,
-            hidden_size=768,
-            num_hidden_layers=2,
-            num_attention_heads=12,
-            intermediate_size=3072,
-            hidden_act="gelu",
-            hidden_dropout_prob=0.1,
-            attention_probs_dropout_prob=0.1,
-            max_position_embeddings=512,
-            type_vocab_size=2,
-            initializer_range=0.02,
-            use_relative_positions=True,
-            input_mask_from_dataset=True,
-            token_type_ids_from_dataset=True,
-            dtype=mstype.float32,
-            compute_type=mstype.float32)
-    elif version == 'large':
-        bert_config = BertConfig(
-            batch_size=batch_size,
-            seq_length=128,
-            vocab_size=21136,
-            hidden_size=1024,
-            num_hidden_layers=2,
-            num_attention_heads=16,
-            intermediate_size=4096,
-            hidden_act="gelu",
-            hidden_dropout_prob=0.0,
-            attention_probs_dropout_prob=0.0,
-            max_position_embeddings=512,
-            type_vocab_size=2,
-            initializer_range=0.02,
-            use_relative_positions=True,
-            input_mask_from_dataset=True,
-            token_type_ids_from_dataset=True,
-            dtype=mstype.float32,
-            compute_type=mstype.float16)
-    elif version == 'large_mixed':
-        bert_config = BertConfig(
-            batch_size=batch_size,
-            seq_length=128,
-            vocab_size=21136,
-            hidden_size=1024,
-            num_hidden_layers=24,
-            num_attention_heads=16,
-            intermediate_size=4096,
-            hidden_act="gelu",
-            hidden_dropout_prob=0.0,
-            attention_probs_dropout_prob=0.0,
-            max_position_embeddings=512,
-            type_vocab_size=2,
-            initializer_range=0.02,
-            use_relative_positions=True,
-            input_mask_from_dataset=True,
-            token_type_ids_from_dataset=True,
-            dtype=mstype.float32,
-            compute_type=mstype.float32)
-    else:
-        bert_config = BertConfig(batch_size=batch_size)
-    return bert_config
-
-
-def me_de_train_dataset():
-    """test me de train dataset"""
-    # apply repeat operations
-    repeat_count = 1
-    ds = de.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids",
-                                                                "next_sentence_labels", "masked_lm_positions",
-                                                                "masked_lm_ids", "masked_lm_weights"], shuffle=False)
-    type_cast_op = C.TypeCast(mstype.int32)
-    ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
-    ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
-    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
-    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
-    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
-    # apply batch operations
-    batch_size = 16
-    ds = ds.batch(batch_size, drop_remainder=True)
-    ds = ds.repeat(repeat_count)
-    return ds
-
-
-def weight_variable(shape):
-    """weight variable"""
-    np.random.seed(1)
-    ones = np.random.uniform(-0.1, 0.1, size=shape).astype(np.float32)
-    return Tensor(ones)
-
-
-class ModelCallback(Callback):
-    def __init__(self):
-        super(ModelCallback, self).__init__()
-        self.loss_list = []
-
-    def step_end(self, run_context):
-        cb_params = run_context.original_args()
-        self.loss_list.append(cb_params.net_outputs.asnumpy()[0])
-        logger.info("epoch: {}, outputs are {}".format(cb_params.cur_epoch_num, str(cb_params.net_outputs)))
-
-
-@pytest.mark.level0
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_onecard
-def test_bert_tdt():
-    """test bert tdt"""
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", reserve_class_name_in_scope=False)
-    context.set_context(enable_task_sink=True)
-    context.set_context(enable_loop_sink=True)
-    context.set_context(enable_mem_reuse=True)
-    parallel_callback = ModelCallback()
-    ds = me_de_train_dataset()
-    version = os.getenv('VERSION', 'large')
-    batch_size = int(os.getenv('BATCH_SIZE', '16'))
-    config = get_config(version=version, batch_size=batch_size)
-    netwithloss = BertNetworkWithLoss(config, True)
-    optimizer = Momentum(netwithloss.trainable_params(), learning_rate=2e-5, momentum=0.9)
-    netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer)
-    netwithgrads.set_train(True)
-    model = Model(netwithgrads)
-    params = netwithloss.trainable_params()
-    for param in params:
-        value = param.default_input
-        name = param.name
-        if isinstance(value, Tensor):
-            if name.split('.')[-1] in ['weight']:
-                if name.split('.')[-3] in ['cls2']:
-                    logger.info("***************** BERT param name is 1 {}".format(name))
-                    param.default_input = weight_variable(value.asnumpy().shape)
-                else:
-                    logger.info("***************** BERT param name is 2 {}".format(name))
-                    tempshape = value.asnumpy().shape
-                    shape = (tempshape[1], tempshape[0])
-                    weight_value = weight_variable(shape).asnumpy()
-                    param.default_input = Tensor(np.transpose(weight_value, [1, 0]))
-            else:
-                logger.info("***************** BERT param name is 3 {}".format(name))
-                param.default_input = weight_variable(value.asnumpy().shape)
-    model.train(ds.get_repeat_count(), ds, callbacks=parallel_callback, dataset_sink_mode=False)
-    loss_value = np.array(parallel_callback.loss_list)
-    expect_out = [12.19179, 11.965041, 11.969687, 11.97815, 11.969171, 12.603289, 12.165594,
-                  12.824818, 12.38842, 12.604046]
-    logger.info("expected loss value output: {}".format(expect_out))
-    assert np.allclose(loss_value, expect_out, 0.00001, 0.00001)
-
-
-if __name__ == '__main__':
-    test_bert_tdt()

From 65cd41844deeada6fb63579e9c1166e355e686c9 Mon Sep 17 00:00:00 2001
From: kpy <kuangpeiyu@huawei.com>
Date: Tue, 28 Apr 2020 19:55:18 +0800
Subject: [PATCH 165/242] change manager logic to faster combine_like step

---
 mindspore/ccsrc/pipeline/action.cc       | 10 ++++----
 tests/st/control/test_multigraph_sink.py | 29 ++++++++++++++++++++----
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/pipeline/action.cc b/mindspore/ccsrc/pipeline/action.cc
index 778600dc0a..f15723d64d 100644
--- a/mindspore/ccsrc/pipeline/action.cc
+++ b/mindspore/ccsrc/pipeline/action.cc
@@ -130,7 +130,7 @@ bool ParseAction(const ResourcePtr &res) {
 // This step do this optimize: graph1(x){xx(fv1),xxx(fv2)}, graph2(x){xxx(fv3),xxx(fv4)}->
 // graph1(x){base_graph(x, fv1, fv2)}, graph1(x){base_graph(x, fv3, fv4)}, base_graph(x, fv...){xxx,xxx}
 // all obj_map's graph shared base_graph
-bool CombineLikeGraphs(const ResourcePtr &) {
+bool CombineLikeGraphs(const ResourcePtr &res) {
   auto &obj_map = parse::data_converter::GetObjGraphs();
 
   for (auto it : obj_map) {
@@ -147,13 +147,15 @@ bool CombineLikeGraphs(const ResourcePtr &) {
     if (fg->paramter_obj_nodes().size() == 0 || graphs.size() <= 1) {
       continue;
     }
-    auto mng = Manage(base_graph, false);
     for (auto &fv : fg->paramter_obj_nodes()) {
       TraceManager::DebugTrace(std::make_shared<TraceCombileLikeGraphs>(fv->debug_info()));
       auto param = base_graph->add_parameter();
       TraceManager::EndTrace();
-      auto repl_node = (*cloner->cloned_node())[fv];
-      (void)mng->Replace(repl_node, param);
+      auto &node_users = res->manager()->node_users()[fv];
+      for (auto &n : node_users) {
+        auto repl_n = (*cloner->cloned_node())[n.first]->cast<CNodePtr>();
+        repl_n->set_input(n.second, param);
+      }
     }
     MS_LOG(DEBUG) << "Fg0 paramter_obj_nodes size :" << fg->paramter_obj_nodes().size();
 
diff --git a/tests/st/control/test_multigraph_sink.py b/tests/st/control/test_multigraph_sink.py
index b2732a63d4..2b9a1a020a 100644
--- a/tests/st/control/test_multigraph_sink.py
+++ b/tests/st/control/test_multigraph_sink.py
@@ -24,9 +24,7 @@ from mindspore.ops import operations as P
 
 
 def setup_module(module):
-    context.set_context(mode = context.PYNATIVE_MODE, save_graphs = True, device_target = "Ascend")
-    context.set_context(enable_task_sink = True, device_id = 0)
-
+    context.set_context(mode = context.PYNATIVE_MODE, device_target = "Ascend")
 
 c1 = Tensor([2], mstype.int32)
 c2 = Tensor([14], mstype.int32)
@@ -135,6 +133,10 @@ def while_in_while_in_while(x, y, z):
     return out
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
 def test_simple_if():
     output = simple_if(c1, c2, c3)
     expect = Tensor([6], mstype.int32)
@@ -153,30 +155,49 @@ def test_if_in_if():
     assert output == expect
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
 def test_simple_while():
     output = simple_while(c1, c2, c3)
     expect = Tensor([21], mstype.int32)
     assert output == expect
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
 def test_while_by_while():
     output = while_by_while(c1, c2, c3)
     expect = Tensor([28], mstype.int32)
     assert output == expect
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
 def test_while_in_while():
     output = while_in_while(c1, c2, c3)
     expect = Tensor([1274], mstype.int32)
     assert output == expect
 
 
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
 def test_while_by_while_in_while():
     output = while_by_while_in_while(c1, c2, c3)
     expect = Tensor([350], mstype.int32)
     assert output == expect
 
-
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
 def test_while_in_while_in_while():
     output = while_in_while_in_while(c1, c2, c3)
     expect = Tensor([2534], mstype.int32)

From 16e57a406288c943013b53b69aaedef681ac9617 Mon Sep 17 00:00:00 2001
From: simson <526422051@qq.com>
Date: Tue, 28 Apr 2020 21:10:28 +0800
Subject: [PATCH 166/242] change the error type in Infer from ValueError to
 TypeError

---
 mindspore/ccsrc/pipeline/static_analysis/evaluator.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
index 445be852bb..06d61292d7 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/evaluator.cc
@@ -93,9 +93,9 @@ AbstractBasePtr BaseFuncGraphEvaluator::Infer(AnalysisEnginePtr engine, const Ab
   MS_EXCEPTION_IF_NULL(fg);
   std::size_t nargs = fg->parameters().size();
   if (args_spec_list.size() != nargs) {
-    MS_EXCEPTION(ValueError) << "Function " << fg->ToString() << ", The number of parameters of this function is "
-                             << fg->parameters().size() << ", but the number of provided arguments is "
-                             << args_spec_list.size() << ". NodeInfo: " << trace::GetDebugInfo(fg->debug_info());
+    MS_EXCEPTION(TypeError) << "Function " << fg->ToString() << ", The number of parameters of this function is "
+                            << fg->parameters().size() << ", but the number of provided arguments is "
+                            << args_spec_list.size() << ". NodeInfo: " << trace::GetDebugInfo(fg->debug_info());
   }
   MS_EXCEPTION_IF_NULL(parent_context_);
   MS_EXCEPTION_IF_NULL(engine);

From b2cdf220f7ee6d71d67ac8e3ea104856d30bb721 Mon Sep 17 00:00:00 2001
From: zhaoting <zhaoting23@huawei.com>
Date: Wed, 22 Apr 2020 14:30:08 +0800
Subject: [PATCH 167/242] add README.md for YOLOv3

---
 example/yolov3_coco2017/README.md | 94 +++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 example/yolov3_coco2017/README.md

diff --git a/example/yolov3_coco2017/README.md b/example/yolov3_coco2017/README.md
new file mode 100644
index 0000000000..b35d22f4b3
--- /dev/null
+++ b/example/yolov3_coco2017/README.md
@@ -0,0 +1,94 @@
+# YOLOv3 Example
+
+## Description
+
+YOLOv3 network based on ResNet-18, with support for training and evaluation.
+
+## Requirements
+
+- Install [MindSpore](https://www.mindspore.cn/install/en).
+
+- Dataset
+
+    We use coco2017 as training dataset.
+
+    1. Download coco2017: [train2017](http://images.cocodataset.org/zips/train2017.zip), [val2017](http://images.cocodataset.org/zips/val2017.zip), [test2017](http://images.cocodataset.org/zips/test2017.zip), [annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip). The directory structure is as follows:
+        > ```
+        > .
+        > ├── annotations  # annotation jsons
+        > ├── train2017    # train dataset
+        > └── val2017      # infer dataset
+        > ```
+
+    2. Organize the dataset information into a TXT file. Each row in the file is as follows:
+
+        ```
+        train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2
+        ```
+
+        Each row is an image annotation whose fields are separated by spaces: the first column is the relative path of the image, and the remaining columns are box and class information in the format [xmin,ymin,xmax,ymax,class]. `dataset.py` is the parsing script; it reads each image from the path formed by joining `image_dir` (the dataset directory) with the relative path in `anno_path` (the TXT file path). `image_dir` and `anno_path` are external inputs.
+
+
+## Running the Example
+
+### Training
+
+To train the model, run `train.py` with the dataset `image_dir`, `anno_path` and `mindrecord_dir`. If the `mindrecord_dir` is empty, it will generate [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) files from `image_dir` and `anno_path` (the absolute image path is formed by joining `image_dir` and the relative path in `anno_path`). **Note that if `mindrecord_dir` isn't empty, it will use `mindrecord_dir` rather than `image_dir` and `anno_path`.**
+
+- Stand alone mode
+
+    ```
+    sh run_standalone_train.sh 0 50 ./Mindrecord_train ./dataset ./dataset/train.txt
+
+    ```
+
+    The input variables are device id, epoch size, mindrecord directory path, dataset directory path and train TXT file path.
+
+
+- Distributed mode
+
+    ```
+    sh run_distribute_train.sh 8 150 /data/Mindrecord_train /data /data/train.txt /data/hccl.json
+    ```
+
+    The input variables are device numbers, epoch size, mindrecord directory path, dataset directory path, train TXT file path and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.**
+
+You will get the loss value and time of each step as follows:
+
+```
+epoch: 145 step: 156, loss is 12.202981
+epoch time: 25599.22742843628, per step time: 164.0976117207454
+epoch: 146 step: 156, loss is 16.91706
+epoch time: 23199.971675872803, per step time: 148.7177671530308
+epoch: 147 step: 156, loss is 13.04007
+epoch time: 23801.95164680481, per step time: 152.57661312054364
+epoch: 148 step: 156, loss is 10.431475
+epoch time: 23634.241580963135, per step time: 151.50154859591754
+epoch: 149 step: 156, loss is 14.665991
+epoch time: 24118.8325881958, per step time: 154.60790120638333 
+epoch: 150 step: 156, loss is 10.779521
+epoch time: 25319.57221031189, per step time: 162.30495006610187
+```
+
+Note that these results are for the two-class setting (person and face) using our own annotations on coco2017; you can change `num_classes` in `config.py` to train on your own dataset. We will support the 80 classes of coco2017 in the near future.
+
+### Evaluation
+
+To eval, run `eval.py` with the dataset `image_dir`, `anno_path`(eval txt), `mindrecord_dir` and `ckpt_path`. `ckpt_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file.
+
+```
+sh run_eval.sh 0 yolo.ckpt ./Mindrecord_eval ./dataset ./dataset/eval.txt
+```
+
+The input variables are device id, checkpoint path, mindrecord directory path, dataset directory path and eval TXT file path.
+
+You will get the precision and recall value of each class:
+
+```
+class 0 precision is 88.18%, recall is 66.00%
+class 1 precision is 85.34%, recall is 79.13%
+```
+
+Note that the precision and recall values are results for the two-class setting (person and face) using our own annotations on coco2017.
+
+

From 90e2f7555d9d3778b6d02ef91e08ec160595d6a4 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Tue, 28 Apr 2020 09:27:57 -0400
Subject: [PATCH 168/242] fix globalbatchnorm bug

---
 mindspore/nn/layer/normalization.py | 71 +++++++++++++++++++++--------
 1 file changed, 52 insertions(+), 19 deletions(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index a4062a7a54..f9316c7c11 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -76,6 +76,10 @@ class _BatchNorm(Cell):
         self.shape = P.Shape()
         self.reduce_mean = P.ReduceMean()
         self.square = P.Square()
+        self.sqrt = P.Sqrt()
+        self.cast = P.Cast()
+        self.dtype = P.DType()
+        self.reshape = P.Reshape()
 
         if context.get_context("enable_ge"):
             self.is_ge_backend = True
@@ -112,29 +116,52 @@ class _BatchNorm(Cell):
         group_list = [list(i) for i in world_rank_list]
         return group_list
 
+    def _global_sync(self, x):  # normalize x with all-reduced batch statistics and update the moving stats
+        if len(self.shape(x)) == 4:
+            axes = (0, 2, 3)  # reduce over every axis except axis 1
+            re_shape = (1, self.num_features, 1, 1)  # lets per-feature vectors broadcast against 4-D x
+            x_mean = self.reduce_mean(x, axes)
+            x_mean_square = self.reduce_mean(self.square(x), axes)
+            global_batch_mean = self.all_reduce(x_mean) / self.group
+            global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
+            global_mean = global_batch_mean
+            global_var = global_batch_mean_square - self.square(global_mean)  # Var[x] = E[x^2] - E[x]^2
+            var_sqrt = self.sqrt(global_var + self.eps)
+            mean_first = (x - global_mean) / var_sqrt
+            y = mean_first * self.reshape(self.gamma, re_shape) + self.reshape(self.beta, re_shape)
+
+            mean_sub = self.sub_mean(self.reshape(self.moving_mean, re_shape), global_mean)
+            tmp_mean = self.mul_mean(mean_sub, self.cast(self.momentum, self.dtype(mean_sub)))
+            mean_sub2 = self.sub_var(self.reshape(self.moving_variance, re_shape), global_var)  # variance, not mean
+            tmp_variance = self.mul_var(mean_sub2, self.cast(self.momentum, self.dtype(mean_sub2)))
+            y = F.depend(y, self.assign_sub_mean(self.reshape(self.moving_mean, re_shape), tmp_mean))
+            y = F.depend(y, self.assign_sub_var(self.reshape(self.moving_variance, re_shape), tmp_variance))
+        else:
+            axes = (0,)  # non-4-D input: reduce over axis 0 only
+            re_shape = (1, self.num_features)
+            x_mean = self.reduce_mean(x, axes)
+            x_mean_square = self.reduce_mean(self.square(x), axes)
+            global_batch_mean = self.all_reduce(x_mean) / self.group
+            global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
+            global_mean = global_batch_mean
+            global_var = global_batch_mean_square - self.square(global_mean)  # Var[x] = E[x^2] - E[x]^2
+            var_sqrt = self.sqrt(global_var + self.eps)
+            mean_first = (x - global_mean) / var_sqrt
+            y = mean_first * self.gamma + self.beta
+
+            mean_sub = self.sub_mean(self.moving_mean, global_mean)
+            temp_mean = self.mul_mean(mean_sub, self.cast(self.momentum, self.dtype(mean_sub)))
+            mean_sub2 = self.sub_var(self.moving_variance, global_var)
+            temp_variance = self.mul_var(mean_sub2, self.cast(self.momentum, self.dtype(mean_sub2)))
+            y = F.depend(y, self.assign_sub_mean(self.reshape(self.moving_mean, re_shape), temp_mean))
+            y = F.depend(y, self.assign_sub_var(self.reshape(self.moving_variance, re_shape), temp_variance))
+        return y
+
     def construct(self, x):
         if self.training and self.use_batch_statistics:
             if self.is_ge_backend:
                 if self.is_global:
-                    x_mean = self.reduce_mean(x)
-                    x_mean_square = self.reduce_mean(self.square(x))
-                    global_batch_mean = self.all_reduce(x_mean) / self.group
-                    global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
-                    global_mean = global_batch_mean
-                    global_var = global_batch_mean_square - self.square(global_batch_mean)
-                    y, batch_mean, batch_var, _, _ = \
-                        self.bn_train(x,
-                                      self.gamma,
-                                      self.beta,
-                                      None,
-                                      None)
-
-                    mean_sub = self.sub_mean(self.moving_mean, global_mean)
-                    temp_mean = self.mul_mean(mean_sub, self.momentum)
-                    mean_sub2 = self.sub_var(self.moving_variance, global_var)
-                    temp_variance = self.mul_var(mean_sub2, self.momentum)
-                    y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean))
-                    y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance))
+                    y = self._global_sync(x)
                 else:
                     y, batch_mean, batch_var, _, _ = \
                         self.bn_train(x,
@@ -474,6 +501,12 @@ class GroupNorm(Cell):
         num_channels (int): The number of channels per group.
         eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
         affine (bool): A bool value, this layer will has learnable affine parameters when set to true. Default: True.
+        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
+            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
+            'he_uniform', etc. Default: 'ones'.
+        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
+            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
+            'he_uniform', etc. Default: 'zeros'.
 
     Inputs:
         - **input_x** (Tensor) - The input feature with shape [N, C, H, W].

From dfa66e4d0c049c709893c57723027da8aa010878 Mon Sep 17 00:00:00 2001
From: YuJianfeng <yujianfeng5@huawei.com>
Date: Tue, 28 Apr 2020 21:43:33 +0800
Subject: [PATCH 169/242] Check the empty value tuple when converting it to
 tuple tensor

---
 mindspore/ccsrc/pre_activate/common/helper.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mindspore/ccsrc/pre_activate/common/helper.cc b/mindspore/ccsrc/pre_activate/common/helper.cc
index 9e8187ffb2..107908934c 100644
--- a/mindspore/ccsrc/pre_activate/common/helper.cc
+++ b/mindspore/ccsrc/pre_activate/common/helper.cc
@@ -299,6 +299,10 @@ tensor::TensorPtr CreateTensorWithValueTuple(const ValueTuplePtr &value_tuple_pt
 tensor::TensorPtr CreateTupleTensor(const ValueTuplePtr &value_tuple) {
   MS_EXCEPTION_IF_NULL(value_tuple);
   tensor::TensorPtr tensor = nullptr;
+  if (value_tuple->value().empty()) {
+    MS_LOG(WARNING) << "The value tuple is empty.";
+    return nullptr;
+  }
   ValuePtr v = *(value_tuple->value().begin());
   MS_EXCEPTION_IF_NULL(v);
   // Currently we only deal with the scalar tuple

From eb46dd9198b358a8fac4fbceff260f6a363f3b8a Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Tue, 28 Apr 2020 09:56:28 -0400
Subject: [PATCH 170/242] fix globalbatchnorm bug

---
 mindspore/nn/layer/normalization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index f9316c7c11..9d623bc6fd 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -74,7 +74,7 @@ class _BatchNorm(Cell):
                     management.create_group('group' + str(i), self.rank_list[i])
                     self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1)
         self.shape = P.Shape()
-        self.reduce_mean = P.ReduceMean()
+        self.reduce_mean = P.ReduceMean(keep_dims=True)
         self.square = P.Square()
         self.sqrt = P.Sqrt()
         self.cast = P.Cast()

From 332398d20a0507560d47cfe91dba8faf412d5b44 Mon Sep 17 00:00:00 2001
From: Jesse Lee <jesse.lee@huawei.com>
Date: Tue, 28 Apr 2020 15:19:03 -0400
Subject: [PATCH 171/242] Check return code from WaitPost::Register

---
 .../engine/datasetops/source/celeba_op.cc     |  2 +-
 .../engine/datasetops/source/cifar_op.cc      |  2 +-
 .../engine/datasetops/source/generator_op.cc  |  2 +-
 .../datasetops/source/image_folder_op.cc      |  2 +-
 .../engine/datasetops/source/manifest_op.cc   |  2 +-
 .../engine/datasetops/source/mindrecord_op.cc |  2 +-
 .../engine/datasetops/source/mnist_op.cc      |  2 +-
 .../engine/datasetops/source/text_file_op.cc  |  2 +-
 .../engine/datasetops/source/tf_reader_op.cc  |  2 +-
 .../engine/datasetops/source/voc_op.cc        |  2 +-
 mindspore/ccsrc/dataset/util/CMakeLists.txt   |  1 -
 mindspore/ccsrc/dataset/util/semaphore.cc     | 38 --------------
 mindspore/ccsrc/dataset/util/semaphore.h      | 49 -------------------
 mindspore/ccsrc/dataset/util/task_manager.cc  |  2 +-
 mindspore/ccsrc/dataset/util/wait_post.cc     |  2 +-
 mindspore/ccsrc/dataset/util/wait_post.h      |  2 +-
 tests/ut/cpp/dataset/interrupt_test.cc        |  8 +--
 17 files changed, 17 insertions(+), 105 deletions(-)
 delete mode 100644 mindspore/ccsrc/dataset/util/semaphore.cc
 delete mode 100644 mindspore/ccsrc/dataset/util/semaphore.h

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
index 2394380ea4..b2e4170e84 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
@@ -94,7 +94,7 @@ Status CelebAOp::LaunchThreadsAndInitOp() {
 
   RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
   RETURN_IF_NOT_OK(attr_info_queue_->Register(tree_->AllTasks()));
-  wp_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks()));
 
   RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Walking attr file", std::bind(&CelebAOp::ParseAttrFile, this)));
   RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CelebAOp::WorkerEntry, this, std::placeholders::_1)));
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
index 0c2d57ff42..4a4367460f 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
@@ -149,7 +149,7 @@ Status CifarOp::LaunchThreadsAndInitOp() {
     RETURN_STATUS_UNEXPECTED("tree_ not set");
   }
   RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
-  wp_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks()));
   RETURN_IF_NOT_OK(
     tree_->AllTasks()->CreateAsyncTask("Get cifar data block", std::bind(&CifarOp::ReadCifarBlockDataAsync, this)));
   RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CifarOp::WorkerEntry, this, std::placeholders::_1)));
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
index 37a74f019a..a86fd677cf 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
@@ -168,7 +168,7 @@ Status GeneratorOp::FillBuffer(TensorQTable *tt) {
 Status GeneratorOp::operator()() {
   // Handshake with TaskManager to synchronize thread creation
   TaskManager::FindMe()->Post();
-  wp_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks()));
   std::unique_ptr<DataBuffer> fetched_buffer;
   bool eof = false;
   while (!eof) {
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
index 32d7171c8f..f57a2f8b64 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
@@ -386,7 +386,7 @@ Status ImageFolderOp::LaunchThreadsAndInitOp() {
   RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
   RETURN_IF_NOT_OK(folder_name_queue_->Register(tree_->AllTasks()));
   RETURN_IF_NOT_OK(image_name_queue_->Register(tree_->AllTasks()));
-  wp_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks()));
   // The following code launch 3 threads group
   // 1) A thread that walks all folders and push the folder names to a util:Queue mFoldernameQueue.
   // 2) Workers that pull foldername from mFoldernameQueue, walk it and return the sorted images to mImagenameQueue
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
index ab0c012416..d31b67fd65 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
@@ -140,7 +140,7 @@ Status ManifestOp::LaunchThreadsAndInitOp() {
     RETURN_STATUS_UNEXPECTED("tree_ not set");
   }
   RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
-  wp_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks()));
 
   RETURN_IF_NOT_OK(
     tree_->LaunchWorkers(num_workers_, std::bind(&ManifestOp::WorkerEntry, this, std::placeholders::_1)));
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
index 72dee6f2e6..96675e6f6e 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
@@ -644,7 +644,7 @@ Status MindRecordOp::LaunchThreadAndInitOp() {
   }
 
   RETURN_IF_NOT_OK(io_blk_queues_.Register(tree_->AllTasks()));
-  shard_reader_wait_post_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(shard_reader_wait_post_.Register(tree_->AllTasks()));
   if (shard_reader_->Launch(!block_reader_) == MSRStatus::FAILED) {
     RETURN_STATUS_UNEXPECTED("MindRecordOp launch failed.");
   }
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
index fbf041e985..da25c45027 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
@@ -395,7 +395,7 @@ Status MnistOp::LaunchThreadsAndInitOp() {
     RETURN_STATUS_UNEXPECTED("tree_ not set");
   }
   RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
-  wp_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks()));
   RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&MnistOp::WorkerEntry, this, std::placeholders::_1)));
   TaskManager::FindMe()->Post();
   RETURN_IF_NOT_OK(this->WalkAllFiles());
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
index 8ab186761e..c339e81ed1 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
@@ -370,7 +370,7 @@ Status TextFileOp::operator()() {
   // must be called after launching workers.
   TaskManager::FindMe()->Post();
 
-  io_block_queue_wait_post_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks()));
   NotifyToFillIOBlockQueue();
   while (!finished_reading_dataset_) {
     int64_t buffer_id = 0;
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
index 50c60caa86..b059d54b9a 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
@@ -222,7 +222,7 @@ Status TFReaderOp::operator()() {
   // so workers have to be kept alive until the end of the program
   TaskManager::FindMe()->Post();
 
-  io_block_queue_wait_post_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks()));
 
   NotifyToFillIOBlockQueue();
   while (!finished_reading_dataset_) {
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
index e523aa84d6..834d4c512b 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
@@ -231,7 +231,7 @@ Status VOCOp::LaunchThreadsAndInitOp() {
     RETURN_STATUS_UNEXPECTED("tree_ not set");
   }
   RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
-  wp_.Register(tree_->AllTasks());
+  RETURN_IF_NOT_OK(wp_.Register(tree_->AllTasks()));
   RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&VOCOp::WorkerEntry, this, std::placeholders::_1)));
   TaskManager::FindMe()->Post();
   RETURN_IF_NOT_OK(this->ParseImageIds());
diff --git a/mindspore/ccsrc/dataset/util/CMakeLists.txt b/mindspore/ccsrc/dataset/util/CMakeLists.txt
index ff14d772ca..9ae93618ab 100644
--- a/mindspore/ccsrc/dataset/util/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/util/CMakeLists.txt
@@ -3,7 +3,6 @@ add_library(utils OBJECT
     circular_pool.cc
     memory_pool.cc
     cond_var.cc
-    semaphore.cc
     intrp_service.cc
     task.cc
     task_manager.cc
diff --git a/mindspore/ccsrc/dataset/util/semaphore.cc b/mindspore/ccsrc/dataset/util/semaphore.cc
deleted file mode 100644
index 983c387df5..0000000000
--- a/mindspore/ccsrc/dataset/util/semaphore.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "dataset/util/semaphore.h"
-#include "dataset/util/task_manager.h"
-
-namespace mindspore {
-namespace dataset {
-Status Semaphore::P() {
-  std::unique_lock<std::mutex> lck(mutex_);
-  return (wait_cond_.Wait(&lck, [this]() { return value_ != 0; }));
-}
-
-void Semaphore::V() {
-  std::unique_lock<std::mutex> lck(mutex_);
-  ++value_;
-  wait_cond_.NotifyOne();
-}
-
-void Semaphore::Register(TaskGroup *vg) { (void)wait_cond_.Register(vg->GetIntrpService()); }
-
-Status Semaphore::Deregister() { return (wait_cond_.Deregister()); }
-
-void Semaphore::ResetIntrpState() { wait_cond_.ResetIntrpState(); }
-}  // namespace dataset
-}  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/util/semaphore.h b/mindspore/ccsrc/dataset/util/semaphore.h
deleted file mode 100644
index 74c344f7d3..0000000000
--- a/mindspore/ccsrc/dataset/util/semaphore.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef DATASET_UTIL_SEMAPHORE_H_
-#define DATASET_UTIL_SEMAPHORE_H_
-
-#include "dataset/util/cond_var.h"
-
-namespace mindspore {
-namespace dataset {
-class TaskGroup;
-
-class Semaphore {
- public:
-  explicit Semaphore(int init) : value_(init) {}
-
-  virtual ~Semaphore() {}
-
-  Status P();
-
-  void V();
-
-  void Register(TaskGroup *vg);
-
-  Status Deregister();
-
-  void ResetIntrpState();
-
- private:
-  int value_;
-
-  std::mutex mutex_;
-  CondVar wait_cond_;
-};
-}  // namespace dataset
-}  // namespace mindspore
-#endif  // DATASET_UTIL_SEMAPHORE_H_
diff --git a/mindspore/ccsrc/dataset/util/task_manager.cc b/mindspore/ccsrc/dataset/util/task_manager.cc
index a9f509385e..06340e90ea 100644
--- a/mindspore/ccsrc/dataset/util/task_manager.cc
+++ b/mindspore/ccsrc/dataset/util/task_manager.cc
@@ -53,7 +53,7 @@ Status TaskManager::CreateAsyncTask(const std::string &my_name, const std::funct
     LockGuard lck(&tg_lock_);
     this->grp_list_.insert(vg);
   }
-  (*task)->wp_.Register(vg);
+  RETURN_IF_NOT_OK((*task)->wp_.Register(vg));
   RETURN_IF_NOT_OK((*task)->Run());
   // Wait for the thread to initialize successfully.
   RETURN_IF_NOT_OK((*task)->Wait());
diff --git a/mindspore/ccsrc/dataset/util/wait_post.cc b/mindspore/ccsrc/dataset/util/wait_post.cc
index 99ee0cb77f..204f203d9a 100644
--- a/mindspore/ccsrc/dataset/util/wait_post.cc
+++ b/mindspore/ccsrc/dataset/util/wait_post.cc
@@ -36,7 +36,7 @@ void WaitPost::Clear() {
   value_ = 0;
 }
 
-void WaitPost::Register(TaskGroup *vg) { (void)wait_cond_.Register(vg->GetIntrpService()); }
+Status WaitPost::Register(TaskGroup *vg) { return wait_cond_.Register(vg->GetIntrpService()); }
 
 void WaitPost::ResetIntrpState() { wait_cond_.ResetIntrpState(); }
 
diff --git a/mindspore/ccsrc/dataset/util/wait_post.h b/mindspore/ccsrc/dataset/util/wait_post.h
index bac43f7a4e..4e60995bd9 100644
--- a/mindspore/ccsrc/dataset/util/wait_post.h
+++ b/mindspore/ccsrc/dataset/util/wait_post.h
@@ -36,7 +36,7 @@ class WaitPost {
 
   void Clear();
 
-  void Register(TaskGroup *vg);
+  Status Register(TaskGroup *vg);
 
   Status Deregister();
 
diff --git a/tests/ut/cpp/dataset/interrupt_test.cc b/tests/ut/cpp/dataset/interrupt_test.cc
index 7816346c15..ee2018a050 100644
--- a/tests/ut/cpp/dataset/interrupt_test.cc
+++ b/tests/ut/cpp/dataset/interrupt_test.cc
@@ -20,7 +20,6 @@
 #include "dataset/util/intrp_service.h"
 #include "dataset/util/task_manager.h"
 #include "dataset/util/queue.h"
-#include "dataset/util/semaphore.h"
 
 using namespace mindspore::dataset;
 using mindspore::MsLogLevel::INFO;
@@ -55,11 +54,12 @@ TEST_F(MindDataTestIntrpService, Test1) {
 TEST_F(MindDataTestIntrpService, Test2) {
   MS_LOG(INFO) << "Test Semaphore";
   Status rc;
-  Semaphore sem(0);
-  sem.Register(&vg_);
+  WaitPost wp;
+  rc = wp.Register(&vg_);
+  EXPECT_TRUE(rc.IsOk());
   vg_.CreateAsyncTask("Test1", [&]() -> Status {
     TaskManager::FindMe()->Post();
-      Status rc = sem.P();
+      Status rc = wp.Wait();
       EXPECT_TRUE(rc.IsInterrupted());
       return rc;
   });

From 2facd80c8d24c603b7545a05e6985d3d56cd5d8f Mon Sep 17 00:00:00 2001
From: Jamie Nisbet <jamie.nisbet@huawei.com>
Date: Tue, 28 Apr 2020 14:59:32 -0400
Subject: [PATCH 172/242] updated print functions for info diagnostics

fix some ci failures

more ci fixes

more ci fails

yet another ci fail

more ci stuff

still ci

more ci

more ci

more ci
---
 .../dataset/engine/datasetops/barrier_op.cc   | 18 +++++++---
 .../dataset/engine/datasetops/batch_op.cc     | 18 +++++++---
 .../dataset/engine/datasetops/dataset_op.cc   | 24 ++++++++-----
 .../engine/datasetops/device_queue_op.cc      | 18 +++++++---
 .../dataset/engine/datasetops/filter_op.cc    | 25 ++++++++-----
 .../ccsrc/dataset/engine/datasetops/map_op.cc | 34 +++++++++++-------
 .../dataset/engine/datasetops/parallel_op.cc  | 16 +++++----
 .../dataset/engine/datasetops/pipeline_op.cc  | 26 +++++++++++---
 .../dataset/engine/datasetops/project_op.cc   | 22 +++++++++---
 .../dataset/engine/datasetops/rename_op.cc    | 26 ++++++++++----
 .../dataset/engine/datasetops/repeat_op.cc    | 35 ++++++++++--------
 .../dataset/engine/datasetops/shuffle_op.cc   | 22 ++++++++----
 .../dataset/engine/datasetops/skip_op.cc      | 20 +++++++----
 .../engine/datasetops/source/celeba_op.cc     | 17 +++++++--
 .../engine/datasetops/source/cifar_op.cc      | 17 +++++++--
 .../engine/datasetops/source/generator_op.cc  | 23 +++++++++++-
 .../engine/datasetops/source/generator_op.h   |  5 +++
 .../datasetops/source/image_folder_op.cc      | 19 +++++++---
 .../engine/datasetops/source/manifest_op.cc   | 17 +++++++--
 .../engine/datasetops/source/mindrecord_op.cc | 28 ++++++++-------
 .../engine/datasetops/source/mnist_op.cc      | 18 +++++++---
 .../engine/datasetops/source/storage_op.cc    | 36 +++++++------------
 .../engine/datasetops/source/text_file_op.cc  | 25 +++++++++++++
 .../engine/datasetops/source/text_file_op.h   |  5 +++
 .../engine/datasetops/source/tf_reader_op.cc  | 31 ++++++++++++++++
 .../engine/datasetops/source/tf_reader_op.h   |  5 +++
 .../engine/datasetops/source/voc_op.cc        | 18 +++++++---
 .../dataset/engine/datasetops/take_op.cc      | 21 +++++++----
 .../ccsrc/dataset/engine/datasetops/zip_op.cc | 18 +++++++---
 .../ccsrc/dataset/engine/execution_tree.cc    | 33 +++++++++++++----
 .../ccsrc/dataset/engine/execution_tree.h     | 14 ++++++--
 31 files changed, 483 insertions(+), 171 deletions(-)

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc
index b50a7788da..3d5b682155 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/barrier_op.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include "dataset/engine/datasetops/barrier_op.h"
+#include <iomanip>
 #include <utility>
 #include "dataset/core/constants.h"
 #include "dataset/engine/data_buffer.h"
@@ -214,10 +215,19 @@ Status BarrierOp::getNextTensorRow(TensorRow *new_row) {
 
 // A function that prints info about the Operator
 void BarrierOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  PipelineOp::Print(out, show_all);
-  out << "\nBarrierOp:\n"
-      << "\nCondition " << condition_name_ << "\n\n";
+  // Always show the id and name as first line regardless of whether this is a summary or detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <BarrierOp>:";
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << "\n";
+  } else {
+    // Call the super class for displaying any common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nCondition: " << condition_name_ << "\n\n";
+  }
 }
 
 // overwrite function and handle eof
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
index c80078cb44..ad8b95b625 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
@@ -15,6 +15,7 @@
  */
 #include "dataset/engine/datasetops/batch_op.h"
 #include <utility>
+#include <iomanip>
 #include "common/utils.h"
 #include "dataset/engine/data_buffer.h"
 #include "dataset/engine/db_connector.h"
@@ -102,10 +103,19 @@ Status BatchOp::operator()() {
 }
 
 void BatchOp::Print(std::ostream &out, bool show_all) const {
-  ParallelOp::Print(out, show_all);
-  out << "\nBatchOp:\n"
-      << "number of parallel workers: " << num_workers_ << "\nBatch size: " << start_batch_size_
-      << "\nDrop remainder: " << (drop_ ? "yes" : "no") << "\n\n";
+  // Always show the id and name as first line regardless of whether this is a summary or detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <BatchOp>:";
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << " [batch size: " << start_batch_size_ << "]\n";
+  } else {
+    // Call the super class for displaying any common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nStart batch size: " << start_batch_size_ << "\nDrop remainder: " << (drop_ ? "yes" : "no") << "\n\n";
+  }
 }
 
 Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *source_table,
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc
index 5e3ea3dc44..adbf42487e 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/dataset_op.cc
@@ -92,18 +92,24 @@ void DatasetOp::CreateConnector(int32_t num_producers, int32_t num_consumers) {
 
 // A print method typically used for debugging.  showAll of true will recursively descend to child prints
 void DatasetOp::Print(std::ostream &out, bool show_all) const {
+  // When show_all is false, we display a 1 liner piece of text for the op.
+  // When show_all is true, we display more detailed output for the op.
+  // Derived printers should show their own header info, then call base class printer, followed by
+  // derived-specific items.
+  // For now, the base class doesn't have any summary info to show so it's a no-op in that case.
   if (show_all) {
+    // The detailed display will show common base class info of the op.  Allow the derived class to print
+    // its own id and name though as the first line.
+    out << "\nNumber of children     : " << child_.size();
     for (size_t i = 0; i < child_.size(); i++) {
-      child_[i]->Print(out, show_all);
+      out << "\n  Child[" << i << "] id: " << child_[i]->id();
     }
-  }
-  out << "\n-------------------------"
-      << "\nOperator #             : " << operator_id_ << "\nNumber of children     : " << child_.size()
-      << "\nNumber of parents      : " << parent_.size() << "\nConnector queue size   : " << oc_queue_size_
-      << "\nOperator control flags : 0x" << std::hex << std::setw(8) << std::setfill('0') << op_ctrl_flags_ << std::dec
-      << std::setfill(' ') << "\nHas parents:\n";
-  for (size_t i = 0; i < parent_.size(); i++) {
-    out << "Parent[" << i << "] id: " << parent_[i]->id() << "\n";
+    out << "\nNumber of parents      : " << parent_.size();
+    for (size_t i = 0; i < parent_.size(); i++) {
+      out << "\n  Parent[" << i << "] id: " << parent_[i]->id();
+    }
+    out << "\nConnector queue size   : " << oc_queue_size_ << "\nOperator control flags : 0x" << std::hex
+        << std::setw(8) << std::setfill('0') << op_ctrl_flags_ << std::dec << std::setfill(' ');
   }
 }
 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
index 71e4ce64a4..2c91d36259 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "dataset/engine/datasetops/device_queue_op.h"
-
+#include <iomanip>
 #include <iostream>
 #include <memory>
 
@@ -246,9 +246,19 @@ Status DeviceQueueOp::SendDataToCPU() {
 }
 
 void DeviceQueueOp::Print(std::ostream &out, bool show_all) const {
-  PipelineOp::Print(out, show_all);
-
-  out << "DeviceQueueOp: channelName: " << channel_name_ << ", prefetchSize: " << prefetch_size_ << '\n';
+  // Always show the id and name as first line regardless of whether this is a summary or detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <DeviceQueueOp>:";
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << "\n";
+  } else {
+    // Call the super class for displaying any common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nChannel name: " << channel_name_ << "\nPrefetch size: " << prefetch_size_ << "\n\n";
+  }
 }
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc
index ce312ce3d9..5ede8ad6f4 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/filter_op.cc
@@ -16,6 +16,7 @@
 #include "dataset/engine/datasetops/filter_op.h"
 #include <algorithm>
 #include <cstring>
+#include <iomanip>
 #include <iostream>
 #include <memory>
 #include <vector>
@@ -88,14 +89,22 @@ Status FilterOp::ValidateInColumns(const std::unordered_map<std::string, int32_t
 
 // A print method typically used for debugging.
 void FilterOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first.
-  ParallelOp::Print(out, show_all);
-
-  // Then display our own stuff.
-  out << "\nFilterOp:";
-  out << "\n  Input column names:";
-  for (size_t i = 0; i < in_columns_.size(); i++) {
-    out << " " << in_columns_[i];
+  // Always show the id and name as first line regardless of whether this is a summary or detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <FilterOp>:";
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << "\n";
+  } else {
+    // Call the super class for displaying any common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nInput column names:";
+    for (size_t i = 0; i < in_columns_.size(); i++) {
+      out << " " << in_columns_[i];
+    }
+    out << "\n\n";
   }
 }
 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc
index b6d603bac9..4cbe2ac603 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/map_op.cc
@@ -15,6 +15,7 @@
  */
 #include "dataset/engine/datasetops/map_op.h"
 #include <cstring>
+#include <iomanip>
 #include <iostream>
 #include <memory>
 #include <vector>
@@ -81,20 +82,27 @@ int32_t MapOp::num_consumers() const {
 
 // A print method typically used for debugging
 void MapOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  ParallelOp::Print(out, show_all);
-
-  // Then display our own stuff
-  out << "\nMapOp:";
-  out << "\n  Input column names:";
-  for (size_t i = 0; i < in_columns_.size(); i++) {
-    out << " " << in_columns_[i];
-  }
-  out << "\n  TensorOps:";
-  for (size_t i = 0; i < tfuncs_.size(); i++) {
-    out << " " << tfuncs_[i];
+  // Always show the id and name as first line regardless of whether this is a summary or detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <MapOp>:";
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << "\n";
+  } else {
+    // Call the super class for displaying any common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nInput column names:";
+    for (size_t i = 0; i < in_columns_.size(); i++) {
+      out << " " << in_columns_[i];
+    }
+    out << "\n  TensorOps:";
+    for (size_t i = 0; i < tfuncs_.size(); i++) {
+      out << " " << tfuncs_[i];
+    }
+    out << "\n\n";
   }
-  out << "\n";
 }
 
 // This class functor will provide the master loop that drives the logic for performing the work
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc
index 4b2af2250a..2eeb931554 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/parallel_op.cc
@@ -55,12 +55,16 @@ Status ParallelOp::CreateWorkerConnector(int32_t worker_connector_size) {
 
 // A print method typically used for debugging
 void ParallelOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  DatasetOp::Print(out, show_all);
-
-  // Then show our own stuff
-  out << "ParallelOp:";
-  out << "\n  Num workers                   : " << num_workers_ << "\n";
+  // Summary 1-liner print
+  if (!show_all) {
+    out << " [workers: " << num_workers_ << "]";
+    // Call super class printer
+    DatasetOp::Print(out, show_all);
+  } else {
+    // Detailed print
+    DatasetOp::Print(out, show_all);
+    out << "\nNum workers: " << num_workers_;
+  }
 }
 
 // Override base class reset to provide reset actions specific to the ParallelOp class.
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc
index 56fc24883a..69ace1ed9a 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/pipeline_op.cc
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <iomanip>
 #include <iostream>
 #include "dataset/engine/datasetops/pipeline_op.h"
 
@@ -23,11 +24,26 @@ PipelineOp::PipelineOp(int32_t op_connector_size) : DatasetOp(op_connector_size)
 
 // A print method typically used for debugging
 void PipelineOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  DatasetOp::Print(out, show_all);
-
-  // Then display our own stuff for the pipeline op
-  // out << "This is a pipeline op print.  nothing to display here at the moment.\n";
+  // Summary 1-liner print
+  if (!show_all) {
+    out << " [workers: ";
+    if (this->inlined()) {
+      out << "0 (inlined)]";
+    } else {
+      out << "1]";  // Pipeline ops only have 1 worker
+    }
+    // Call super class printer
+    DatasetOp::Print(out, show_all);
+  } else {
+    // Detailed print
+    DatasetOp::Print(out, show_all);
+    out << "\nNum workers: ";
+    if (this->inlined()) {
+      out << "0 (inlined)";
+    } else {
+      out << "1";  // Pipeline ops only have 1 worker
+    }
+  }
 }
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc
index b87967dde8..128d3e68e5 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/project_op.cc
@@ -16,6 +16,7 @@
 
 #include "dataset/engine/datasetops/project_op.h"
 #include <algorithm>
+#include <iomanip>
 #include <iostream>
 #include <string>
 #include <unordered_map>
@@ -49,12 +50,23 @@ ProjectOp::ProjectOp(const std::vector<std::string> &columns_to_project)
     : PipelineOp(0), columns_to_project_(columns_to_project) {}
 
 void ProjectOp::Print(std::ostream &out, bool show_all) const {
-  PipelineOp::Print(out, show_all);
-  out << "ProjectOp: columns that are projected: ";
-  for (size_t i = 0; i < columns_to_project_.size(); i++) {
-    out << columns_to_project_[i] << " ";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <ProjectOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then list the projected column names, one per line
+    out << "\nColumns that are projected:";
+    for (size_t i = 0; i < columns_to_project_.size(); i++) {
+      out << "\n" << columns_to_project_[i];
+    }
+    out << "\n\n";
   }
-  out << '\n';
 }
 
 // Gets a buffer from the child operator and projects the buffer.
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc
index 725476bf91..5f354abb04 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "dataset/engine/datasetops/rename_op.h"
-
+#include <iomanip>
 #include <vector>
 #include <utility>
 #include <unordered_map>
@@ -138,11 +138,27 @@ Status RenameOp::RenameBuffer(std::unique_ptr<DataBuffer> *input_buffer) {
 // prints rename
 void RenameOp::Print(std::ostream &out,      // In: The output stream to print to
                      bool show_all) const {  // In: T/F if it should print everything
-  // Call base class printer first
-  PipelineOp::Print(out, show_all);
-  out << "\nRenameOp:\n";
-  for (size_t i = 0; i < in_columns_.size(); ++i) {
-    out << "\nin Columns: " << in_columns_[i] << "\nOut Columns: " << out_columns_[i] << "\n\n";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <RenameOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then list the input column names, one per line
+    out << "\nIn columns:";
+    for (size_t i = 0; i < in_columns_.size(); ++i) {
+      out << "\n  " << in_columns_[i];
+    }
+    // Label the output columns separately so they are not read as additional input columns
+    out << "\nOut columns:";
+    for (size_t i = 0; i < out_columns_.size(); ++i) {
+      out << "\n  " << out_columns_[i];
+    }
+    out << "\n\n";
   }
 }
 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc
index 33c731c400..065631eb31 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.cc
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <iomanip>
 #include <iostream>
 #include <utility>
 
@@ -51,22 +52,28 @@ RepeatOp::~RepeatOp() {}
 
 // A print method typically used for debugging
 void RepeatOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  PipelineOp::Print(out, show_all);
-
-  // Then display our own stuff
-  out << "RepeatOp:"
-      << "\nCurrent repeat count: " << repeat_count_ << "\nMax repeat count: " << max_repeats_
-      << "\nLeaf Nodes in my execution path:";
-  if (!eoe_ops_.empty()) {
-    out << "\n";
-    for (size_t i = 0; i < eoe_ops_.size(); i++) {
-      out << "  Operator: " << eoe_ops_[i]->id() << "\n";
-    }
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <RepeatOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // Then append this op's own 1-liner info (the max repeat count) and end the line
+    out << " [repeats: " << max_repeats_ << "]\n";
   } else {
-    out << " kNone.";
+    // Detailed print: call the super class printer for the common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then print the repeat counters and the ids of the ops held in eoe_ops_
+    out << "\nCurrent repeat count: " << repeat_count_ << "\nMax repeat count: " << max_repeats_
+        << "\nLeaf Nodes in execution path:";
+    if (!eoe_ops_.empty()) {
+      for (size_t i = 0; i < eoe_ops_.size(); i++) {
+        out << "\n  Operator: " << eoe_ops_[i]->id();
+      }
+    } else {
+      out << " None.";
+    }
+    out << "\n\n";
   }
-  out << "\n-------------------------\n\n";  // End the display with this line
 }
 
 // Base-class override for executing specific RepeatOp configurations. This code will be called
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc
index 422c38f2f2..7b09bcef4d 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.cc
@@ -19,6 +19,7 @@
 #include <securec.h>
 #include <algorithm>
 #include <chrono>
+#include <iomanip>
 #include <iostream>
 #include <limits>
 #include <random>
@@ -108,13 +109,20 @@ Status ShuffleOp::SelfReset() {
 
 // A print method typically used for debugging
 void ShuffleOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  PipelineOp::Print(out, show_all);
-
-  // Then display our own stuff
-  out << "ShuffleOp:\n  Shuffle size: " << shuffle_size_ << "\n  rows_per_buffer_: " << rows_per_buffer_
-      << "\n  shuffle_buffer_state_: " << shuffle_buffer_state_ << "\n  shuffle_seed_: " << shuffle_seed_;
-  out << "\n-------------------------\n\n";  // End the display with this line
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <ShuffleOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // Then append this op's own 1-liner info (the shuffle size) and end the line
+    out << " [shuffle size: " << shuffle_size_ << "]\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then print the shuffle configuration and current buffer state
+    out << "\nShuffle size: " << shuffle_size_ << "\nRows per buffer: " << rows_per_buffer_
+        << "\nShuffle buffer state: " << shuffle_buffer_state_ << "\nShuffle seed: " << shuffle_seed_ << "\n\n";
+  }
 }
 
 // Private function to add a new row to the shuffle buffer.
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc
index d851f2c699..ec45d5d25e 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <iomanip>
 #include <iostream>
 #include <utility>
 
@@ -51,12 +52,19 @@ SkipOp::~SkipOp() {}
 
 // A print method typically used for debugging
 void SkipOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  PipelineOp::Print(out, show_all);
-
-  // Then display our own stuff
-  out << "SkipOp:"
-      << "\nCurrent skip count: " << skip_count_ << "\nMax skip count: " << max_skips_;
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <SkipOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // Then append this op's own 1-liner info (the max skip count) and end the line
+    out << " [skips: " << max_skips_ << "]\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then print the current and maximum skip counts
+    out << "\nSkip count: " << skip_count_ << "\nMax skips: " << max_skips_ << "\n\n";
+  }
 }
 
 // Since the buffer may contain multi rows, this function will drop the rows
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
index 2394380ea4..998672bca5 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
@@ -16,6 +16,7 @@
 #include "dataset/engine/datasetops/source/celeba_op.h"
 
 #include <fstream>
+#include <iomanip>
 #include "dataset/core/config_manager.h"
 #include "dataset/util/path.h"
 #include "dataset/engine/datasetops/source/sampler/sequential_sampler.h"
@@ -434,9 +435,19 @@ Status CelebAOp::LoadTensorRow(const std::pair<std::string, std::vector<int32_t>
 }
 
 void CelebAOp::Print(std::ostream &out, bool show_all) const {
-  DatasetOp::Print(out, show_all);
-  out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_exact_
-      << "\nceleba dir: " << folder_path_ << "\n-------------------------\n";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <CelebAOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the row count and the dataset directory
+    out << "\nNumber of rows:" << num_rows_exact_ << "\nceleba dir: " << folder_path_ << "\n\n";
+  }
 }
 
 // Reset Sampler and wakeup Master thread (functor)
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
index 0c2d57ff42..4f69938a31 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <fstream>
+#include <iomanip>
 #include <utility>
 
 #include "common/utils.h"
@@ -225,9 +226,19 @@ Status CifarOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<Dat
 }
 
 void CifarOp::Print(std::ostream &out, bool show_all) const {
-  DatasetOp::Print(out, show_all);
-  out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_
-      << "\nCifar Directory: " << folder_path_ << "\n-------------------------\n";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <CifarOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the row count and the dataset directory
+    out << "\nNumber of rows:" << num_rows_ << "\nCifar directory: " << folder_path_ << "\n\n";
+  }
 }
 
 // Reset Sampler and wakeup Master thread (functor)
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
index 37a74f019a..b0c8b8af35 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
@@ -13,8 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "dataset/core/global_context.h"
 #include "dataset/engine/datasetops/source/generator_op.h"
+#include <iomanip>
+#include "dataset/core/global_context.h"
 #include "dataset/engine/db_connector.h"
 #include "dataset/engine/data_buffer.h"
 #include "dataset/engine/execution_tree.h"
@@ -58,6 +59,26 @@ GeneratorOp::GeneratorOp(py::function generator_function, std::vector<std::strin
 
 GeneratorOp::~GeneratorOp() { this->Dealloc(); }
 
+void GeneratorOp::Print(std::ostream &out, bool show_all) const {
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <GeneratorOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then list the column names, one per line
+    out << "\nColumn names:\n";
+    for (size_t i = 0; i < column_names_.size(); ++i) {
+      out << "\n  " << column_names_[i];
+    }
+    out << "\n\n";
+  }
+}
+
 void GeneratorOp::Dealloc() noexcept {
   // Setup GIL state
   PyGILState_STATE gstate;
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h
index a5407a9b09..8165fed970 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h
@@ -95,6 +95,11 @@ class GeneratorOp : public PipelineOp {
 
   ~GeneratorOp();
 
+  // Prints this operator to a stream; typically used for debugging
+  // @param out - The output stream to write to
+  // @param show_all - True to print full details, false to print only a 1-line summary
+  void Print(std::ostream &out, bool show_all) const override;
+
   // << Stream output operator overload
   // @notes This allows you to write the debug print info using stream operators
   // @param out - reference to the output stream being overloaded
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
index 32d7171c8f..b52404ce69 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
@@ -14,9 +14,8 @@
  * limitations under the License.
  */
 #include "dataset/engine/datasetops/source/image_folder_op.h"
-
 #include <fstream>
-
+#include <iomanip>
 #include "common/utils.h"
 #include "dataset/core/config_manager.h"
 #include "dataset/core/tensor_shape.h"
@@ -243,9 +242,19 @@ Status ImageFolderOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_p
 }
 
 void ImageFolderOp::Print(std::ostream &out, bool show_all) const {
-  DatasetOp::Print(out, show_all);
-  out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_
-      << "\nImageFolder Directory: " << folder_path_ << "\n-------------------------\n";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <ImageFolderOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the row count and the dataset directory
+    out << "\nNumber of rows:" << num_rows_ << "\nImageFolder directory: " << folder_path_ << "\n\n";
+  }
 }
 
 // Reset Sampler and wakeup Master thread (functor)
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
index ab0c012416..fb47274aae 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <fstream>
+#include <iomanip>
 #include <nlohmann/json.hpp>
 
 #include "common/utils.h"
@@ -239,9 +240,19 @@ Status ManifestOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<
 }
 
 void ManifestOp::Print(std::ostream &out, bool show_all) const {
-  DatasetOp::Print(out, show_all);
-  out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_
-      << "\nManifest file: " << file_ << "\n-------------------------\n";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <ManifestOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the row count and the manifest file
+    out << "\nNumber of rows:" << num_rows_ << "\nManifest file: " << file_ << "\n\n";
+  }
 }
 
 // Reset Sampler and wakeup Master thread (functor)
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
index 72dee6f2e6..c8bbf0e3c4 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <cstdint>
+#include <iomanip>
 #include <limits>
 #include <utility>
 
@@ -179,18 +180,21 @@ MindRecordOp::~MindRecordOp() {}
 
 // A print method typically used for debugging
 void MindRecordOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  ParallelOp::Print(out, show_all);
-
-  // Then display our own stuff
-  out << "\nMindRecordOp:";
-  out << "\n  1 Dataset file                : " << dataset_file_;
-  out << "\n  Number of rows                : " << num_rows_;
-  out << "\n  Rows per buffer               : " << rows_per_buffer_;
-  out << "\n  Number of buffers             : " << buffers_needed_;
-  out << "\n  Number of ShardReader workers : " << num_mind_record_workers_;
-
-  out << "\n\n";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <MindRecordOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the dataset file, row/buffer counts and ShardReader worker count
+    out << "\n1 Dataset file : " << dataset_file_ << "\nNumber of rows : " << num_rows_
+        << "\nRows per buffer : " << rows_per_buffer_ << "\nNumber of buffers : " << buffers_needed_
+        << "\nNumber of ShardReader workers : " << num_mind_record_workers_ << "\n\n";
+  }
 }
 
 template <typename T>
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
index fbf041e985..33aca001e9 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
@@ -16,7 +16,7 @@
 #include "dataset/engine/datasetops/source/mnist_op.h"
 
 #include <fstream>
-
+#include <iomanip>
 #include "common/utils.h"
 #include "dataset/core/config_manager.h"
 #include "dataset/core/tensor_shape.h"
@@ -190,9 +190,19 @@ Status MnistOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<Dat
 }
 
 void MnistOp::Print(std::ostream &out, bool show_all) const {
-  DatasetOp::Print(out, show_all);
-  out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_
-      << "\nMNIST Directory: " << folder_path_ << "\n-------------------------\n";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <MnistOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the row count and the dataset directory
+    out << "\nNumber of rows:" << num_rows_ << "\nMNIST Directory: " << folder_path_ << "\n\n";
+  }
 }
 
 // Reset Sampler and wakeup Master thread (functor)
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc
index 2ca957ae6d..f310a097ee 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc
@@ -23,6 +23,7 @@
 #include <chrono>
 #include <cstdint>
 #include <fstream>
+#include <iomanip>
 #include <iostream>
 #include <memory>
 #include <mutex>
@@ -319,31 +320,18 @@ StorageOp::~StorageOp() {}
 
 // A print method typically used for debugging
 void StorageOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  ParallelOp::Print(out, show_all);
-
-  // Then display our own stuff
-  out << "\nStorageOp:";
-  out << "\n  Dataset files dir : " << dataset_files_dir_ << "\n  Dataset schema file    : " << schema_file_;
-  if (!dataset_file_list_.empty()) {
-    out << "\n  Dataset Files List:\n";
-    for (auto filename : dataset_file_list_) {
-      out << "      " << filename << "\n";
-    }
-  }
-  out << "\n\n";
-  if (!data_buffers_.empty()) {
-    out << std::boolalpha << "  Number of DataBuffers inside StorageOp: " << data_buffers_.size()
-        << "\n  Number of rows: " << num_rows_ << "\n  Rows per buffer: " << rows_per_buffer_ << "\n\n  DataBuffers:\n";
-
-    // Iterate over each DataBuffer and display the buffer id and the buffer
-    int32_t i = 0;
-    for (i = 0; i < data_buffers_.size(); i++) {
-      out << "  " << i << ")\n";
-      data_buffers_[i]->Print(out, show_all);
-    }
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <StorageOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
   } else {
-    out << "DataCache is empty!\n";
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Op-specific details are not printed here; emit a placeholder message instead
+    out << "\nDetailed operator printing has not been implemented for this op.\n\n";
   }
 }
 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
index 8ab186761e..ac14f3261e 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
@@ -16,6 +16,7 @@
 
 #include <algorithm>
 #include <fstream>
+#include <iomanip>
 #include <memory>
 #include <string>
 #include <utility>
@@ -90,6 +91,30 @@ TextFileOp::TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num
   worker_connector_size_ = worker_connector_size;
 }
 
+// A print method typically used for debugging
+void TextFileOp::Print(std::ostream &out, bool show_all) const {
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <TextFileOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the reader configuration, the input files and the data schema
+    out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << num_samples_
+        << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
+        << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nText files list:\n";
+    for (size_t i = 0; i < text_files_list_.size(); ++i) {
+      out << " " << text_files_list_[i];
+    }
+    out << "\nData Schema:\n";
+    out << *data_schema_ << "\n\n";
+  }
+}
+
 Status TextFileOp::Init() {
   RETURN_IF_NOT_OK(filename_index_->insert(text_files_list_));
 
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h
index 49f224ffc3..305b2596fa 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.h
@@ -144,6 +144,11 @@ class TextFileOp : public ParallelOp {
   // Default destructor
   ~TextFileOp() = default;
 
+  // Prints this operator to a stream; typically used for debugging
+  // @param out - The output stream to write to
+  // @param show_all - True to print full details, false to print only a 1-line summary
+  void Print(std::ostream &out, bool show_all) const override;
+
   // Instantiates the internal queues and connectors
   // @return Status - the error code returned
   Status Init();
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
index 50c60caa86..e4121bf8f8 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
@@ -18,6 +18,7 @@
 #include <cmath>
 #include <condition_variable>
 #include <future>
+#include <iomanip>
 #include <memory>
 #include <mutex>
 #include <utility>
@@ -155,6 +156,36 @@ TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64
   worker_connector_size_ = worker_connector_size;
 }
 
+// A print method typically used for debugging
+void TFReaderOp::Print(std::ostream &out, bool show_all) const {
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <TFReaderOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the reader configuration, the input files, any column filter and the data schema
+    out << "\nRows per buffer: " << rows_per_buffer_ << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_
+        << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no")
+        << "\nDataset files list:\n";
+    for (size_t i = 0; i < dataset_files_list_.size(); ++i) {
+      out << " " << dataset_files_list_[i];
+    }
+    if (!columns_to_load_.empty()) {
+      out << "\nColumns to load:\n";
+      for (size_t i = 0; i < columns_to_load_.size(); ++i) {
+        out << " " << columns_to_load_[i];
+      }
+    }
+    out << "\nData Schema:\n";
+    out << *data_schema_ << "\n\n";
+  }
+}
+
 Status TFReaderOp::Init() {
   if (data_schema_->Empty()) {
     RETURN_IF_NOT_OK(CreateSchema(dataset_files_list_[0], columns_to_load_));
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h
index 560cff114f..f0f08c7971 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h
@@ -188,6 +188,11 @@ class TFReaderOp : public ParallelOp {
   // Default destructor
   ~TFReaderOp() = default;
 
+  // Prints this operator to a stream; typically used for debugging
+  // @param out - The output stream to write to
+  // @param show_all - True to print full details, false to print only a 1-line summary
+  void Print(std::ostream &out, bool show_all) const override;
+
   // Instantiates the internal queues and connectors.
   // @return Status - the error code returned.
   Status Init();
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
index e523aa84d6..2befa1a3be 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
@@ -16,7 +16,7 @@
 #include "dataset/engine/datasetops/source/voc_op.h"
 
 #include <fstream>
-
+#include <iomanip>
 #include "common/utils.h"
 #include "dataset/core/config_manager.h"
 #include "dataset/core/tensor_shape.h"
@@ -133,9 +133,19 @@ Status VOCOp::operator()() {
 }
 
 void VOCOp::Print(std::ostream &out, bool show_all) const {
-  DatasetOp::Print(out, show_all);
-  out << "\nnumber of parallel workers:" << num_workers_ << "\nNumber of rows:" << num_rows_
-      << "\nVOC Directory: " << folder_path_ << "\n-------------------\n";
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <VOCOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // This op has no 1-liner info of its own, so just end the line
+    out << "\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then print the row count and the dataset directory
+    out << "\nNumber of rows: " << num_rows_ << "\nVOC Directory: " << folder_path_ << "\n\n";
+  }
 }
 
 Status VOCOp::Reset() {
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc
index 5d7df58153..872c4c27c5 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+#include <iomanip>
 #include <utility>
 
 #include "common/utils.h"
@@ -47,12 +47,19 @@ TakeOp::TakeOp(int32_t count) : PipelineOp(0), max_takes_(count), take_count_(0)
 
 // A print method typically used for debugging
 void TakeOp::Print(std::ostream &out, bool show_all) const {
-  // Call base class printer first
-  PipelineOp::Print(out, show_all);
-
-  // Then display our own stuff
-  out << "TakeOp:"
-      << "\nCurrent take count: " << take_count_ << "\nMax take count: " << max_takes_;
+  // Always show the id and name on the first line, whether this is a summary or a detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <TakeOp>:";
+  if (!show_all) {
+    // Summary print: call the super class printer for the common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // Then append this op's own 1-liner info (the max take count) and end the line
+    out << " [takes: " << max_takes_ << "]\n";
+  } else {
+    // Detailed print: call the super class printer for the common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then print the current and maximum take counts
+    out << "\nTake count: " << take_count_ << "\nMax takes: " << max_takes_ << "\n\n";
+  }
 }
 
 // This function will be call muti times to returns the buffer, when meet required max take count or meet
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc
index ec771740c1..bb8bddcc09 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/zip_op.cc
@@ -15,6 +15,7 @@
  */
 #include "dataset/engine/datasetops/zip_op.h"
 #include <utility>
+#include <iomanip>
 #include "dataset/core/constants.h"
 #include "dataset/engine/data_buffer.h"
 #include "dataset/engine/db_connector.h"
@@ -224,10 +225,19 @@ Status ZipOp::drainPipeline() {
 // A function that prints info about the Operator
 void ZipOp::Print(std::ostream &out,      // In: The output stream to print to
                   bool show_all) const {  // In: T/F if it should print everything
-  // Call base class printer first
-  PipelineOp::Print(out, show_all);
-  out << "\nZipOp:\n"
-      << "\nDatasets: " << children_num_ << "\n\n";
+  // Always show the id and name as first line regardless if this is summary or detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <ZipOp>:";
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    PipelineOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << "\n";
+  } else {
+    // Call the super class for displaying any common detailed info
+    PipelineOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nDatasets: " << children_num_ << "\n\n";
+  }
 }
 
 // overwrite function and handle eof
diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.cc b/mindspore/ccsrc/dataset/engine/execution_tree.cc
index ebfa532195..dbcc201d48 100644
--- a/mindspore/ccsrc/dataset/engine/execution_tree.cc
+++ b/mindspore/ccsrc/dataset/engine/execution_tree.cc
@@ -81,13 +81,29 @@ Status ExecutionTree::AssignRoot(const std::shared_ptr<DatasetOp> &op) {
 }
 
 // A print method typically used for debugging
-void ExecutionTree::Print(std::ostream &out, bool show_all) const {
-  out << "Total number of nodes in the ExecutionTree (may or may not be connected nodes): " << id_count_
-      << "\nTree state: " << static_cast<int>(tree_state_) << "\n";
-  if (root_ != nullptr) {
-    // Just call the printer on the root node.  Each node descends to it's children to print them if
-    // showAll is true.
-    root_->Print(out, show_all);
+void ExecutionTree::Print(std::ostream &out) const {
+  out << "Execution tree summary:\n"
+      << "-----------------------\n";
+  this->PrintNode(out, root_, "", true, false);
+  out << "\nExecution tree operator details:\n"
+      << "--------------------------------\n";
+  this->PrintNode(out, root_, "", true, true);
+}
+
+// A helper function for doing the recursive printing
+void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr<DatasetOp> &dataset_op, std::string indent,
+                              bool last, bool detailed) const {
+  // Skip a missing node (e.g. a root that is still nullptr); otherwise choose the printer from the detailed arg.
+  if (dataset_op == nullptr) return;
+  if (!detailed) {
+    out << indent << "+- " << *dataset_op;
+    indent += (last ? "    " : "|   ");
+  } else {
+    dataset_op->Print(out, detailed);
+  }
+  // Descend to children
+  for (size_t i = 0; i < dataset_op->child_.size(); ++i) {
+    this->PrintNode(out, dataset_op->child_[i], indent, (i == (dataset_op->child_.size() - 1)), detailed);
   }
 }
 
@@ -100,6 +116,9 @@ Status ExecutionTree::Launch() {
       " Expected state: " + std::to_string(static_cast<int>(kDeTStateReady));
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
+  std::ostringstream ss;
+  ss << *this;
+  MS_LOG(INFO) << "Printing the tree before launch tasks:\n" << ss.str();
   for (auto itr = this->begin(); itr != this->end(); ++itr) {
     // An inlined operator is one that has an output connector size of 0, and it does not
     // require a thread to execute.  Instead, the work of this operator is executed inlined
diff --git a/mindspore/ccsrc/dataset/engine/execution_tree.h b/mindspore/ccsrc/dataset/engine/execution_tree.h
index 0f6cdfc165..838eb3a014 100644
--- a/mindspore/ccsrc/dataset/engine/execution_tree.h
+++ b/mindspore/ccsrc/dataset/engine/execution_tree.h
@@ -19,6 +19,7 @@
 #include <functional>
 #include <memory>
 #include <stack>
+#include <string>
 #include <vector>
 #include "dataset/engine/datasetops/dataset_op.h"
 #include "dataset/util/status.h"
@@ -114,8 +115,7 @@ class ExecutionTree {
 
   // A print method typically used for debugging
   // @param out - The output stream to write output to
-  // @param show_all - A bool to control if you want to show all info or just a summary
-  void Print(std::ostream &out, bool show_all) const;
+  void Print(std::ostream &out) const;
 
   // Returns an iterator positioned at the start
   // @return Iterator - The iterator
@@ -133,7 +133,7 @@ class ExecutionTree {
   // @param exe_tree - reference to the execution tree to display
   // @return - the output stream must be returned
   friend std::ostream &operator<<(std::ostream &out, ExecutionTree &exe_tree) {
-    exe_tree.Print(out, false);
+    exe_tree.Print(out);
     return out;
   }
 
@@ -178,6 +178,14 @@ class ExecutionTree {
   TaskGroup *AllTasks() const { return tg_.get(); }
 
  private:
+  // A helper function for doing the recursive printing
+  // @param dataset_op - The dataset op to print
+  // @param indent - an indent string for aligning child levels in output
+  // @param last - an indicator if it's the last child or not
+  // @param detailed - should it display the detailed node output or the summary line
+  void PrintNode(std::ostream &out, const std::shared_ptr<DatasetOp> &dataset_op, std::string indent, bool last,
+                 bool detailed) const;
+
   std::unique_ptr<TaskGroup> tg_;                        // Class for worker management
   std::shared_ptr<DatasetOp> root_;                      // The root node of the tree
   int32_t id_count_;                                     // Counter for generating operator id's

From 3322e65da9db2625d390de0be3d7b9e4e64331f5 Mon Sep 17 00:00:00 2001
From: Adel Shafiei <adel.shafiei@huawei.com>
Date: Fri, 24 Apr 2020 15:21:43 -0400
Subject: [PATCH 173/242] added ut for uniform augment C++ op

---
 .../ccsrc/mindrecord/common/shard_utils.cc    |  2 +-
 mindspore/ccsrc/mindrecord/io/shard_reader.cc |  2 +-
 .../ut/python/dataset/test_uniform_augment.py | 62 +++++++++++++++++++
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/mindrecord/common/shard_utils.cc b/mindspore/ccsrc/mindrecord/common/shard_utils.cc
index 51de0c5f64..edeabb3cde 100644
--- a/mindspore/ccsrc/mindrecord/common/shard_utils.cc
+++ b/mindspore/ccsrc/mindrecord/common/shard_utils.cc
@@ -39,7 +39,7 @@ std::vector<std::string> StringSplit(const std::string &field, char separator) {
     }
     s_pos = e_pos + 1;
   }
-  return std::move(res);
+  return res;
 }
 
 bool ValidateFieldName(const std::string &str) {
diff --git a/mindspore/ccsrc/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/mindrecord/io/shard_reader.cc
index 4cbb2b3767..804613e40a 100644
--- a/mindspore/ccsrc/mindrecord/io/shard_reader.cc
+++ b/mindspore/ccsrc/mindrecord/io/shard_reader.cc
@@ -914,7 +914,7 @@ vector<std::string> ShardReader::GetAllColumns() {
   } else {
     columns = selected_columns_;
   }
-  return std::move(columns);
+  return columns;
 }
 
 MSRStatus ShardReader::CreateTasksByBlock(const std::vector<std::tuple<int, int, int, uint64_t>> &row_group_summary,
diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py
index ce0490336e..ea99056116 100644
--- a/tests/ut/python/dataset/test_uniform_augment.py
+++ b/tests/ut/python/dataset/test_uniform_augment.py
@@ -18,6 +18,7 @@ import matplotlib.pyplot as plt
 from mindspore import log as logger
 import mindspore.dataset.engine as de
 import mindspore.dataset.transforms.vision.py_transforms as F
+import mindspore.dataset.transforms.vision.c_transforms as C
 
 DATA_DIR = "../data/dataset/testImageNetData/train/"
 
@@ -101,7 +102,68 @@ def test_uniform_augment(plot=False, num_ops=2):
     if plot:
         visualize(images_original, images_ua)
         
+def test_cpp_uniform_augment(plot=False, num_ops=2):
+    """
+    Test UniformAugment built from C++ transforms.
+
+    Runs the image folder through a pipeline without augmentation and through a pipeline
+    that additionally applies C.UniformAugment, then logs the mean squared error between
+    the two resulting image sets.
+
+    Args:
+        plot (bool): If True, pass both image sets to visualize().
+        num_ops (int): Forwarded to C.UniformAugment as num_ops.
+    """
+    logger.info("Test CPP UniformAugment")
+
+    def collect_images(dataset):
+        """Join all batches of dataset into one array, each transposed with axes (0, 2, 3, 1)."""
+        # Concatenate once: calling np.append per batch re-copies everything gathered so far.
+        return np.concatenate([np.transpose(image, (0, 2, 3, 1)) for image, _ in dataset], axis=0)
+
+    # Original Images
+    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
+
+    transforms_original = [C.Decode(), C.Resize(size=[224, 224]),
+                           F.ToTensor()]
+
+    ds_original = ds.map(input_columns="image",
+                         operations=transforms_original)
+    ds_original = ds_original.batch(512)
+
+    images_original = collect_images(ds_original)
+
+    # UniformAugment Images
+    ds = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)
+    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
+                     C.RandomHorizontalFlip(),
+                     C.RandomVerticalFlip(),
+                     C.RandomColorAdjust(),
+                     C.RandomRotation(degrees=45)]
+
+    uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
+
+    transforms_all = [C.Decode(), C.Resize(size=[224, 224]),
+                      uni_aug,
+                      F.ToTensor()]
+
+    ds_ua = ds.map(input_columns="image",
+                   operations=transforms_all, num_parallel_workers=1)
+    ds_ua = ds_ua.batch(512)
+
+    images_ua = collect_images(ds_ua)
+
+    if plot:
+        visualize(images_original, images_ua)
+
+    # Per-image mean squared error between augmented and original images
+    num_samples = images_original.shape[0]
+    mse = np.zeros(num_samples)
+    for i in range(num_samples):
+        mse[i] = np.mean((images_ua[i] - images_original[i]) ** 2)
+    logger.info("MSE= {}".format(str(np.mean(mse))))
 
 if __name__ == "__main__":
     test_uniform_augment(num_ops=1)
+    test_cpp_uniform_augment(num_ops=1)
     

From 0ba35eaec3c73867056e05259e36b44520355535 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Tue, 28 Apr 2020 21:09:15 -0400
Subject: [PATCH 174/242] fix globalbatchnorm bug

---
 mindspore/nn/layer/normalization.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 9d623bc6fd..66f17e3f38 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -117,6 +117,7 @@ class _BatchNorm(Cell):
         return group_list
 
     def _global_sync(self, x):
+        """calculate global batch normalization output"""
         if len(self.shape(x)) == 4:
             axes = (0, 2, 3)
             re_shape = (1, self.num_features, 1, 1)

From cc54bb565db38a2c48079e65e280b907bd0d1a79 Mon Sep 17 00:00:00 2001
From: chenfei <chenfei52@huawei.com>
Date: Fri, 17 Apr 2020 19:37:17 +0800
Subject: [PATCH 175/242] move opt to build graph

---
 mindspore/ccsrc/kernel/kernel_build_info.cc   |  12 +-
 mindspore/ccsrc/kernel/kernel_build_info.h    |   3 +
 mindspore/ccsrc/kernel/kernel_query.cc        |   6 +-
 mindspore/ccsrc/kernel/mng/rt_kernel_info.cc  |  32 +-
 .../ascend/ascend_backend_optimization.cc     |  12 +-
 .../common/common_backend_optimization.cc     |   1 +
 .../ccsrc/session/anf_runtime_algorithm.cc    |  28 +-
 mindspore/ccsrc/session/ascend_session.cc     | 332 ++++++++++++------
 mindspore/ccsrc/session/ascend_session.h      |  23 +-
 mindspore/ccsrc/session/kernel_graph.cc       |  49 ++-
 mindspore/ccsrc/session/kernel_graph.h        |  13 +-
 mindspore/ccsrc/session/session_basic.cc      |  19 +-
 mindspore/ccsrc/session/session_basic.h       |   2 +
 mindspore/ops/_op_impl/tbe/assign.py          |   1 +
 14 files changed, 381 insertions(+), 152 deletions(-)

diff --git a/mindspore/ccsrc/kernel/kernel_build_info.cc b/mindspore/ccsrc/kernel/kernel_build_info.cc
index 279a62bad6..df855f5340 100644
--- a/mindspore/ccsrc/kernel/kernel_build_info.cc
+++ b/mindspore/ccsrc/kernel/kernel_build_info.cc
@@ -22,28 +22,32 @@ namespace mindspore {
 namespace kernel {
 std::string KernelBuildInfo::GetInputFormat(size_t input_index) const {
   if (input_index >= inputs_format_.size()) {
-    MS_LOG(EXCEPTION) << "The index [" << input_index << "] is exceed the number of input node";
+    MS_LOG(ERROR) << "The index [" << input_index << "] is exceed the number of input node";
+    return kInvalidFormat;
   }
   return inputs_format_[input_index];
 }
 
 std::string KernelBuildInfo::GetOutputFormat(size_t output_index) const {
   if (output_index >= outputs_format_.size()) {
-    MS_LOG(EXCEPTION) << "The index [" << output_index << "] is exceed the number of input node";
+    MS_LOG(ERROR) << "The index [" << output_index << "] is exceed the number of output node";
+    return kInvalidFormat;  // sentinel value instead of throwing; callers check for it
   }
   return outputs_format_[output_index];
 }
 
 TypeId KernelBuildInfo::GetInputDeviceType(size_t input_index) const {
   if (input_index >= inputs_device_type_.size()) {
-    MS_LOG(EXCEPTION) << "The index [" << input_index << "] is exceed the number of input node";
+    MS_LOG(ERROR) << "The index [" << input_index << "] is exceed the number of input";
+    return TypeId::kNumberTypeEnd;
   }
   return inputs_device_type_[input_index];
 }
 
 TypeId KernelBuildInfo::GetOutputDeviceType(size_t output_index) const {
   if (output_index >= outputs_device_type_.size()) {
-    MS_LOG(EXCEPTION) << "The index [" << output_index << "] is exceed the number of input node";
+    MS_LOG(ERROR) << "The index [" << output_index << "] is exceed the number of output";
+    return TypeId::kNumberTypeEnd;
   }
   return outputs_device_type_[output_index];
 }
diff --git a/mindspore/ccsrc/kernel/kernel_build_info.h b/mindspore/ccsrc/kernel/kernel_build_info.h
index 76ebc7a572..779be057f6 100644
--- a/mindspore/ccsrc/kernel/kernel_build_info.h
+++ b/mindspore/ccsrc/kernel/kernel_build_info.h
@@ -82,6 +82,9 @@ class KernelBuildInfo {
 
   bool operator==(const KernelBuildInfo &other) const;
 
+ public:
+  static auto constexpr kInvalidFormat = "InvalidFormat";
+
  private:
   KernelType kernel_type_;
   std::vector<std::string> inputs_format_;
diff --git a/mindspore/ccsrc/kernel/kernel_query.cc b/mindspore/ccsrc/kernel/kernel_query.cc
index 3d3282e7b5..e4a1af7f50 100755
--- a/mindspore/ccsrc/kernel/kernel_query.cc
+++ b/mindspore/ccsrc/kernel/kernel_query.cc
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace kernel {
 namespace {
-void FilterInvaildKernelInfo(const CNodePtr &kernel_node,
+void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
                              std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
   MS_EXCEPTION_IF_NULL(kernel_info_list);
   std::vector<std::shared_ptr<kernel::KernelBuildInfo>> filtered_list;
@@ -63,9 +63,9 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
     HcclMetadataInfo(kernel_node, kernel_info_list);
   }
   if (kernel_info_list->empty()) {
-    MS_LOG(EXCEPTION) << "op" << kernel_node->DebugString() << "kernel query fail!";
+    MS_LOG(EXCEPTION) << "Op " << kernel_node->DebugString() << " kernel query fail!";
   }
-  FilterInvaildKernelInfo(kernel_node, kernel_info_list);
+  FilterInvalidKernelInfo(kernel_node, kernel_info_list);
 }
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/kernel/mng/rt_kernel_info.cc b/mindspore/ccsrc/kernel/mng/rt_kernel_info.cc
index a87bb4d514..cb230bc706 100755
--- a/mindspore/ccsrc/kernel/mng/rt_kernel_info.cc
+++ b/mindspore/ccsrc/kernel/mng/rt_kernel_info.cc
@@ -46,24 +46,40 @@ RtKerDescFactory &RtKerDescFactory::Get() {
 
 void GetRtKelInfo(const CNodePtr &kernel_node,
                   std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
-  MS_LOG(INFO) << "Mng kernel Info.";
   MS_EXCEPTION_IF_NULL(kernel_info_list);
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::string opNameLower = AnfAlgo::GetCNodeName(kernel_node);
   (void)std::transform(opNameLower.begin(), opNameLower.end(), opNameLower.begin(), ::tolower);
 
   auto ker_desc_ptr = RtKerDescFactory::Create(opNameLower);
-  if (ker_desc_ptr == nullptr) {
-    MS_LOG(DEBUG) << "Mng can't find op [" << opNameLower << "].";
+  if (ker_desc_ptr != nullptr && !ker_desc_ptr->GetKernelInfo().empty()) {
+    *kernel_info_list = ker_desc_ptr->GetKernelInfo();
     return;
   }
-  MS_EXCEPTION_IF_NULL(ker_desc_ptr);
-  auto kernel_info = ker_desc_ptr->GetKernelInfo();
-  if (kernel_info.empty()) {
-    MS_LOG(DEBUG) << "Rt dose not have op [" << opNameLower << "].";
+  // if can't find kernel info in kernel info database, use the default kernel info
+  auto node_name = AnfAlgo::GetCNodeName(kernel_node);
+  if (node_name == "StreamSwitch" || node_name == "StreamActive") {
+    auto kernel_build_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
+    // set input infos
+    auto input_num = AnfAlgo::GetInputTensorNum(kernel_node);
+    kernel_build_info_builder->SetInputsFormat(std::vector<std::string>(input_num, kOpFormat_DEFAULT));
+    std::vector<TypeId> input_types = {};
+    for (size_t i = 0; i < input_num; i++) {
+      input_types.push_back(AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, i));
+    }
+    kernel_build_info_builder->SetInputsDeviceType(input_types);
+    // set output info
+    auto output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
+    kernel_build_info_builder->SetOutputsFormat(std::vector<std::string>(output_num, kOpFormat_DEFAULT));
+    kernel_build_info_builder->SetOutputsDeviceType(std::vector<TypeId>(output_num, TypeId::kTypeUnknown));
+    // set other info
+    kernel_build_info_builder->SetFusionType(kernel::FusionType::OPAQUE);
+    kernel_build_info_builder->SetProcessor(kernel::Processor::AICORE);
+    kernel_build_info_builder->SetKernelType(KernelType::RT_KERNEL);
+    kernel_info_list->push_back(kernel_build_info_builder->Build());
     return;
   }
-  *kernel_info_list = kernel_info;
+  MS_LOG(DEBUG) << "Rt dose not have op [" << opNameLower << "].";
 }
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index 1b152c8998..66ea5ee526 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -186,7 +186,8 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap
     save_graphs_path = ".";
   }
   if (save_graphs) {
-    std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_before.ir";
+    std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_before" + "_graph_" +
+                            std::to_string(kernel_graph->graph_id()) + ".ir";
     DumpIR(file_path, kernel_graph);
     DumpIRProto(kernel_graph, "before_hwopt");
   }
@@ -208,7 +209,8 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap
   (void)optimizer->Optimize(kernel_graph);
   kernel_graph->SetExecOrderByDefault();
   if (save_graphs) {
-    std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_after.ir";
+    std::string file_path = save_graphs_path + "/" + "hwopt_d_ir_fusion_after" + "_graph_" +
+                            std::to_string(kernel_graph->graph_id()) + ".ir";
     DumpIR(file_path, kernel_graph);
   }
 }
@@ -252,7 +254,8 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
     save_graphs_path = ".";
   }
   if (save_graphs) {
-    std::string file_path = save_graphs_path + "/" + "hwopt_d_before.ir";
+    std::string file_path =
+      save_graphs_path + "/" + "hwopt_d_before" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir";
     DumpIR(file_path, kernel_graph);
   }
   // data layout optimization
@@ -278,7 +281,8 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
   (void)optimizer->Optimize(kernel_graph);
   kernel_graph->SetExecOrderByDefault();
   if (save_graphs) {
-    std::string file_path = save_graphs_path + "/" + "hwopt_d_end.ir";
+    std::string file_path =
+      save_graphs_path + "/" + "hwopt_d_end" + "_graph_" + std::to_string(kernel_graph->graph_id()) + ".ir";
     DumpIR(file_path, kernel_graph, true);
     DumpIRProto(kernel_graph, "after_hwopt");
   }
diff --git a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc b/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc
index f622f2f06f..0383311122 100644
--- a/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/common/common_backend_optimization.cc
@@ -27,6 +27,7 @@
 namespace mindspore {
 namespace opt {
 void BackendCommonOptimization(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
+  MS_LOG(INFO) << "start common opt graph:" << kernel_graph->graph_id();
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   bool save_graphs = context_ptr->save_graphs_flag();
diff --git a/mindspore/ccsrc/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
index dbf7097970..45588052b0 100644
--- a/mindspore/ccsrc/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/session/anf_runtime_algorithm.cc
@@ -300,7 +300,12 @@ std::string AnfRuntimeAlgorithm::GetOutputFormat(const AnfNodePtr &node, size_t
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
-  return build_info->GetOutputFormat(output_idx);
+  auto format = build_info->GetOutputFormat(output_idx);
+  if (format == kernel::KernelBuildInfo::kInvalidFormat) {
+    MS_LOG(EXCEPTION) << "Node [" << node->DebugString() << "]"
+                      << " has a invalid output format";
+  }
+  return format;
 }
 
 std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t input_idx) {
@@ -314,7 +319,12 @@ std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
-  return build_info->GetInputFormat(input_idx);
+  auto format = build_info->GetInputFormat(input_idx);
+  if (format == kernel::KernelBuildInfo::kInvalidFormat) {
+    MS_LOG(EXCEPTION) << "Node [" << node->DebugString() << "]"
+                      << " has a invalid input format";
+  }
+  return format;
 }
 
 KernelWithIndex AnfRuntimeAlgorithm::GetPrevNodeOutput(const AnfNodePtr &anf_node, size_t input_idx) {
@@ -481,7 +491,12 @@ TypeId AnfRuntimeAlgorithm::GetOutputDeviceDataType(const AnfNodePtr &node, size
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
-  return build_info->GetOutputDeviceType(output_idx);
+  auto dtype = build_info->GetOutputDeviceType(output_idx);
+  if (dtype == TypeId::kNumberTypeEnd) {
+    MS_LOG(EXCEPTION) << "Node [" << node->DebugString() << "]"
+                      << " has a invalid dtype";
+  }
+  return dtype;
 }
 
 TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_t input_idx) {
@@ -494,7 +509,12 @@ TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
-  return build_info->GetInputDeviceType(input_idx);
+  auto dtype = build_info->GetInputDeviceType(input_idx);
+  if (dtype == TypeId::kNumberTypeEnd) {
+    MS_LOG(EXCEPTION) << "Node [" << node->DebugString() << "]"
+                      << " has a invalid dtype";
+  }
+  return dtype;
 }
 
 TypeId AnfRuntimeAlgorithm::GetPrevNodeOutputDeviceDataType(const AnfNodePtr &anf_node, size_t input_idx) {
diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc
index bd5fba6d4b..a0a9a108cc 100755
--- a/mindspore/ccsrc/session/ascend_session.cc
+++ b/mindspore/ccsrc/session/ascend_session.cc
@@ -15,6 +15,9 @@
  */
 #include "session/ascend_session.h"
 #include <algorithm>
+#include <map>
+#include <tuple>
+#include <set>
 #include "operator/ops.h"
 #include "ir/meta_tensor.h"
 #include "ir/anf.h"
@@ -75,28 +78,15 @@ void DumpGraphInputArgs(const VectorRef &args) {
 
 void SetStreamDistinctionLabel(const KernelGraphPtr &graph, uint32_t label, bool is_override) {
   MS_EXCEPTION_IF_NULL(graph);
-  for (auto &node : graph->execution_order()) {
-    if (is_override || AnfAlgo::GetStreamDistinctionLabel(node.get()) == kInvalidDistincLabel) {
-      MS_EXCEPTION_IF_NULL(node);
-      AnfAlgo::SetStreamDistinctionLabel(label, node.get());
-    }
-  }
-}
-
-GraphId GetDistinctionLabel(const KernelGraphPtr &graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-  // if graph is empty,use graph id as distinction label
-  if (graph->execution_order().empty()) {
-    return graph->graph_id();
+  if (is_override || graph->stream_distinction_label() == kInvalidDistincLabel) {
+    graph->set_stream_distinction_label(label);
   }
-  // else use first node of execution order as label
-  return AnfAlgo::GetStreamDistinctionLabel(graph->execution_order()[0].get());
 }
 
 std::vector<BaseRef> GetRealArgs(const KernelGraphPtr graph, const VectorRef &args) {
   MS_EXCEPTION_IF_NULL(graph);
   std::vector<AnfNodePtr> graph_inputs = graph->inputs();
-  auto valid_inputs = graph->ValidInputs();
+  auto valid_inputs = graph->valid_inputs();
   size_t real_args_size = 0;
   std::vector<BaseRef> real_args = {};
   for (size_t i = 0; i < args.size(); i++) {
@@ -141,23 +131,9 @@ std::vector<BaseRef> GetRealArgs(const KernelGraphPtr graph, const VectorRef &ar
 
 GraphId AscendSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) {
   MS_LOG(INFO) << "start";
-  auto graph_id = graph_sum_;
   // construct graph, if successfully, graph_sum_ + 1
   auto graph = ConstructKernelGraph(lst, outputs);
-  MS_EXCEPTION_IF_NULL(graph);
-  opt::AscendBackendIRFusionOptimization(graph);
-  // select kernel build info
-  SelectKernel(*graph);
-  // convert kernel Graph to model
-  predictmodel::StepConvertGraph(graph);
-  // optimize graph
-  HardwareOptimize(graph);
-  // init runtime resource
-  InitRuntimeResource();
-  // assign static memory of parameters
-  auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
-  MS_EXCEPTION_IF_NULL(runtime_instance);
-  runtime_instance->AssignStaticMemoryInput(graph.get());
+  auto graph_id = graph->graph_id();
   MS_LOG(INFO) << "Compile graph " << graph_id << " success";
   return graph_id;
 }
@@ -166,16 +142,36 @@ void AscendSession::BuildGraph(GraphId graph_id) {
   MS_LOG(INFO) << "start";
   auto graph = GetGraph(graph_id);
   MS_EXCEPTION_IF_NULL(graph);
+  // resource initialize
+  InitRuntimeResource();
   // multiple graph handle
   if (graph_id == final_graph_id_) {
     if (!graph->executable()) {
       return;
     }
+    // insert assigns to child graph
+    InsertAllAssigns();
+    // insert switch and active to child graph
+    MergeSwitchCompile();
+    // OptChildGraphs
+    auto graph_order = GetGraphOrder(final_graph_id_);
+    auto &graph_type = GetGraphOrderType(final_graph_id_);
+    for (size_t i = 0; i < graph_order.size(); i++) {
+      if (graph_type[i] == BRANCH_END || graph_type[i] == BRANCH_START) {
+        continue;
+      }
+      MS_LOG(INFO) << "Start build child  graph " << graph_order[i];
+      auto child_graph = GetGraph(graph_order[i]);
+      CompileChildGraph(child_graph);
+    }
     // merge child graph
     MergeGraphExecOrder();
   } else {
+    auto single_graph = GetGraph(graph_id);
+    CompileChildGraph(single_graph);
     // set the distinction label of single graph
-    SetStreamDistinctionLabel(GetGraph(graph_id), graph_id, false);
+    single_graph->set_stream_distinction_label(graph_id);
+    single_graph->UpdateExecuteKernelStreamLabel();
   }
   // adjust execution order because  merge child graph and other special operations
   AdjustKernel(graph);
@@ -197,9 +193,26 @@ void AscendSession::BuildGraph(GraphId graph_id) {
     // load task info to device if it is sink mode
     LoadTask(graph);
   }
+  // sync the initial const tensor to device
+  SyncInitialTenosrToDevice();
   MS_LOG(INFO) << "end";
 }
 
+void AscendSession::CompileChildGraph(const KernelGraphPtr &child_graph) {
+  MS_EXCEPTION_IF_NULL(child_graph);
+  opt::AscendBackendIRFusionOptimization(child_graph);
+  // select kernel build info
+  SelectKernel(*child_graph);
+  // convert kernel Graph to model
+  predictmodel::StepConvertGraph(child_graph);
+  // optimize graph
+  HardwareOptimize(child_graph);
+  // assign static memory of parameters
+  auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
+  MS_EXCEPTION_IF_NULL(runtime_instance);
+  runtime_instance->AssignStaticMemoryInput(child_graph.get());
+}
+
 void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
                              VectorRef *const outputs) {
   MS_LOG(INFO) << "start";
@@ -458,11 +471,9 @@ void AscendSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const
 
 GraphId AscendSession::SetFinalGraphInput(const std::vector<AnfNodePtr> &args) {
   MS_LOG(INFO) << "Start! Args size " << args.size();
-  auto final_graph = std::make_shared<KernelGraph>();
-  final_graph_id_ = graph_sum_++;
-  graphs_[final_graph_id_] = final_graph;
-  final_graph->set_graph_id(final_graph_id_);
-  MS_LOG(INFO) << "Create a new final graph" << final_graph_id_ << "success";
+  auto final_graph = NewKernelGraph();
+  final_graph_id_ = final_graph->graph_id();
+  MS_LOG(INFO) << "Create a new final graph" << final_graph_id_ << " success";
   // init private variables and bind them with final_graph_id
   graph_execute_orders_[final_graph_id_] = std::vector<GraphId>();
   graph_order_types_[final_graph_id_] = std::vector<GraphType>();
@@ -498,6 +509,46 @@ GraphId AscendSession::SetFinalGraphInput(const std::vector<AnfNodePtr> &args) {
   return final_graph_id_;
 }
 
+AnfNodePtr AscendSession::CreateFakeOutput(GraphId fake_graph_id, const AnfNodePtr &true_output) {
+  auto fake_graph = GetGraph(fake_graph_id);
+  MS_EXCEPTION_IF_NULL(fake_graph);  // dereferenced by create_parameter and NewCNode below
+  auto output_item_with_index = AnfAlgo::VisitKernelWithReturnType(true_output, 0);
+  auto create_parameter = [&](const AbstractBasePtr &abstract) -> AnfNodePtr {
+    auto parameter = fake_graph->NewParameter();
+    MS_EXCEPTION_IF_NULL(parameter);
+    parameter->set_abstract(abstract);
+    auto new_parameter = fake_graph->NewParameter(parameter);
+    // Add new parameter to the graph input of fake_graph to make sure that all parameters will be allocated memory.
+    auto graph_inputs = fake_graph->MutableInputs();
+    MS_EXCEPTION_IF_NULL(graph_inputs);
+    graph_inputs->push_back(new_parameter);
+    return new_parameter;
+  };
+  auto create_parameter_from_cnode = [&](const AnfNodePtr &cnode, size_t output_idx) -> AnfNodePtr {
+    MS_EXCEPTION_IF_NULL(cnode);
+    auto abstract = cnode->abstract();
+    MS_EXCEPTION_IF_NULL(abstract);
+    // for a tuple abstract, build the parameter from the element selected by output_idx
+    if (abstract->isa<abstract::AbstractTuple>()) {
+      auto tuple_abstract = abstract->cast<abstract::AbstractTuplePtr>();
+      MS_EXCEPTION_IF_NULL(tuple_abstract);
+      MS_LOG(INFO) << "tuple_size [" << tuple_abstract->size() << "]";
+      return create_parameter((*tuple_abstract)[output_idx]);
+    }
+    return create_parameter(cnode->abstract());
+  };
+  if (AnfAlgo::CheckPrimitiveType(output_item_with_index.first, prim::kPrimMakeTuple)) {
+    std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
+    auto make_tuple = output_item_with_index.first->cast<CNodePtr>();
+    MS_EXCEPTION_IF_NULL(make_tuple);
+    // input 0 is the MakeTuple primitive itself; rebuild every element recursively
+    for (size_t i = 1; i < make_tuple->inputs().size(); i++) {
+      make_tuple_inputs.push_back(CreateFakeOutput(fake_graph_id, make_tuple->inputs()[i]));
+    }
+    return fake_graph->NewCNode(make_tuple_inputs);
+  }
+  return create_parameter_from_cnode(output_item_with_index.first, output_item_with_index.second);
+}
+
 void AscendSession::SetFinalGraphOutput(const BaseRef &output) {
   auto final_graph = GetGraph(final_graph_id_);
   MS_EXCEPTION_IF_NULL(final_graph);
@@ -559,12 +610,6 @@ void AscendSession::InsertSwitchToGraph(GraphId condition_graph_id, GraphId true
   condition_graph->AddValueNodeToGraph(counter_const);
   // create a new switch op
   auto switch_primitive = std::make_shared<Primitive>("StreamSwitch");
-  auto kernel_build_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
-  kernel_build_info_builder->SetOutputsFormat(std::vector<std::string>{kOpFormat_DEFAULT});
-  kernel_build_info_builder->SetOutputsDeviceType(std::vector<TypeId>{kNumberTypeInt32});
-  kernel_build_info_builder->SetFusionType(kernel::FusionType::OPAQUE);
-  kernel_build_info_builder->SetProcessor(kernel::Processor::AICORE);
-  kernel_build_info_builder->SetKernelType(KernelType::RT_KERNEL);
   auto cond_output_it = condition_output_.find(condition_graph_id);
   if (cond_output_it == condition_output_.end()) {
     MS_LOG(EXCEPTION) << "Can't find condition graph" << condition_graph_id;
@@ -574,11 +619,9 @@ void AscendSession::InsertSwitchToGraph(GraphId condition_graph_id, GraphId true
   MS_EXCEPTION_IF_NULL(cond_output_kernel);
   std::vector<AnfNodePtr> inputs = {NewValueNode(switch_primitive), cond_output_kernel, counter_const};
   CNodePtr switch_node = condition_graph->NewCNode(inputs);
-  AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), switch_node.get());
   MS_EXCEPTION_IF_NULL(switch_node);
   switch_node->set_abstract(std::make_shared<abstract::AbstractNone>());
   AnfAlgo::SetGraphId(condition_graph_id, switch_node.get());
-  AnfAlgo::SetStreamDistinctionLabel(GetDistinctionLabel(GetGraph(condition_graph_id)), switch_node.get());
   // set attr: cond_ RT_GREATER
   AnfAlgo::SetNodeAttr(kAttrSwitchCondition, MakeValue<int>(static_cast<int>(RT_GREATER)), switch_node);
   // set attr:data_type
@@ -586,9 +629,9 @@ void AscendSession::InsertSwitchToGraph(GraphId condition_graph_id, GraphId true
   // set attr:true branch graph id ,which is same to stream distinction label
   AnfAlgo::SetNodeAttr(kAttrTrueBranchStream, MakeValue<uint32_t>(true_graph_id), switch_node);
   // append switch at the end of condition graph
-  std::vector<CNodePtr> exec_order = condition_graph->execution_order();
-  exec_order.push_back(switch_node);
-  condition_graph->set_execution_order(exec_order);
+  auto return_node = condition_graph->get_return();
+  MS_EXCEPTION_IF_NULL(return_node);
+  InsertControlDependToGraph(condition_graph_id, return_node->input(1), switch_node);
   MS_LOG(INFO) << "Finish!";
 }
 
@@ -615,8 +658,14 @@ void AscendSession::CopyOutputOfIf(GraphId false_graph_id) {
       MS_EXCEPTION_IF_NULL(true_last);
       MS_EXCEPTION_IF_NULL(false_last);
       MS_LOG(INFO) << "The last graph of false branch is " << false_last_id;
-      // now only consider the single output
-      InsertMultipleAssignToGraph(true_last_id, true_last->output(), false_last->output());
+      // create fake output
+      auto fake_output_graph = NewKernelGraph();
+      graph_execute_order.push_back(fake_output_graph->graph_id());
+      graph_order_type.push_back(COMMON_GRAPH);
+      fake_output_graph->set_output(CreateFakeOutput(fake_output_graph->graph_id(), final_graph->output()));
+      final_graph->set_output(fake_output_graph->output());
+      InsertMultipleAssignToGraph(true_last_id, true_last->output(), final_graph->output());
+      InsertMultipleAssignToGraph(false_last_id, false_last->output(), final_graph->output());
       // insert stream active for loop sink
       auto context_ptr = MsContext::GetInstance();
       MS_EXCEPTION_IF_NULL(context_ptr);
@@ -650,14 +699,14 @@ void AscendSession::SwitchCompile(GraphId cond_graph_id, GraphId true_graph_id,
   if (false_graph_id != kInvalidGraphId) {
     // false graph and condition in graph same stream
     auto condition_graph = GetGraph(cond_graph_id);
-    SetStreamDistinctionLabel(GetGraph(false_graph_id), GetDistinctionLabel(condition_graph), true);
+    SetStreamDistinctionLabel(GetGraph(false_graph_id), condition_graph->stream_distinction_label(), true);
     // if false graph is a condition graph and has been switch compiled before,it's false should be updated again
     auto cond_it = switches_.find(false_graph_id);
     while (cond_it != switches_.end() && cond_it->second.second != kInvalidGraphId) {
       cond_graph_id = cond_it->first;
       false_graph_id = cond_it->second.second;
       condition_graph = GetGraph(cond_graph_id);
-      SetStreamDistinctionLabel(GetGraph(false_graph_id), GetDistinctionLabel(condition_graph), true);
+      SetStreamDistinctionLabel(GetGraph(false_graph_id), condition_graph->stream_distinction_label(), true);
       cond_it = switches_.find(false_graph_id);
     }
   }
@@ -691,7 +740,7 @@ void AscendSession::MergeSwitchCompile() {
     }
     // insert stream active to common graph
     if (prev_graph_id != kInvalidGraphId) {
-      InsertStreamActiveToGraph(prev_graph_id, GetDistinctionLabel(condition_graph));
+      InsertStreamActiveToGraph(prev_graph_id, condition_graph->stream_distinction_label());
     }
     // if this is a 'if' condition
     auto it = while_condition_graphs_.find(cond_graph_id);
@@ -700,12 +749,39 @@ void AscendSession::MergeSwitchCompile() {
     } else {
       // if it is a while,insert a stream active to true graph
       GraphId from_graph = it->second;
-      InsertStreamActiveToGraph(from_graph, GetDistinctionLabel(condition_graph));
+      InsertStreamActiveToGraph(from_graph, condition_graph->stream_distinction_label());
     }
   }
   MS_LOG(INFO) << "Finish!";
 }
 
+void AscendSession::InsertAllAssigns() {
+  std::set<std::pair<AnfNodePtr, AnfNodePtr>> assigns;
+  for (auto assign : assigns_) {
+    auto front_anf = std::get<0>(assign);
+    auto to_graph_id = std::get<1>(assign);
+    auto input_idx = std::get<2>(assign);
+    auto to_graph = GetGraph(to_graph_id);
+    MS_EXCEPTION_IF_NULL(to_graph);
+    std::vector<AnfNodePtr> graph_inputs = to_graph->inputs();
+    if (input_idx >= graph_inputs.size()) {
+      MS_LOG(EXCEPTION) << "input_index " << input_idx << " out of range size " << graph_inputs.size();
+    }
+    auto backend_parameter = graph_inputs[input_idx];
+    (void)assigns.insert(std::pair<AnfNodePtr, AnfNodePtr>(front_anf, backend_parameter));
+  }
+  // duplicates were dropped by the set above; insert each unique assign into its source graph
+  for (auto &assign : assigns) {
+    auto front_anf = assign.first;
+    auto backend_parameter = assign.second;
+    auto from_graph_id = GetGraphIdByNode(front_anf);
+    auto from_graph = GetGraph(from_graph_id);
+    MS_EXCEPTION_IF_NULL(from_graph);
+    auto backend_arg = from_graph->GetBackendAnfByFrontAnf(front_anf);
+    InsertAssignToGraph(from_graph_id, backend_arg, backend_parameter);
+  }
+}
+
 // insert active to graph
 void AscendSession::SetActive(GraphId from, GraphId to) {
   if (while_condition_graphs_.find(to) != while_condition_graphs_.end()) {
@@ -735,20 +811,21 @@ void AscendSession::SetActive(GraphId from, GraphId to) {
   while_condition_graphs_[to] = from;
 }
 
-void AscendSession::SetChildGraphParameter(const AnfNodePtr &front_anf, const AnfNodePtr &backend_parameter) {
+void AscendSession::SetChildGraphParameter(const AnfNodePtr &front_anf, GraphId to_graph_id, size_t input_idx) {
   MS_LOG(INFO) << "Start!";
-  MS_EXCEPTION_IF_NULL(backend_parameter);
   MS_EXCEPTION_IF_NULL(front_anf);
-  if (!backend_parameter->isa<Parameter>()) {
-    MS_LOG(EXCEPTION) << "Backend parameter's type is not a parameter,but is " << backend_parameter->ToString();
-  }
   auto from_graph_id = GetGraphIdByNode(front_anf);
   auto from_graph = GetGraph(from_graph_id);
   MS_EXCEPTION_IF_NULL(from_graph);
-  auto to_graph_id = AnfAlgo::GetGraphId(backend_parameter.get());
   auto to_graph = GetGraph(to_graph_id);
-  auto backend_arg = from_graph->GetBackendAnfByFrontAnf(front_anf);
   MS_EXCEPTION_IF_NULL(to_graph);
+  std::vector<AnfNodePtr> graph_inputs = to_graph->inputs();
+  if (input_idx >= graph_inputs.size()) {
+    MS_LOG(EXCEPTION) << "input_index " << input_idx << " out of range size " << graph_inputs.size();
+  }
+  auto backend_parameter = graph_inputs[input_idx];
+  MS_EXCEPTION_IF_NULL(backend_parameter);
+  auto backend_arg = from_graph->GetBackendAnfByFrontAnf(front_anf);
   MS_LOG(INFO) << "Set node[" << front_anf->DebugString() << "] of graph[" << from_graph_id << "]to node["
                << backend_parameter->DebugString() << "] of graph[" << AnfAlgo::GetGraphId(backend_parameter.get())
                << "]";
@@ -759,39 +836,21 @@ void AscendSession::SetChildGraphParameter(const AnfNodePtr &front_anf, const An
   // if arg is the the parameter of child graph,it is parameter of final graph too
   if (front_anf->isa<Parameter>()) {
     MS_EXCEPTION_IF_NULL(backend_arg);
-    if (!AnfAlgo::OutputAddrExist(backend_arg, 0)) {
-      // set parameter's addr in child graph to parameter in final graph
-      AnfAlgo::SetOutputAddr(AnfAlgo::GetMutableOutputAddr(backend_parameter, 0), 0, backend_arg.get());
-      MS_LOG(INFO) << "Assign mem of node" << backend_parameter->DebugString() << " of graph "
-                   << AnfAlgo::GetGraphId(backend_parameter.get()) << " to node" << backend_arg->DebugString()
-                   << "of graph " << AnfAlgo::GetGraphId(backend_arg.get());
-      return;
-    }
-    // if a parameter is a weight and not linked to any executable node,device type will be kTypeUnknown,set it's device
-    // type same to arg
-    if (AnfAlgo::GetOutputDeviceDataType(backend_parameter, 0) == kTypeUnknown) {
-      AnfAlgo::SetSelectKernelBuildInfo(AnfAlgo::GetSelectKernelBuildInfo(backend_arg), backend_parameter.get());
-    }
-    // if front anf is a parameter,we can assign the value back,because backend_parameter won't be change in it's graph
-    // unless it's a weight.If backend_parameter is a weight,we should assign the value back.
-    AnfAlgo::SetOutputAddr(AnfAlgo::GetMutableOutputAddr(backend_arg, 0), 0, backend_parameter.get());
+    MS_LOG(INFO) << "Reuse node [" << backend_arg->DebugString() << "], old node[" << backend_parameter->DebugString()
+                 << "] will be replaced.";
+    to_graph->ReplaceNode(backend_parameter, backend_arg);
     return;
   }
-  InsertAssignToGraph(from_graph_id, backend_arg, backend_parameter);
-  MS_LOG(INFO) << "Finish!";
+  MS_LOG(INFO) << "Assign of node" << backend_arg->DebugString() << " of graph " << from_graph_id << " to node"
+               << backend_parameter->DebugString() << "of graph " << to_graph_id;
+  (void)assigns_.insert(std::tuple<AnfNodePtr, GraphId, size_t>(front_anf, to_graph_id, input_idx));
 }
 
-void AscendSession::SetChildGraphParameter(const tensor::TensorPtr &front_tensor, const AnfNodePtr &backend_parameter) {
+void AscendSession::SetChildGraphParameter(const tensor::TensorPtr &front_tensor, GraphId to_graph_id,
+                                           size_t input_idx) {
   MS_LOG(INFO) << "Start!";
-  // sync data from host to device
-  MS_EXCEPTION_IF_NULL(front_tensor);
-  size_t tensor_size = front_tensor->data().nbytes();
-  auto addr = AnfAlgo::GetOutputAddr(backend_parameter, 0);
-  MS_EXCEPTION_IF_NULL(addr);
-  if (!addr->SyncHostToDevice(trans::GetRuntimePaddingShape(backend_parameter, 0), tensor_size,
-                              front_tensor->data_type(), front_tensor->data_c(false))) {
-    MS_LOG(EXCEPTION) << "Tensor SyncHostToDevice fail!";
-  }
+  std::pair<GraphId, size_t> graph_input_pair(to_graph_id, input_idx);
+  initial_tenosrs_[graph_input_pair] = front_tensor;
   MS_LOG(INFO) << "Finish!";
 }
 
@@ -818,10 +877,9 @@ size_t AscendSession::SetChildGraphInput(const KernelGraphPtr &graph, const AnfN
   if (output_num > 1 && !AnfAlgo::CheckPrimitiveType(node, prim::kPrimTupleGetItem)) {
     return input_index + output_num;
   }
-  auto &graph_inputs = graph->inputs();
-  auto &valid_inputs = graph->ValidInputs();
+  auto valid_inputs = graph->valid_inputs();
   if (valid_inputs[input_index]) {
-    SetChildGraphParameter(node, graph_inputs[input_index]);
+    SetChildGraphParameter(node, graph->graph_id(), input_index);
   } else {
     MS_LOG(DEBUG) << "Invalid input arg: " << node->DebugString();
   }
@@ -833,8 +891,7 @@ size_t AscendSession::SetChildGraphInput(const KernelGraphPtr &graph, const Valu
   if (!value->isa<Tensor>()) {
     MS_LOG(EXCEPTION) << "Value Node should be a tensor, unexpected value: " << value->ToString();
   }
-  auto &graph_inputs = graph->inputs();
-  SetChildGraphParameter(value->cast<TensorPtr>(), graph_inputs[input_index]);
+  SetChildGraphParameter(value->cast<TensorPtr>(), graph->graph_id(), input_index);
   return ++input_index;
 }
 
@@ -905,8 +962,6 @@ GraphId AscendSession::GetGraphIdByNode(const AnfNodePtr &front_anf) const {
 
 void AscendSession::MergeGraphExecOrder() {
   MS_LOG(INFO) << "Start!";
-  // insert switch to graph
-  MergeSwitchCompile();
   // merge graph order
   auto &graph_order = GetGraphOrder(final_graph_id_);
   auto &graph_type = GetGraphOrderType(final_graph_id_);
@@ -916,6 +971,13 @@ void AscendSession::MergeGraphExecOrder() {
     MS_LOG(WARNING) << "Graph output is a lonely variable not linked to any op!";
     return;
   }
+  if (graph_order.size() > 1) {
+    auto context_ptr = MsContext::GetInstance();
+    MS_EXCEPTION_IF_NULL(context_ptr);
+    if (!context_ptr->enable_task_sink()) {
+      MS_LOG(INFO) << "Control sink network should run with task-sink mode!";
+    }
+  }
   // if first graph is common,the final graph has no label,then set the stream of final graph same with the first graph
   SetStreamDistinctionLabel(final_graph, graph_order[0], false);
   std::vector<CNodePtr> final_exec_order = final_graph->execution_order();
@@ -930,7 +992,11 @@ void AscendSession::MergeGraphExecOrder() {
     MS_EXCEPTION_IF_NULL(child_graph);
     auto exec_order = child_graph->execution_order();
     MS_LOG(INFO) << "Merge graph,graph_id " << graph_id;
-    (void)std::copy(exec_order.begin(), exec_order.end(), std::back_inserter(final_exec_order));
+    (void)std::transform(exec_order.begin(), exec_order.end(), std::back_inserter(final_exec_order),
+                         [&](CNodePtr node) -> CNodePtr {
+                           AnfAlgo::SetStreamDistinctionLabel(child_graph->stream_distinction_label(), node.get());
+                           return node;
+                         });
     // add all value nodes of child graphs to final graph
     for (auto &value_node : child_graph->graph_value_nodes()) {
       final_graph->AddValueNodeToGraph(value_node);
@@ -969,15 +1035,9 @@ void AscendSession::InsertAssignToGraph(GraphId graph_id, const AnfNodePtr &from
   // generate a new cnode
   auto assign_node = graph->NewCNode(inputs);
   MS_EXCEPTION_IF_NULL(assign_node);
-  assign_node->set_abstract(std::make_shared<abstract::AbstractNone>());
-  auto kernel_build_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
-  kernel_build_info_builder->SetKernelType(KernelType::RT_KERNEL);
-  AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), assign_node.get());
-  AnfAlgo::SetStreamDistinctionLabel(GetDistinctionLabel(graph), assign_node.get());
+  assign_node->set_abstract(to->abstract());
   // append the assign at the end of from graph
-  auto exec_order = graph->execution_order();
-  exec_order.push_back(assign_node);
-  graph->set_execution_order(exec_order);
+  InsertDependToGraph(graph_id, assign_node);
 }
 
 void AscendSession::InsertMultipleAssignToGraph(GraphId graph_id, const AnfNodePtr &from, const AnfNodePtr &to) {
@@ -997,24 +1057,46 @@ void AscendSession::InsertMultipleAssignToGraph(GraphId graph_id, const AnfNodeP
 
 void AscendSession::InsertStreamActiveToGraph(GraphId graph_id, uint32_t actived_stream) {
   MS_LOG(INFO) << "Insert stream_active from " << graph_id << " to " << actived_stream;
-  auto from_graph = graphs_[graph_id];
+  auto from_graph = GetGraph(graph_id);
   MS_EXCEPTION_IF_NULL(from_graph);
   std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>("StreamActive"))};
   auto active_node = from_graph->NewCNode(inputs);
   MS_EXCEPTION_IF_NULL(active_node);
   active_node->set_abstract(std::make_shared<abstract::AbstractNone>());
-  auto kernel_build_info_builder = std::make_shared<kernel::KernelBuildInfo::KernelBuildInfoBuilder>();
-  kernel_build_info_builder->SetKernelType(KernelType::RT_KERNEL);
-  AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), active_node.get());
   // set the active stream id into the attr of active node
   std::vector<uint32_t> active_index_value = {};
   active_index_value.push_back(actived_stream);
   AnfAlgo::SetNodeAttr(kAttrActiveStreamList, MakeValue<std::vector<uint32_t>>(active_index_value), active_node);
-  AnfAlgo::SetStreamDistinctionLabel(GetDistinctionLabel(from_graph), active_node.get());
   // append the active node at the end of from graph
-  auto exec_order = from_graph->execution_order();
-  exec_order.push_back(active_node);
-  from_graph->set_execution_order(exec_order);
+  auto return_node = from_graph->get_return();
+  MS_EXCEPTION_IF_NULL(return_node);
+  InsertControlDependToGraph(graph_id, return_node->input(1), active_node);
+}
+
+void AscendSession::InsertDependToGraph(GraphId graph_id, const AnfNodePtr &attch_node) {
+  MS_LOG(INFO) << "Insert depend at the end of graph, the attach node is " << attch_node->DebugString();
+  auto graph = GetGraph(graph_id);
+  MS_EXCEPTION_IF_NULL(graph);
+  std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>("depend"))};
+  auto return_node = graph->get_return();
+  MS_EXCEPTION_IF_NULL(return_node);
+  inputs.push_back(return_node->input(1));
+  inputs.push_back(attch_node);
+  auto depend_node = graph->NewCNode(inputs);
+  return_node->set_input(1, depend_node);
+}
+
+void AscendSession::InsertControlDependToGraph(GraphId graph_id, const AnfNodePtr &first_node,
+                                               const AnfNodePtr &second_node) {
+  MS_LOG(INFO) << "Insert control depend at the end of graph, the first node is " << first_node->DebugString()
+               << ", the second node is " << second_node->DebugString();
+  auto graph = GetGraph(graph_id);
+  MS_EXCEPTION_IF_NULL(graph);
+  std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>("ControlDepend"))};
+  inputs.push_back(first_node);
+  inputs.push_back(second_node);
+  auto control_depend = graph->NewCNode(inputs);
+  InsertDependToGraph(graph_id, control_depend);
 }
 
 size_t AscendSession::ExecOrderOfChildGraph(GraphId final_graph, GraphId child_graph) {
@@ -1043,5 +1125,29 @@ std::vector<GraphType> &AscendSession::GetGraphOrderType(GraphId final_graph_id)
   }
   return graph_type_iter->second;
 }
+
+void AscendSession::SyncInitialTenosrToDevice() {
+  for (auto &item : initial_tenosrs_) {
+    auto to_graph_id = item.first.first;
+    auto input_idx = item.first.second;
+    auto front_tensor = item.second;
+    auto to_graph = GetGraph(to_graph_id);
+    MS_EXCEPTION_IF_NULL(to_graph);
+    std::vector<AnfNodePtr> graph_inputs = to_graph->inputs();
+    if (input_idx >= graph_inputs.size()) {
+      MS_LOG(EXCEPTION) << "input_index " << input_idx << " out of range size " << graph_inputs.size();
+    }
+    auto backend_parameter = graph_inputs[input_idx];
+    // sync data from host to device
+    MS_EXCEPTION_IF_NULL(front_tensor);
+    size_t tensor_size = front_tensor->data().nbytes();
+    auto addr = AnfAlgo::GetOutputAddr(backend_parameter, 0);
+    MS_EXCEPTION_IF_NULL(addr);
+    if (!addr->SyncHostToDevice(trans::GetRuntimePaddingShape(backend_parameter, 0), tensor_size,
+                                front_tensor->data_type(), front_tensor->data_c(false))) {
+      MS_LOG(EXCEPTION) << "Tensor SyncHostToDevice fail!";
+    }
+  }
+}
 }  // namespace session
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/session/ascend_session.h b/mindspore/ccsrc/session/ascend_session.h
index 1ce236c9c3..635003d97c 100755
--- a/mindspore/ccsrc/session/ascend_session.h
+++ b/mindspore/ccsrc/session/ascend_session.h
@@ -21,6 +21,9 @@
 #include <vector>
 #include <utility>
 #include <stack>
+#include <map>
+#include <tuple>
+#include <set>
 #include "session/session_basic.h"
 #include "session/kernel_graph.h"
 #include "kernel/kernel.h"
@@ -60,6 +63,8 @@ class AscendSession : public SessionBasic {
   GraphId GetFinalRunGraph() const override { return final_graph_id_; }
   // insert active to graph
   void SetActive(GraphId, GraphId) override;
+  // compile child graph when session has multiple child graphs
+  void CompileChildGraph(const KernelGraphPtr &child_graph);
 
  private:
   void InitRuntimeResource();
@@ -95,12 +100,16 @@ class AscendSession : public SessionBasic {
   size_t ExecOrderOfChildGraph(GraphId final_graph, GraphId child_graph);
   // handle condition graph from vm
   void InsertSwitchToGraph(GraphId condition_graph_id, GraphId true_graph_id);
+  // insert depend to graph, used to attach control nodes to graph
+  void InsertDependToGraph(GraphId graph_id, const AnfNodePtr &attch_node);
+  // insert control depend to graph, used to attach control nodes to graph
+  void InsertControlDependToGraph(GraphId graph_id, const AnfNodePtr &first_node, const AnfNodePtr &second_node);
   // Get graph by graph id ,if not exist return null ptr
   KernelGraphPtr GetGraph(GraphId graph_id);
   // set child graph parameter if front arg is a anf
-  void SetChildGraphParameter(const AnfNodePtr &front_anf, const AnfNodePtr &backend_parameter);
+  void SetChildGraphParameter(const AnfNodePtr &front_anf, GraphId to_graph_id, size_t input_idx);
   // set child graph parameter if front arg is a tensor
-  void SetChildGraphParameter(const tensor::TensorPtr &front_tensor, const AnfNodePtr &backend_parameter);
+  void SetChildGraphParameter(const tensor::TensorPtr &front_tensor, GraphId to_graph_id, size_t input_idx);
   // update the execution order of all child graphs
   void UpdateGraphOrder(GraphId to_graph);
   // handle switch when merge
@@ -113,6 +122,12 @@ class AscendSession : public SessionBasic {
   void CopyOutputOfIf(GraphId false_graph_id);
   // check if graph cache exist
   bool GraphCacheExist(const GraphInfo &graph_info) const;
+  // insert all assign to child graph
+  void InsertAllAssigns();
+  // create fake output of final graph
+  AnfNodePtr CreateFakeOutput(GraphId final_graph_id, const AnfNodePtr &true_output);
+  // sync initial tensors' data to device
+  void SyncInitialTenosrToDevice();
 
   // member variables
   // key is final_graph_id,value is child graph execute order of final graph
@@ -124,6 +139,10 @@ class AscendSession : public SessionBasic {
   // record all conditions
   std::unordered_map<GraphId, std::pair<GraphId, GraphId>> switches_;
   std::unordered_map<GraphId, AnfNodePtr> condition_output_;
+  // share parameters
+  std::set<std::tuple<AnfNodePtr, GraphId, size_t>> assigns_;
+  // initial tensors, these tensors will sync data to device before running the graph
+  std::map<std::pair<GraphId, size_t>, tensor::TensorPtr> initial_tenosrs_;
   // final_graph_id is used in every root graph has it's own session situation
   GraphId final_graph_id_;
 };
diff --git a/mindspore/ccsrc/session/kernel_graph.cc b/mindspore/ccsrc/session/kernel_graph.cc
index cdadf389a6..95ac38c405 100755
--- a/mindspore/ccsrc/session/kernel_graph.cc
+++ b/mindspore/ccsrc/session/kernel_graph.cc
@@ -295,10 +295,7 @@ ValueNodePtr KernelGraph::NewValueNode(const ValueNodePtr &value_node) {
   // set the format of value_node to DEFAULT_FORMAT
   kernel_build_info_builder->SetOutputsFormat(std::vector<std::string>{kOpFormat_DEFAULT});
   // set value node initial device data type = infer data type
-  std::vector<TypeId> types;
-  for (size_t index = 0; index < AnfAlgo::GetOutputTensorNum(value_node); ++index) {
-    types.push_back(kTypeUnknown);
-  }
+  std::vector<TypeId> types = std::vector<TypeId>(AnfAlgo::GetOutputTensorNum(value_node), kTypeUnknown);
   kernel_build_info_builder->SetOutputsDeviceType(types);
   AnfAlgo::SetSelectKernelBuildInfo(kernel_build_info_builder->Build(), new_value_node.get());
   AnfAlgo::SetGraphId(graph_id_, new_value_node.get());
@@ -330,10 +327,11 @@ void KernelGraph::FrontBackendlMapUpdate(const AnfNodePtr &old_backend_anf, cons
     MS_LOG(EXCEPTION) << "old can't be same with new";
   }
   if (backend_front_anf_map_.find(old_backend_anf) == backend_front_anf_map_.end()) {
-    MS_LOG(EXCEPTION) << "old_backend_anf " << old_backend_anf->DebugString() << " is not exist in the map";
+    MS_LOG(DEBUG) << "old_backend_anf " << old_backend_anf->DebugString() << " is not exist in the map";
+    return;
   }
   if (front_backend_anf_map_.find(backend_front_anf_map_[old_backend_anf]) == front_backend_anf_map_.end()) {
-    MS_LOG(EXCEPTION) << "anf is not exist in the mape ,old " << old_backend_anf->DebugString();
+    MS_LOG(EXCEPTION) << "anf is not exist in the map ,old " << old_backend_anf->DebugString();
   }
   front_backend_anf_map_[backend_front_anf_map_[old_backend_anf]] = new_backend_anf;
   backend_front_anf_map_[new_backend_anf] = backend_front_anf_map_[old_backend_anf];
@@ -528,5 +526,44 @@ bool KernelGraph::RemoveValueNodeFromGraph(const ValueNodePtr &value_node) {
   }
   return false;
 }
+
+void KernelGraph::ReplaceNode(const AnfNodePtr &old_anf_node, AnfNodePtr new_anf_node) {
+  MS_EXCEPTION_IF_NULL(old_anf_node);
+  MS_EXCEPTION_IF_NULL(new_anf_node);
+  MS_EXCEPTION_IF_NULL(inputs_);
+  auto it = node_output_edges_.find(old_anf_node);
+  if (it == node_output_edges_.end()) {
+    MS_LOG(EXCEPTION) << "Can't find anf node in node_output_edges map";
+  }
+  auto &outputs = it->second;
+  for (auto &output_node : outputs) {
+    auto output_cnode = output_node.first->cast<CNodePtr>();
+    MS_EXCEPTION_IF_NULL(output_cnode);
+    auto &output_node_inputs = output_cnode->inputs();
+    for (size_t i = 1; i < output_node_inputs.size(); i++) {
+      if (output_node_inputs[i] == old_anf_node) {
+        output_cnode->set_input(i, new_anf_node);
+      }
+    }
+    // update graph inputs
+    for (size_t i = 0; i < inputs_->size(); i++) {
+      if ((*inputs_)[i] == old_anf_node) {
+        (*inputs_)[i] = new_anf_node;
+        break;
+      }
+    }
+  }
+  // update front to backend map
+  FrontBackendlMapUpdate(old_anf_node, new_anf_node);
+  // update output depend relations
+  node_output_edges_[new_anf_node] = it->second;
+  (void)node_output_edges_.erase(old_anf_node);
+}
+
+void KernelGraph::UpdateExecuteKernelStreamLabel() {
+  for (auto &kernel : execution_order_) {
+    AnfAlgo::SetStreamDistinctionLabel(stream_distinction_label_, kernel.get());
+  }
+}
 }  // namespace session
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/session/kernel_graph.h b/mindspore/ccsrc/session/kernel_graph.h
index 8cafcc2ebc..3425bde9c2 100755
--- a/mindspore/ccsrc/session/kernel_graph.h
+++ b/mindspore/ccsrc/session/kernel_graph.h
@@ -27,6 +27,7 @@
 #include "ir/func_graph.h"
 #include "ir/anf.h"
 #include "utils/graph_utils.h"
+#include "device/kernel_info.h"
 
 namespace mindspore {
 namespace session {
@@ -37,6 +38,7 @@ class KernelGraph : public FuncGraph {
     inputs_ = std::make_shared<std::vector<AnfNodePtr>>();
     execution_order_ = {};
     executable_ = true;
+    stream_distinction_label_ = kInvalidDistincLabel;
   }
   ~KernelGraph() override = default;
 
@@ -88,7 +90,15 @@ class KernelGraph : public FuncGraph {
   void set_executable(bool executable) { executable_ = executable; }
   // set invalid inputs for control sink
   std::vector<bool> *MutableValidInputs() { return &valid_inputs_; }
-  const std::vector<bool> &ValidInputs() const { return valid_inputs_; }
+  std::vector<bool> valid_inputs() const { return valid_inputs_; }
+  // replace node in graph
+  void ReplaceNode(const AnfNodePtr &old_anf_node, AnfNodePtr new_anf_node);
+  // set stream label of graph
+  void set_stream_distinction_label(uint32_t stream_label) { stream_distinction_label_ = stream_label; }
+  // get stream label of graph
+  uint32_t stream_distinction_label() { return stream_distinction_label_; }
+  // refresh execute kernel stream label
+  void UpdateExecuteKernelStreamLabel();
 
  private:
   // remove value node form graph
@@ -108,6 +118,7 @@ class KernelGraph : public FuncGraph {
   std::shared_ptr<std::vector<AnfNodePtr>> inputs_;
   std::vector<CNodePtr> execution_order_;
   uint32_t graph_id_;
+  uint32_t stream_distinction_label_;
 
   // record map bettween front anf and backend anf,use two map implement bidirectional map
   std::unordered_map<AnfNodePtr, AnfNodePtr> front_backend_anf_map_;
diff --git a/mindspore/ccsrc/session/session_basic.cc b/mindspore/ccsrc/session/session_basic.cc
index 3436d68b81..5404ad6911 100755
--- a/mindspore/ccsrc/session/session_basic.cc
+++ b/mindspore/ccsrc/session/session_basic.cc
@@ -417,9 +417,8 @@ CNodePtr SessionBasic::CreateNewCNode(const CNodePtr &cnode, bool valid_input, K
 
 KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) {
   std::unordered_map<AnfNodePtr, AnfNodePtr> other_graph_cnode;
-  auto graph = std::make_shared<KernelGraph>();
-  graph->set_graph_id(graph_sum_);
-  MS_LOG(INFO) << "Create graph: " << graph_sum_;
+  auto graph = NewKernelGraph();
+  MS_LOG(INFO) << "Create graph: " << graph->graph_id();
   size_t from_other_graph_depend_num = 0;
   for (const auto &node : lst) {
     MS_EXCEPTION_IF_NULL(node);
@@ -456,7 +455,6 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con
   }
   graph->SetExecOrderByDefault();
   opt::BackendCommonOptimization(graph);
-  graphs_[graph_sum_++] = graph;
   return graph;
 }
 
@@ -588,14 +586,14 @@ void SessionBasic::Summary(KernelGraph *graph) {
 CNodePtr SessionBasic::ConstructOutput(const AnfNodePtrList &outputs, const std::shared_ptr<KernelGraph> &graph) {
   MS_EXCEPTION_IF_NULL(graph);
   std::vector<AnfNodePtr> output_args;
+  for (const auto &output : outputs) {
+    MS_LOG(INFO) << "output:" << output->DebugString();
+  }
   auto FindEqu = [graph, outputs](const AnfNodePtr &out) -> AnfNodePtr {
     auto backend_anf = graph->GetBackendAnfByFrontAnf(out);
     if (backend_anf != nullptr) {
       return backend_anf;
     }
-    for (const auto &output : outputs) {
-      MS_LOG(INFO) << "output:" << output->DebugString();
-    }
     MS_LOG(EXCEPTION) << "Can't find the node in the equiv map!";
   };
   output_args.push_back(NewValueNode(prim::kPrimMakeTuple));
@@ -695,5 +693,12 @@ BaseRef SessionBasic::TransformBaseRefListToTuple(const BaseRef &base_ref) {
     MS_LOG(EXCEPTION) << "The output is not a base ref list or a tensor!";
   }
 }
+
+KernelGraphPtr SessionBasic::NewKernelGraph() {
+  auto graph = std::make_shared<KernelGraph>();
+  graph->set_graph_id(graph_sum_);
+  graphs_[graph_sum_++] = graph;
+  return graph;
+}
 }  // namespace session
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/session/session_basic.h b/mindspore/ccsrc/session/session_basic.h
index 0fd0003cc9..de443833d6 100755
--- a/mindspore/ccsrc/session/session_basic.h
+++ b/mindspore/ccsrc/session/session_basic.h
@@ -104,6 +104,8 @@ class SessionBasic {
                                                       const std::vector<bool> &tensors_mask);
   // trans BaseRef list to py::tuple
   BaseRef TransformBaseRefListToTuple(const BaseRef &base_ref);
+  // create a new kernel graph and update the graph sum
+  KernelGraphPtr NewKernelGraph();
 
   std::unordered_map<GraphId, std::shared_ptr<KernelGraph>> graphs_;
   std::unordered_map<GraphInfo, std::shared_ptr<KernelGraph>> run_op_graphs_;
diff --git a/mindspore/ops/_op_impl/tbe/assign.py b/mindspore/ops/_op_impl/tbe/assign.py
index 41a9a0fecd..2fbd152c78 100644
--- a/mindspore/ops/_op_impl/tbe/assign.py
+++ b/mindspore/ops/_op_impl/tbe/assign.py
@@ -27,6 +27,7 @@ assign_op_info = TBERegOp("Assign") \
     .input(1, "value", False, "required", "all") \
     .output(0, "y", False, "required", "all") \
     .dtype_format(DataType.I8_Default, DataType.I8_Default, DataType.I8_Default) \
+    .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default, DataType.BOOL_Default) \
     .dtype_format(DataType.I8_5HD, DataType.I8_5HD, DataType.I8_5HD) \
     .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \
     .dtype_format(DataType.U8_5HD, DataType.U8_5HD, DataType.U8_5HD) \

From 8f1984a8d16926c401f66b75eeebb649105c2b74 Mon Sep 17 00:00:00 2001
From: Wei Luning <weiluning@huawei.com>
Date: Mon, 27 Apr 2020 21:25:28 +0800
Subject: [PATCH 176/242] only cast when level is O2

---
 mindspore/ccsrc/operator/composite/unpack_call.h |  2 --
 mindspore/ccsrc/pipeline/pipeline.cc             |  4 ++++
 mindspore/nn/wrap/cell_wrapper.py                | 10 +++++++---
 mindspore/train/model.py                         |  2 +-
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/mindspore/ccsrc/operator/composite/unpack_call.h b/mindspore/ccsrc/operator/composite/unpack_call.h
index 2f39615c1a..8c055a9386 100644
--- a/mindspore/ccsrc/operator/composite/unpack_call.h
+++ b/mindspore/ccsrc/operator/composite/unpack_call.h
@@ -35,7 +35,6 @@
 namespace mindspore {
 // namespace to support composite operators definition
 namespace prim {
-
 // Expand the tuple and dict parameters generated when parsing the function call,
 // and generate positional parameters and key-value pairs for function.
 class UnpackCall : public MetaFuncGraph {
@@ -47,7 +46,6 @@ class UnpackCall : public MetaFuncGraph {
   friend bool operator==(const UnpackCall &lhs, const UnpackCall &rhs) { return lhs.name_ == rhs.name_; }
 };
 using UnpackCallPtr = std::shared_ptr<UnpackCall>;
-
 }  // namespace prim
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/pipeline/pipeline.cc b/mindspore/ccsrc/pipeline/pipeline.cc
index 6e2c7be685..d04f9617f1 100644
--- a/mindspore/ccsrc/pipeline/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/pipeline.cc
@@ -300,6 +300,10 @@ void ExecutorPy::SaveCompiledGraphToPb(const std::string &phase_s) {
   // save the graph to file in protobuf format
   FuncGraphPtr func_graph = info_[phase_s]->resource->func_graph();
   MS_EXCEPTION_IF_NULL(func_graph);
+  if (phase_s.empty()) {
+    MS_LOG(ERROR) << "`phase` is empty '" << phase_s << "'!";
+    return;
+  }
   std::string name_prefix = phase_s.substr(0, phase_s.find("."));
   std::string pb_filename = std::string("ms_output_") + name_prefix + ".pb";
   std::string filename = GetFilePathName(pb_filename);
diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py
index de0007c2eb..60718ec2b1 100644
--- a/mindspore/nn/wrap/cell_wrapper.py
+++ b/mindspore/nn/wrap/cell_wrapper.py
@@ -304,15 +304,19 @@ class WithEvalCell(Cell):
         >>> eval_net = nn.WithEvalCell(net, loss_fn)
     """
 
-    def __init__(self, network, loss_fn):
+    def __init__(self, network, loss_fn, add_cast_fp32=False):
         super(WithEvalCell, self).__init__(auto_prefix=False)
         self._network = network
         self._loss_fn = loss_fn
+        self.add_cast_fp32 = add_cast_fp32
+
 
     def construct(self, data, label):
         outputs = self._network(data)
-        label = _mp_cast_helper(mstype.float32, label)
-        loss = self._loss_fn(F.cast(outputs, mstype.float32), label)
+        if self.add_cast_fp32:
+            label = _mp_cast_helper(mstype.float32, label)
+            outputs = F.cast(outputs, mstype.float32)
+        loss = self._loss_fn(outputs, label)
         return loss, outputs, label
 
 
diff --git a/mindspore/train/model.py b/mindspore/train/model.py
index 5b1a34e418..c943252e43 100755
--- a/mindspore/train/model.py
+++ b/mindspore/train/model.py
@@ -162,7 +162,7 @@ class Model:
         else:
             if self._loss_fn is None:
                 raise ValueError("loss_fn can not be None.")
-            self._eval_network = nn.WithEvalCell(self._network, self._loss_fn)
+            self._eval_network = nn.WithEvalCell(self._network, self._loss_fn, self._amp_level == "O2")
             self._eval_indexes = [0, 1, 2]
 
     def _build_predict_network(self):

From 64f824e4fc7bc84343c564df8f01c39b9d4c6bd8 Mon Sep 17 00:00:00 2001
From: hanyuanai <6517995+hanyuanai@user.noreply.gitee.com>
Date: Mon, 27 Apr 2020 08:34:00 +0000
Subject: [PATCH 177/242] Add gnn aggregator and its ut

---
 tests/st/gnn/aggregator.py          | 222 ++++++++++++++++++++++++++++
 tests/st/gnn/test_gnn_aggregator.py |  53 +++++++
 2 files changed, 275 insertions(+)
 create mode 100644 tests/st/gnn/aggregator.py
 create mode 100644 tests/st/gnn/test_gnn_aggregator.py

diff --git a/tests/st/gnn/aggregator.py b/tests/st/gnn/aggregator.py
new file mode 100644
index 0000000000..18f189d979
--- /dev/null
+++ b/tests/st/gnn/aggregator.py
@@ -0,0 +1,222 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Aggregator."""
+import mindspore.nn as nn
+from mindspore.ops import operations as P
+from mindspore.ops import functional as F
+from mindspore._extends import cell_attr_register
+from mindspore import Tensor, Parameter
+from mindspore.common.initializer import initializer
+from mindspore._checkparam import check_int_positive, check_bool
+from mindspore.nn.layer.activation import get_activation
+
+
+class GNNFeatureTransform(nn.Cell):
+    r"""
+    The GNN feature transform layer for input.
+
+    Applies linear transformation for the input feature. This layer implements the operation as:
+
+    .. math::
+        \text{outputs} = \text{inputs} * \text{kernel} + \text{bias},
+
+    where :math:`\text{kernel}` is a weight matrix of shape :math:`(out\_channels, in\_channels)`
+    created by the layer, and :math:`\text{bias}` is a bias vector of shape :math:`(out\_channels,)`
+    created by the layer (only if has_bias is True). The first two dimensions of the input are
+    flattened before the matrix multiplication and restored afterwards.
+
+    Args:
+        in_channels (int): The number of channels in the input space.
+        out_channels (int): The number of channels in the output space.
+        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
+            is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
+        bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
+            same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
+        has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
+
+    Raises:
+        ValueError: If weight_init or bias_init shape is incorrect.
+
+    Inputs:
+        - **input_x** (Tensor) - The input feature. `construct` reads exactly three dimensions, so the shape
+        must be :math:`(B, N, C)`; the first two are flattened to :math:`B * N` rows for the matrix multiply
+        and :math:`C` is multiplied against the `in_channels` axis of the weight.
+
+    Outputs:
+        Tensor, the shape of the output tensor is :math:`(B, N, out\_channels)`.
+
+    Examples:
+        >>> net = GNNFeatureTransform(32, 64)
+        >>> input_x = Tensor(np.random.rand(16, 3, 32).astype(np.float32))
+        >>> output = net(input_x)
+        >>> output.shape()
+        (16, 3, 64)
+    """
+    @cell_attr_register(attrs=['has_bias', 'activation'])
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 weight_init='normal',
+                 bias_init='zeros',
+                 has_bias=True):
+        super(GNNFeatureTransform, self).__init__()
+        self.in_channels = check_int_positive(in_channels)
+        self.out_channels = check_int_positive(out_channels)
+        self.has_bias = check_bool(has_bias)
+
+        if isinstance(weight_init, Tensor):
+            if weight_init.dim() != 2 or weight_init.shape()[0] != out_channels or \
+               weight_init.shape()[1] != in_channels:
+                raise ValueError("weight_init shape error")
+
+        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")
+
+        if self.has_bias:
+            if isinstance(bias_init, Tensor):
+                if bias_init.dim() != 1 or bias_init.shape()[0] != out_channels:
+                    raise ValueError("bias_init shape error")
+
+            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")
+
+        self.matmul = P.MatMul(transpose_b=True)
+        self.bias_add = P.BiasAdd()
+
+    def construct(self, x):
+        tensor_shape = F.shape(x)
+        input_feature = F.reshape(x, (tensor_shape[0] * tensor_shape[1], tensor_shape[2]))
+        output = self.matmul(input_feature, self.weight)
+        if self.has_bias:
+            output = self.bias_add(output, self.bias)
+        output = F.reshape(output, (tensor_shape[0], tensor_shape[1], self.out_channels))
+        return output
+
+    def extend_repr(self):
+        str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}' \
+                .format(self.in_channels, self.out_channels, self.weight, self.has_bias)
+        if self.has_bias:
+            str_info = str_info + ', bias={}'.format(self.bias)
+
+        return str_info
+
+
+class _BaseAggregator(nn.Cell):
+    """
+    Base Aggregator of GNN
+
+    Args:
+        feature_in_dim (int): Node or edge input feature dim.
+        feature_out_dim (int): Node or edge output feature dim.
+        use_fc (bool): Specifies whether a linear transformation is built for use before aggregation. Default: True
+        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
+            is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
+        bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
+            same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
+        has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
+        dropout_ratio (float): The keep rate of dropout layer, greater than 0 and less equal than 1. Default: None.
+        activation (str): Name of the activation function, passed to `get_activation`, eg. 'relu'. Default: None.
+
+    Examples:
+        >>> class MyAggregator(_BaseAggregator):
+        >>>    def __init__(self, feature_in_dim, feature_out_dim):
+        >>>        super(MyAggregator, self).__init__(feature_in_dim, feature_out_dim)
+        >>>        self.reduce_sum = P.ReduceSum()
+        >>>
+        >>>    def construct(self, x):
+        >>>        return self.reduce_sum(x, 1)
+    """
+    def __init__(self,
+                 feature_in_dim,
+                 feature_out_dim,
+                 use_fc=True,
+                 weight_init="normal",
+                 bias_init="zeros",
+                 has_bias=True,
+                 dropout_ratio=None,
+                 activation=None):
+        super(_BaseAggregator, self).__init__()
+        self.in_dim = feature_in_dim
+        self.out_dim = feature_out_dim
+        self.use_fc = use_fc
+        if self.use_fc:
+            self.weight_init = weight_init
+            self.bias_init = bias_init
+            self.has_bias = has_bias
+            self.fc = GNNFeatureTransform(self.in_dim,
+                                          self.out_dim,
+                                          weight_init=self.weight_init,
+                                          bias_init=self.bias_init,
+                                          has_bias=self.has_bias)
+        self.dropout_ratio = dropout_ratio
+        if self.dropout_ratio is not None:
+            self.dropout = nn.Dropout(keep_prob=self.dropout_ratio)  # dropout_ratio is the keep probability
+        self.dropout_flag = self.dropout_ratio is not None
+        self.activation = get_activation(activation)
+        self.activation_flag = self.activation is not None
+
+    def construct(self, **kward):
+        """Must be overridden by all subclasses."""
+        raise NotImplementedError
+
+
+class MeanAggregator(_BaseAggregator):
+    """
+    Mean Aggregator of GNN
+
+    Args:
+        feature_in_dim (int): Node or edge input feature dim.
+        feature_out_dim (int): Node or edge output feature dim.
+        use_fc (bool): Specifies whether a linear transformation is applied before aggregation. Default: True
+        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
+            is same as input x. The values of str refer to the function `initializer`. Default: 'normal'.
+        bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
+            same as input x. The values of str refer to the function `initializer`. Default: 'zeros'.
+        has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
+        dropout_ratio (float): The keep rate of dropout layer, greater than 0 and less equal than 1. Default: None.
+        activation (str): Name of the activation function applied before the mean, eg. 'relu'. Default: None.
+
+    Examples:
+        >>> net = MeanAggregator(32, 64, activation="relu", dropout_ratio=0.5)
+        >>> input_data = Tensor(np.array(np.random.rand(32, 3, 32), dtype=np.float32))
+        >>> output = net(input_data)
+    """
+    def __init__(self,
+                 feature_in_dim,
+                 feature_out_dim,
+                 use_fc=True,
+                 weight_init="normal",
+                 bias_init="zeros",
+                 has_bias=True,
+                 dropout_ratio=None,
+                 activation=None):
+        super(MeanAggregator, self).__init__(
+            feature_in_dim,
+            feature_out_dim,
+            use_fc=use_fc,
+            weight_init=weight_init,
+            bias_init=bias_init,
+            has_bias=has_bias,
+            dropout_ratio=dropout_ratio,
+            activation=activation)
+        self.reduce_mean = P.ReduceMean(keep_dims=False)
+
+    def construct(self, input_feature):
+        if self.use_fc:
+            input_feature = self.fc(input_feature)
+        if self.dropout_flag:
+            input_feature = self.dropout(input_feature)
+        if self.activation_flag:
+            input_feature = self.activation(input_feature)
+        output_feature = self.reduce_mean(input_feature, 1)  # average over axis 1, which is then dropped
+        return output_feature
diff --git a/tests/st/gnn/test_gnn_aggregator.py b/tests/st/gnn/test_gnn_aggregator.py
new file mode 100644
index 0000000000..bba7c09c31
--- /dev/null
+++ b/tests/st/gnn/test_gnn_aggregator.py
@@ -0,0 +1,53 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""test gnn aggregator."""
+import numpy as np
+
+import mindspore.nn as nn
+import mindspore.context as context
+from mindspore import Tensor
+from mindspore.common.api import _executor
+import mindspore.ops.composite as C
+from aggregator import MeanAggregator
+
+context.set_context(mode=context.GRAPH_MODE)
+
+
+class MeanAggregatorGrad(nn.Cell):
+    """Wraps a network so that calling the cell evaluates grad_all_with_sens on it."""
+    def __init__(self, network):
+        super(MeanAggregatorGrad, self).__init__()
+        self.grad_op = C.grad_all_with_sens
+        self.network = network
+
+    def construct(self, x, sens):
+        gradients = self.grad_op(self.network)(x, sens)
+        return gradients
+
+
+def test_MeanAggregator():
+    """Check that the forward graph of MeanAggregator compiles."""
+    net = MeanAggregator(32, 64, activation="relu", dropout_ratio=0.5)
+    features = Tensor(np.random.rand(32, 3, 32).astype(np.float32))
+    _executor.compile(net, features)
+
+
+def test_MeanAggregator_grad():
+    """Check that the backward graph of MeanAggregator compiles."""
+    net = MeanAggregator(32, 64, activation="relu", dropout_ratio=0.5)
+    features = Tensor(np.random.rand(32, 3, 32).astype(np.float32))
+    sens = Tensor(np.ones([32, 64], dtype=np.float32))
+    grad_net = MeanAggregatorGrad(net)
+    _executor.compile(grad_net, features, sens)

From efe41902b80301a4c74cc8491b38865e938fea75 Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Wed, 29 Apr 2020 10:18:28 +0800
Subject: [PATCH 178/242] modify config param

---
 example/resnet101_imagenet2012/README.md | 4 ++--
 example/resnet101_imagenet2012/config.py | 4 ++--
 example/resnet101_imagenet2012/eval.py   | 2 +-
 example/resnet101_imagenet2012/train.py  | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/example/resnet101_imagenet2012/README.md b/example/resnet101_imagenet2012/README.md
index d5729b70db..852326c9d6 100644
--- a/example/resnet101_imagenet2012/README.md
+++ b/example/resnet101_imagenet2012/README.md
@@ -51,8 +51,8 @@ Parameters for both training and evaluating can be set in config.py.
 "image_height": 224,              # image height
 "image_width": 224,               # image width
 "save_checkpoint": True,          # whether save checkpoint or not
-"save_checkpoint_steps": 500,     # the step interval between two checkpoints. By default, the last checkpoint will be saved after the last step
-"keep_checkpoint_max": 40,        # only keep the last keep_checkpoint_max checkpoint
+"save_checkpoint_epochs": 1,      # the epoch interval between two checkpoints. By default, the last checkpoint will be saved after the last epoch
+"keep_checkpoint_max": 10,        # only keep the last keep_checkpoint_max checkpoint
 "save_checkpoint_path": "./",     # path to save checkpoint relative to the executed path
 "warmup_epochs": 0,               # number of warmup epoch
 "lr_decay_mode": "cosine"         # decay mode for generating learning rate
diff --git a/example/resnet101_imagenet2012/config.py b/example/resnet101_imagenet2012/config.py
index ca58f24da3..0b9f16b504 100755
--- a/example/resnet101_imagenet2012/config.py
+++ b/example/resnet101_imagenet2012/config.py
@@ -28,8 +28,8 @@ config = ed({
     "image_height": 224,
     "image_width": 224,
     "save_checkpoint": True,
-    "save_checkpoint_steps": 500,
-    "keep_checkpoint_max": 40,
+    "save_checkpoint_epochs": 1,
+    "keep_checkpoint_max": 10,
     "save_checkpoint_path": "./",
     "warmup_epochs": 0,
     "lr_decay_mode": "cosine",
diff --git a/example/resnet101_imagenet2012/eval.py b/example/resnet101_imagenet2012/eval.py
index 979c6ca949..bdf6e89ca8 100755
--- a/example/resnet101_imagenet2012/eval.py
+++ b/example/resnet101_imagenet2012/eval.py
@@ -54,7 +54,7 @@ if __name__ == '__main__':
     if not args_opt.do_eval and args_opt.run_distribute:
         context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                           mirror_mean=True, parameter_broadcast=True)
-        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313])
         init()
 
     epoch_size = config.epoch_size
diff --git a/example/resnet101_imagenet2012/train.py b/example/resnet101_imagenet2012/train.py
index c2de3e8d98..ca74262890 100755
--- a/example/resnet101_imagenet2012/train.py
+++ b/example/resnet101_imagenet2012/train.py
@@ -59,7 +59,7 @@ if __name__ == '__main__':
     if not args_opt.do_eval and args_opt.run_distribute:
         context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                           mirror_mean=True, parameter_broadcast=True)
-        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313])
         init()
 
     epoch_size = config.epoch_size
@@ -91,7 +91,7 @@ if __name__ == '__main__':
         loss_cb = LossMonitor()
         cb = [time_cb, loss_cb]
         if config.save_checkpoint:
-            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_steps,
+            config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs*step_size,
                                          keep_checkpoint_max=config.keep_checkpoint_max)
             ckpt_cb = ModelCheckpoint(prefix="resnet", directory=config.save_checkpoint_path, config=config_ck)
             cb += [ckpt_cb]

From e886a3182c269f95fb6462f9960e54be685d8e61 Mon Sep 17 00:00:00 2001
From: candanzg <zhangshucheng@huawei.com>
Date: Sun, 26 Apr 2020 15:17:59 +0800
Subject: [PATCH 179/242] tensor assign with ellipsis

Signed-off-by: candanzg <zhangshucheng@huawei.com>
---
 mindspore/_extends/parse/__init__.py          |   4 +-
 mindspore/_extends/parse/parser.py            |   7 +-
 mindspore/_extends/utils.py                   |   7 +
 mindspore/ccsrc/pipeline/parse/parse_base.h   |   1 +
 .../ccsrc/pipeline/static_analysis/prim.cc    |   6 +
 mindspore/ccsrc/utils/convert_utils.cc        |   2 +
 .../multitype_ops/_multitype_ops_util.py      |  81 ++++++--
 .../composite/multitype_ops/setitem_impl.py   |  37 +++-
 tests/ut/python/ops/test_tensor_slice.py      | 195 ++++++++++++------
 9 files changed, 254 insertions(+), 86 deletions(-)

diff --git a/mindspore/_extends/parse/__init__.py b/mindspore/_extends/parse/__init__.py
index 9366b5a2d2..62ba2e5406 100644
--- a/mindspore/_extends/parse/__init__.py
+++ b/mindspore/_extends/parse/__init__.py
@@ -22,11 +22,11 @@ from .parser import (Parser, create_obj_instance, generate_scope,
                      get_dataclass_attributes, get_dataclass_methods,
                      get_module_namespace, get_obj_type, get_object_key,
                      get_parse_method_of_class, get_scope_name,
-                     is_class_member, parse_cb, resolve_symbol)
+                     is_class_member, parse_cb, resolve_symbol, create_ellipsis_obj)
 from .serialize import *
 
 __all__ = ['parse_cb', 'get_parse_method_of_class', 'get_bprop_method_of_class', 'resolve_symbol',
            'get_object_key', 'get_class_instance_type', 'is_class_member', 'get_obj_type',
            'create_obj_instance', 'get_module_namespace', 'get_class_member_namespace_symbol',
            'Parser', 'get_dataclass_attributes', 'get_dataclass_methods', 'dump_obj', 'load_obj',
-           'get_dataclass_methods', 'get_scope_name', 'create_slice_obj']
+           'get_dataclass_methods', 'get_scope_name', 'create_slice_obj', 'create_ellipsis_obj']
diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py
index d8039cd56a..34a3a6c59e 100644
--- a/mindspore/_extends/parse/parser.py
+++ b/mindspore/_extends/parse/parser.py
@@ -29,7 +29,7 @@ from mindspore.common.dtype import pytype_to_dtype
 from mindspore.common.api import _MindSporeFunction
 from .namespace import CellNamespace, ClosureNamespace, ClassMemberNamespace
 from .resources import parse_object_map, convert_object_map, trope_ns, SYMBOL_UNDEFINE, NO_IMPLEMENT
-from ..utils import Slice
+from ..utils import Slice, Ellipsis_
 
 # define return value
 RET_SUCCESS = 0
@@ -70,6 +70,11 @@ parse_expr_statement_white_list = (
     "append",
 )
 
+def create_ellipsis_obj():
+    """Create Ellipsis_ object"""
+    return Ellipsis_()
+
+
 def create_slice_obj(start, end, step):
     """Create Slice object"""
     return Slice(start, end, step)
diff --git a/mindspore/_extends/utils.py b/mindspore/_extends/utils.py
index d0457607b5..fecbf546f5 100644
--- a/mindspore/_extends/utils.py
+++ b/mindspore/_extends/utils.py
@@ -110,3 +110,10 @@ class Slice:
     start: int
     end: int
     step: int
+
+
+@dataclass
+class Ellipsis_:
+    """
+    Marker object standing for an ellipsis (`...`) in a tensor index; it declares no fields.
+    """
diff --git a/mindspore/ccsrc/pipeline/parse/parse_base.h b/mindspore/ccsrc/pipeline/parse/parse_base.h
index a3ca67b60a..c7ce4e1196 100644
--- a/mindspore/ccsrc/pipeline/parse/parse_base.h
+++ b/mindspore/ccsrc/pipeline/parse/parse_base.h
@@ -80,6 +80,7 @@ const char PYTHON_PARSE_GENERATE_SCOPE[] = "generate_scope";
 const char PYTHON_PARSE_GET_SCOPE_NAME[] = "get_scope_name";
 
 const char PYTHON_PARSE_CLASS_SLICE[] = "create_slice_obj";
+const char PYTHON_PARSE_CLASS_ELLIPSIS[] = "create_ellipsis_obj";
 
 // define the common name
 const char NAMED_PRIMITIVE_ITER[] = "iter";
diff --git a/mindspore/ccsrc/pipeline/static_analysis/prim.cc b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
index 293f31707e..274f63844c 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/prim.cc
+++ b/mindspore/ccsrc/pipeline/static_analysis/prim.cc
@@ -298,6 +298,12 @@ py::dict ConvertAbstractToPython(const AbstractBasePtr &abs_base) {
   } else if (abs_base->isa<AbstractRef>()) {
     auto value = abs_base->cast<AbstractRefPtr>()->ref();
     dic = ConvertAbstractToPython(value);
+  } else if (abs_base->isa<AbstractEllipsis>()) {
+    auto arg_slice = dyn_cast<AbstractEllipsis>(abs_base);
+    std::vector<int> shape;
+    dic["shape"] = shape;
+    dic["dtype"] = arg_slice->BuildType();
+    dic["value"] = BuildValue(arg_slice->BuildValue());
   } else if (abs_base->isa<AbstractTuple>()) {
     auto arg_tuple = dyn_cast<AbstractTuple>(abs_base);
     size_t len = arg_tuple->size();
diff --git a/mindspore/ccsrc/utils/convert_utils.cc b/mindspore/ccsrc/utils/convert_utils.cc
index df4a8656f5..edbfe8dc4c 100644
--- a/mindspore/ccsrc/utils/convert_utils.cc
+++ b/mindspore/ccsrc/utils/convert_utils.cc
@@ -98,6 +98,8 @@ py::object ValuePtrToPyData(const ValuePtr &value) {
       i++;
     }
     ret = rets;
+  } else if (value->isa<EllipsisObj>()) {
+    ret = parse::python_adapter::CallPyFn(parse::PYTHON_MOD_PARSE_MODULE, parse::PYTHON_PARSE_CLASS_ELLIPSIS);
   } else if (value->isa<ValueSlice>()) {
     auto slice = value->cast<ValueSlicePtr>();
     auto start = ValuePtrToPyData(slice->start());
diff --git a/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py b/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
index 3a44b1e483..d008f96648 100644
--- a/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
+++ b/mindspore/ops/composite/multitype_ops/_multitype_ops_util.py
@@ -20,7 +20,7 @@ import numpy as np
 from ...primitive import constexpr
 from ....common.tensor import Tensor
 from ....common import dtype as mstype
-from ...._extends.utils import Slice
+from ...._extends.utils import Slice, Ellipsis_
 
 @constexpr
 def check_equal(param1, param2, msg="{},{}"):
@@ -29,31 +29,40 @@ def check_equal(param1, param2, msg="{},{}"):
         raise ValueError(msg.format(param1, param2))
     return param1
 
+
+@constexpr
+def check_ellipsis_shape_size(data_shape, value_shape, data_size, value_size):
+    """Checks that the shape or size of the value is compatible with the tensor, else raises ValueError."""
+    if value_size not in (1, data_size) and data_shape != value_shape:
+        raise ValueError("The value(shape={}), can not assign to tensor(shape={}).".format(value_shape, data_shape))
+    return True
+
+
 @constexpr
 def check_tensor_setitem_index(index, element_type=None):
     """Checks tuple index type of tensor assignment."""
     if index is None:
-        raise ValueError("Tensor's index cannot be None.")
+        raise IndexError("Tensor's index cannot be None.")
     # eg. Tensor[Slice] = u
     if isinstance(index, Slice):
         return True
     # eg. Tensor[tuple] = u
     if isinstance(index, tuple):
         if not index:
-            raise ValueError("Tensor's index cannot be empty.")
+            raise IndexError("Tensor's index cannot be empty.")
         # eg. Tensor[tuple(Slice...)] = u
-        if isinstance(index[0], (Slice, int)):
+        if isinstance(index[0], (Slice, Ellipsis_, int)):
             return True
-        raise ValueError("Index of type '{}' is not supported yet.".format(type(index[0])))
+        raise IndexError("Index of type '{}' is not supported yet.".format(type(index[0])))
     # eg. Tensor[Tensor[dtype=bool]] = u
     if index == mstype.tensor:
         if element_type is None or element_type != mstype.bool_:
-            raise ValueError(
-                "The index of tensor should be a bool type tensor. \
-                {} type is not supported yet.".format(element_type))
+            raise TypeError(
+                "The index of tensor should be a bool type tensor. "
+                "{} type is not supported yet.".format(element_type))
         return True
 
-    raise ValueError("Index of type '{}' is not supported yet.".format(type(index)))
+    raise IndexError("Index of type '{}' is not supported yet.".format(type(index)))
 
 
 @constexpr
@@ -90,10 +99,18 @@ def slice_expand(input_slices, shape):
     # Slice or tuple(Slice...)
     if isinstance(input_slices, Slice):
         slices = (input_slices,)
-    elif isinstance(input_slices, (tuple, list)) and input_slices and isinstance(input_slices[0], Slice):
-        slices = input_slices
+    elif isinstance(input_slices, (tuple, list)) and input_slices and isinstance(input_slices[0], (Slice, Ellipsis_)):
+        is_have_ellipsis = False
+        for _, element in enumerate(input_slices):
+            if isinstance(element, Ellipsis_):
+                is_have_ellipsis = True
+                break
+        if is_have_ellipsis:
+            slices = ellipsis2slice(input_slices, shape)
+        else:
+            slices = input_slices
     else:
-        raise ValueError("Tensor's index type is not supported yet.")
+        raise IndexError("Tensor's index type is not supported yet.")
 
     for s in slices:
         start = 0 if (s.start is None) else s.start
@@ -111,6 +128,26 @@ def slice_expand(input_slices, shape):
     return begin, end, strides
 
 
+def ellipsis2slice(input_, shape):
+    """Replaces the Ellipsis_ in an index with full slices and returns the index as a tuple."""
+    index = (input_,) if isinstance(input_, Ellipsis_) else input_
+    expanded = []
+    seen_ellipsis = False
+    for item in index:
+        if not isinstance(item, Ellipsis_):
+            expanded.append(item)
+        elif seen_ellipsis:
+            raise IndexError("There cannot be more than one ellisis (...) in the index of the tensor, "
+                             "but it is currently {}".format(index))
+        else:
+            seen_ellipsis = True
+            # The ellipsis stands for one full slice per dimension of `shape`
+            # that the remaining index entries do not address.
+            for _ in range(len(shape) - len(index) + 1):
+                expanded.append(Slice(None, None, None))
+    return tuple(expanded)
+
+
 @constexpr
 def slice2indices(input_slices, shape):
     """
@@ -139,7 +176,7 @@ def slice2indices(input_slices, shape):
 def check_indices(indices_size, index):
     """Checks indices whether is empty."""
     if indices_size < 1:
-        raise ValueError("The tensor's index is unreasonable. index:{}".format(index))
+        raise IndexError("The tensor's index is unreasonable. index:{}".format(index))
     return indices_size
 
 
@@ -151,8 +188,8 @@ def check_indices_value_size(indices_size, value_size):
     if value_size > 1:
         if value_size != indices_size:
             raise ValueError(
-                "The value given to tensor does not match the index size. \
-                value size:{}, indics size:{}".format(value_size, indices_size))
+                "The value given to tensor does not match the index size,"
+                " value size:{}, indics size:{}".format(value_size, indices_size))
     return value_size
 
 @constexpr
@@ -168,8 +205,11 @@ def integer_to_indices(index, shape):
 def tuple_element_is_slice(indexs):
     """Judges tuple element type."""
     if not indexs:
-        raise ValueError("Tensor's index cannot be empty.")
-    if isinstance(indexs, tuple) and isinstance(indexs[0], Slice):
+        raise IndexError("Tensor's index cannot be empty.")
+    if isinstance(indexs, tuple):
+        for _, ele in enumerate(indexs):
+            if not isinstance(ele, Slice):
+                return False
         return True
     return False
 
@@ -177,7 +217,10 @@ def tuple_element_is_slice(indexs):
 def tuple_element_is_int(indexs):
     """Judges tuple element type."""
     if not indexs:
-        raise ValueError("Tensor's index cannot be empty.")
-    if isinstance(indexs, tuple) and isinstance(indexs[0], int):
+        raise IndexError("Tensor's index cannot be empty.")
+    if isinstance(indexs, tuple):
+        for _, ele in enumerate(indexs):
+            if not isinstance(ele, int):
+                return False
         return True
     return False
diff --git a/mindspore/ops/composite/multitype_ops/setitem_impl.py b/mindspore/ops/composite/multitype_ops/setitem_impl.py
index 13d4a1ffce..2f44bdc5ba 100644
--- a/mindspore/ops/composite/multitype_ops/setitem_impl.py
+++ b/mindspore/ops/composite/multitype_ops/setitem_impl.py
@@ -254,10 +254,10 @@ def _tensor_indices_tensor(data, data_shape, index, indices, value):
     data_dtype = F.dtype(data)
     indices_size = F.size(indices)
     indices_size = mult_util.check_indices(indices_size, index)
-    update = F.fill(data_dtype, (indices_size,), 1)
+    update = F.fill(mstype.int32, (indices_size,), 1)
     condition_1d = F.scatter_nd(indices, update, (data_size,))
-    condition_1d = F.cast(condition_1d, mstype.bool_)
     condition = F.reshape(condition_1d, data_shape)
+    condition = F.cast(condition, mstype.bool_)
     value_fill = None
     value_size = F.size(value)
 
@@ -336,10 +336,10 @@ def _tensor_indices_number(data, data_shape, index, indices, value):
     data_dtype = F.dtype(data)
     indices_size = F.size(indices)
     indices_size = mult_util.check_indices(indices_size, index)
-    update = F.fill(data_dtype, (indices_size,), 1)
+    update = F.fill(mstype.int32, (indices_size,), 1)
     condition_1d = F.scatter_nd(indices, update, (data_size,))
-    condition_1d = F.cast(condition_1d, mstype.bool_)
     condition = F.reshape(condition_1d, data_shape)
+    condition = F.cast(condition, mstype.bool_)
     value_fill = F.fill(data_dtype, (indices_size,), value)
     value_1d = F.scatter_nd(indices, value_fill, (data_size,))
     u = F.reshape(value_1d, data_shape)
@@ -360,3 +360,32 @@ def _tensor_setitem_with_int_v2(data, index, value):
     data_shape = F.shape(data)
     indices = mult_util.integer_to_indices(index, data_shape)
     return _tensor_indices_tensor(data, data_shape, index, indices, value)
+
+
+@setitem.register("Tensor", "Ellipsis", "Number")
+def _tensor_setitem_with_ellipsis_v1(data, index, value):
+    """Syntax: A[...] = number."""
+    data_shape = F.shape(data)
+    data_dtype = F.dtype(data)
+    return F.fill(data_dtype, data_shape, value)
+
+
+@setitem.register("Tensor", "Ellipsis", "Tensor")
+def _tensor_setitem_with_ellipsis_v2(data, index, value):
+    """Syntax: A[...] = Tensor."""
+    result = None
+    data_shape = F.shape(data)
+    data_dtype = F.dtype(data)
+    data_size = F.size(data)
+    value_shape = F.shape(value)
+    value_size = F.size(value)
+    check_result = mult_util.check_ellipsis_shape_size(data_shape, value_shape, data_size, value_size)
+    if check_result:
+        if data_size == value_size:
+            result = F.reshape(value, data_shape)
+            result = F.cast(result, data_dtype)
+        elif value_size == 1:
+            param1 = F.fill(data_dtype, data_shape, 1)
+            param2 = F.cast(value, data_dtype)
+            result = F.tensor_mul(param1, param2)
+    return result
diff --git a/tests/ut/python/ops/test_tensor_slice.py b/tests/ut/python/ops/test_tensor_slice.py
index f713b1ea0c..32c4025368 100644
--- a/tests/ut/python/ops/test_tensor_slice.py
+++ b/tests/ut/python/ops/test_tensor_slice.py
@@ -103,6 +103,7 @@ class TensorAssignWithSliceError1(Cell):
         a[1:3:-1,::] = b
         return a
 
+
 class TensorAssignWithSliceError2(Cell):
     def __init__(self):
         super(TensorAssignWithSliceError2, self).__init__()
@@ -110,24 +111,29 @@ class TensorAssignWithSliceError2(Cell):
     def construct(self, a, b):
         a[1:3:-1] = b
         return a
+
+
 class TensorAssignWithSlice2(Cell):
     def __init__(self):
         super(TensorAssignWithSlice2, self).__init__()
 
-    def construct(self, a, b):
+    def construct(self, a, b, ck):
         a[1:5] = b
         a[3:4] = 5
         a[-1:1:-1] = b
         a[-1:3:-1] = 5
         a[::] = b
         a[::] = 9
-        return a
+        z = a + ck
+        return z
+
+
 class TensorAssignWithSlice(Cell):
     def __init__(self):
         super(TensorAssignWithSlice, self).__init__()
         self.c = 2
 
-    def construct(self, a, b):
+    def construct(self, a, b, ck):
         a[1:3,::] = b
         a[2:3:,3:] = b
         a[::] = b
@@ -136,9 +142,10 @@ class TensorAssignWithSlice(Cell):
         a[::,::] = self.c
         a[2:3:,0:, 4:1:-1] = b
         a[2:3:,0:, 4:1:-1] = self.c
-        z = a
+        z = a + ck
         return z
 
+
 def test_tensor_assign():
     context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
     net = TensorAssignWithSlice()
@@ -146,95 +153,145 @@ def test_tensor_assign():
     net_e1 = TensorAssignWithSliceError1()
     net_e2 = TensorAssignWithSliceError2()
     a = np.arange(60).reshape(3,4,5)
-    b = Tensor([1])
-    Ta = Tensor(a)
-    Ta4d = Tensor(a.reshape(1,3,4,5))
-    Tb= Tensor([1,3])
-    Tc= Tensor([])
-    t = Tensor([1, 2, 3, 4, 5, 6, 7, 8])
-    net(Ta, b)
-    net2(t, b)
+    ck = np.arange(60).reshape(3,4,5)
+    b = Tensor([1], dtype=mstype.float32)
+    Ta = Tensor(a, dtype=mstype.float32)
+    Tck = Tensor(ck, dtype=mstype.float32)
+    Ta4d = Tensor(a.reshape(1,3,4,5), dtype=mstype.float32)
+    Ta4d_ck = Tensor(ck.reshape(1,3,4,5), dtype=mstype.float32)
+    Tb= Tensor([1,3], dtype=mstype.float32)
+    Tc= Tensor([], dtype=mstype.float32)
+    t = Tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=mstype.float32)
+    tck = Tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=mstype.float32)
+    net(Ta, b, Tck)
+    net2(t, b, tck)
     # Error for A[Slice] = Number
     # 1. A[Slice] = Number,  Slice error
-    with pytest.raises(ValueError):
+    with pytest.raises(IndexError):
         net_e2(t, 2)
 
     # Error for A[Slice] = U, U is a Tensor
     # 1. A[Slice] = U,  u.size is error
     with pytest.raises(ValueError):
-        net2(t, Tb)
+        net2(t, Tb, tck)
     # 2. A[Slice] = U, U is empty
     with pytest.raises(ValueError):
-        net2(t, Tc)
+        net2(t, Tc, tck)
     # 3. A[Slice] = U, U.size error
     with pytest.raises(ValueError):
-        net2(t, Tb)
+        net2(t, Tb, tck)
 
     # Error for A[Tuple(Slice...)] = Tensor
     # 1. A[Tuple(Slice...)] = U, U is empty
     with pytest.raises(ValueError):
-        net(Ta, Tc)
+        net(Ta, Tc, Tck)
     # 2. A[Tuple(Slice...)] = U, U.size error
     with pytest.raises(ValueError):
-        net(Ta, Tb)
+        net(Ta, Tb, Tck)
     # 3. A[Tuple(Slice...)] = U,  Slice error
-    with pytest.raises(ValueError):
+    with pytest.raises(IndexError):
         net_e1(Ta, b)
 
     # Error for A[Tuple(Slice...)] = Number
     # 1. A[Tuple(Slice...)] = Number,  Slice error
-    with pytest.raises(ValueError):
+    with pytest.raises(IndexError):
         net_e1(Ta, 2)
 
     net = TensorAssignWithInteger()
     # Error for A[Number] = scalar/Tensor
     # 1. A[Number] = U, U is a Tensor, u.size not match
     with pytest.raises(ValueError):
-        net(Ta, Tb)
+        net(Ta, Tb, Tck)
     with pytest.raises(ValueError):
-        net(Ta, Tc)
+        net(Ta, Tc, Tck)
     # 2. A[Number] = U, the number index error
     with pytest.raises(IndexError):
-        net(Ta4d, b)
+        net(Ta4d, b, Ta4d_ck)
 
     # Error for A[(n,m)] = scalar/Tensor
     # 1. A[(n,m)] = U, U is a tensor. u.size not match
     net = TensorAssignWithTupleInteger()
     with pytest.raises(ValueError):
-        net(Ta, Tc)
+        net(Ta, Tc, Tck)
     with pytest.raises(ValueError):
-        net(Ta, Tb)
+        net(Ta, Tb, Tck)
     # 2. A[(n,m)] = U, the number index error
     with pytest.raises(IndexError):
-        net(Ta4d, b)
+        net(Ta4d, b, Ta4d_ck)
+
+    #Error for  A[...] = U or A[1:, ...] = u
+    #1. A[...] = scalar/tensor
+    net = TensorAssignWithEllipsis()
+    net(Ta, Ta4d)
+    with pytest.raises(ValueError):
+        net(Ta, Tc)
+    with pytest.raises(ValueError):
+        net(Ta, Tb)
+    #2. A[::, 1:, ...] = scalar/tensor
+    net = TensorAssignWithTupleEllipsis()
+    net(Ta, b)
+    with pytest.raises(ValueError):
+        net(Ta, Tc)
+    with pytest.raises(ValueError):
+        net(Ta, Tb)
+
+
+class TensorAssignWithTupleEllipsis2(Cell):
+    def __init__(self):
+        super(TensorAssignWithTupleEllipsis2, self).__init__()
+    def construct(self, a, b):
+        a[1:, ..., ::] = b
+        return a
+
+
+class TensorAssignWithTupleEllipsis(Cell):
+    def __init__(self):
+        super(TensorAssignWithTupleEllipsis, self).__init__()
+    def construct(self, a, b):
+        a[:2, ...] = 1
+        a[1:, ...] = b
+        return a
+
+
+class TensorAssignWithEllipsis(Cell):
+    def __init__(self):
+        super(TensorAssignWithEllipsis, self).__init__()
+    def construct(self, a, b):
+        a[...] = 1
+        a[...] = b
+        return a
+
 
 class TensorAssignWithInteger(Cell):
     def __init__(self):
         super(TensorAssignWithInteger, self).__init__()
 
-    def construct(self, a, b):
+    def construct(self, a, b, ck):
         a[1] = 1
         a[0] = b
-        return a
+        z = a + ck
+        return z
 
 class TensorAssignWithTupleInteger(Cell):
     def __init__(self):
         super(TensorAssignWithTupleInteger, self).__init__()
 
-    def construct(self, a, b):
+    def construct(self, a, b, ck):
         a[(1)] = 1
         a[(1)] = b
         a[(1,1)] = b
         a[(1,1)] = 1
-        return a
+        z = a + ck
+        return z
 
 class TensorAssignWithBoolTensorIndex(Cell):
     def __init__(self):
         super(TensorAssignWithBoolTensorIndex, self).__init__()
-        self.t = Tensor(np.arange(60).reshape([3,4,5]), dtype = mstype.float64)
+        self.t = Tensor(np.arange(60).reshape([3,4,5]), dtype = mstype.float32)
+        self.u_scalar = 5
 
-    def construct(self, a, b, c, u_tensor, _scalar):
-        a[c] = u_scalar
+    def construct(self, a, b, c, u_tensor):
+        a[c] = self.u_scalar
         a[b] = u_tensor
         z = a + self.t
         return z
@@ -252,15 +309,16 @@ class TensorAssignWithBoolTensorIndexError(Cell):
 class TensorAssignWithBoolTensorIndex2(Cell):
     def __init__(self):
         super(TensorAssignWithBoolTensorIndex2, self).__init__()
-        self.t = Tensor(np.arange(6).reshape([2, 3]), dtype=mstype.float64)
-        self.t = Tensor(np.arange(60).reshape([3,4,5]), dtype = mstype.float64)
+        self.t = Tensor(np.arange(6).reshape([2, 3]), dtype=mstype.float32)
+        self.t = Tensor(np.arange(60).reshape([3,4,5]), dtype = mstype.float32)
+        self.u_scalar = 5
 
-    def construct(self, a, u_tensor, _scalar):
+    def construct(self, a, u_tensor):
         a[a > 8] = u_tensor
-        a[a >= 6] = u_scalar
-        a[a < 3] = u_scalar
+        a[a >= 6] = self.u_scalar
+        a[a < 3] = self.u_scalar
         a[a <= 5] = u_tensor
-        a[a == 5] = u_scalar
+        a[a == 5] = self.u_scalar
         z = a + self.t
         return z
 
@@ -274,36 +332,41 @@ class TensorAssignWithBoolTensorIndex2Error(Cell):
         return a
 
 
-a = np.random.uniform(1,10,[3,4,5])
+a = np.arange(60).reshape(3, 4, 5)
+ck = np.arange(60).reshape(3, 4, 5)
+a4 = np.arange(60).reshape(3, 2, 2, 5)
 b = a > 5
 c = a < 3
-Ta = Tensor(a)
+Ta = Tensor(a, dtype=mstype.float32)
+Tck = Tensor(ck, dtype=mstype.float32)
+Ta4 = Tensor(a4, dtype=mstype.float32)
 Tb = Tensor(b)
 Tc = Tensor(c)
 Td = Tensor([True, True])
-u_tensor = Tensor([1])
-u_tensor_error = Tensor([1, 2])
-t_1d = Tensor([1, 2, 3, 4, 5, 6, 7, 8])
+u_tensor = Tensor([1], dtype=mstype.float32)
+u_tensor_error = Tensor([1, 2], dtype=mstype.float32)
+t_1d = Tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=mstype.float32)
+tck_1d = Tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=mstype.float32)
 u_scalar = 5
 
 def test_tensor_assign_bool_index():
     net1 = TensorAssignWithBoolTensorIndex()
     net2 = TensorAssignWithBoolTensorIndex2()
-    net1(Ta, Tb, Tc, u_tensor, u_scalar)
-    net1(Ta, Tb, Tc, u_tensor, u_scalar)
-    with pytest.raises(ValueError):
-        net1(Ta, Td, Tc, u_tensor, u_scalar)
-    with pytest.raises(ValueError):
-        net1(Ta, u_tensor, Tc, u_tensor, u_scalar)
+    net1(Ta, Tb, Tc, u_tensor)
+    net1(Ta, Tb, Tc, u_tensor)
     with pytest.raises(ValueError):
-        net1(Ta, Tb, Td, u_tensor, u_scalar)
+        net1(Ta, Td, Tc, u_tensor)
+    with pytest.raises(TypeError):
+        net1(Ta, u_tensor, Tc, u_tensor)
     with pytest.raises(ValueError):
-        net1(Ta, Tb, Ta, u_tensor, u_scalar)
+        net1(Ta, Tb, Td, u_tensor)
+    with pytest.raises(TypeError):
+        net1(Ta, Tb, Ta, u_tensor)
     with pytest.raises(ValueError):
-        net1(Ta, Tb, Tc, u_tensor_error, u_scalar)
+        net1(Ta, Tb, Tc, u_tensor_error)
     # net1(Ta, u_tensor, Tc, u_tensor_error, u_scalar)
     with pytest.raises(ValueError):
-        net2(Ta, u_tensor_error, u_scalar)
+        net2(Ta, u_tensor_error)
     net3 = TensorAssignWithBoolTensorIndexError()
     with pytest.raises(AttributeError):
         net3(Ta, Tb, Tc, u_tensor)
@@ -316,29 +379,41 @@ def test_tensor_assign_bool_index():
         net4(Ta, u_scalar)
 
 test_cases = [
+    ('TensorAssignWithTupleEllipsis2', {
+        'block': TensorAssignWithTupleEllipsis2(),
+        'desc_inputs': [Ta4,  u_tensor],
+    }),
+    ('TensorAssignWithTupleEllipsis', {
+        'block': TensorAssignWithTupleEllipsis(),
+        'desc_inputs': [Ta,  u_tensor],
+    }),
+    ('TensorAssignWithEllipsis', {
+        'block': TensorAssignWithEllipsis(),
+        'desc_inputs': [Ta,  u_tensor],
+    }),
     ('TensorAssignWithTupleInteger', {
         'block': TensorAssignWithTupleInteger(),
-        'desc_inputs': [Ta,  u_tensor],
+        'desc_inputs': [Ta,  u_tensor, Tck],
     }),
     ('TensorAssignWithInteger', {
         'block': TensorAssignWithInteger(),
-        'desc_inputs': [Ta,  u_tensor],
+        'desc_inputs': [Ta,  u_tensor, Tck],
     }),
     ('TensorAssignWithSlice', {
         'block': TensorAssignWithSlice(),
-        'desc_inputs': [Ta,  u_tensor],
+        'desc_inputs': [Ta,  u_tensor, Tck],
     }),
     ('TensorAssignWithSlice2', {
         'block': TensorAssignWithSlice2(),
-        'desc_inputs': [t_1d,  u_tensor],
+        'desc_inputs': [t_1d,  u_tensor, tck_1d],
     }),
     ('TensorAssignWithBoolTensorIndex', {
         'block': TensorAssignWithBoolTensorIndex(),
-        'desc_inputs': [Ta, Tb, Tc, u_tensor, u_scalar],
+        'desc_inputs': [Ta, Tb, Tc, u_tensor],
     }),
     ('TensorAssignWithBoolTensorIndex2', {
         'block': TensorAssignWithBoolTensorIndex2(),
-        'desc_inputs': [Ta, u_tensor, u_scalar],
+        'desc_inputs': [Ta, u_tensor],
     }),
     ('SlicePositive', {
         'block': NetWorkSlicePositive(),

From 198bcace6e428850ff77ecf51a92082f6a499301 Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Wed, 29 Apr 2020 10:49:01 +0800
Subject: [PATCH 180/242] Add index error type for log interface

---
 mindspore/ccsrc/utils/log_adapter.cc | 7 ++++++-
 mindspore/ccsrc/utils/log_adapter.h  | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/utils/log_adapter.cc b/mindspore/ccsrc/utils/log_adapter.cc
index b23916b4fe..74a66f44d7 100644
--- a/mindspore/ccsrc/utils/log_adapter.cc
+++ b/mindspore/ccsrc/utils/log_adapter.cc
@@ -143,6 +143,7 @@ static std::string ExceptionTypeToString(ExceptionType type) {
       _TO_STRING(TimeOutError),
       _TO_STRING(ResourceUnavailable),
       _TO_STRING(NoPermissionError),
+      _TO_STRING(IndexError),
       _TO_STRING(ValueError),
       _TO_STRING(TypeError),
   };
@@ -179,7 +180,8 @@ void LogWriter::operator^(const LogStream &stream) const {
 
   std::ostringstream oss;
   oss << location_.file_ << ":" << location_.line_ << " " << location_.func_ << "] ";
-  if (exception_type_ != NoExceptionType && exception_type_ != TypeError && exception_type_ != ValueError) {
+  if (exception_type_ != NoExceptionType && exception_type_ != IndexError && exception_type_ != TypeError &&
+      exception_type_ != ValueError) {
     oss << ExceptionTypeToString(exception_type_) << " ";
   }
   oss << msg.str();
@@ -187,6 +189,9 @@ void LogWriter::operator^(const LogStream &stream) const {
   trace::TraceGraphInfer();
   trace::GetInferStackInfo(oss);
 
+  if (exception_type_ == IndexError) {
+    throw pybind11::index_error(oss.str());
+  }
   if (exception_type_ == ValueError) {
     throw pybind11::value_error(oss.str());
   }
diff --git a/mindspore/ccsrc/utils/log_adapter.h b/mindspore/ccsrc/utils/log_adapter.h
index 2122870c3b..d7d8eff23e 100644
--- a/mindspore/ccsrc/utils/log_adapter.h
+++ b/mindspore/ccsrc/utils/log_adapter.h
@@ -54,6 +54,7 @@ enum ExceptionType {
   TimeOutError,
   ResourceUnavailable,
   NoPermissionError,
+  IndexError,
   ValueError,
   TypeError,
 };

From 203b05d45890c9cd4cc8c1dbf21d1c9d9f51da1d Mon Sep 17 00:00:00 2001
From: yanghaitao <yanghaitao1@huawei.com>
Date: Wed, 29 Apr 2020 10:27:51 +0800
Subject: [PATCH 181/242] num_shards and sampler are not supported if source
 does not have __getitem__

---
 mindspore/dataset/engine/validators.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py
index bdeb97c812..4f1b394634 100644
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -587,6 +587,11 @@ def check_generatordataset(method):
                 except TypeError:
                     raise TypeError("sampler should be either iterable or from mindspore.dataset.samplers")
 
+        if sampler is not None and not hasattr(source, "__getitem__"):
+            raise ValueError("sampler is not supported if source does not have attribute '__getitem__'")
+        if num_shards is not None and not hasattr(source, "__getitem__"):
+            raise ValueError("num_shards is not supported if source does not have attribute '__getitem__'")
+
         return method(*args, **kwargs)
 
     return new_method

From 0d7eb2a067c5d3ea0cf96991f66c93ac85c0f8ab Mon Sep 17 00:00:00 2001
From: wandongdong <wandongdong1@huawei.com>
Date: Wed, 29 Apr 2020 11:44:07 +0800
Subject: [PATCH 182/242] add label smooth and remove hccl setting in eval

---
 example/mobilenetv2_imagenet2012/config.py  |  1 +
 example/mobilenetv2_imagenet2012/dataset.py |  4 +--
 example/mobilenetv2_imagenet2012/eval.py    |  2 --
 example/mobilenetv2_imagenet2012/train.py   | 40 +++++++++++++++++++--
 4 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/example/mobilenetv2_imagenet2012/config.py b/example/mobilenetv2_imagenet2012/config.py
index 32df4eabc9..2a8d37b6fc 100644
--- a/example/mobilenetv2_imagenet2012/config.py
+++ b/example/mobilenetv2_imagenet2012/config.py
@@ -27,6 +27,7 @@ config = ed({
     "lr": 0.4,
     "momentum": 0.9,
     "weight_decay": 4e-5,
+    "label_smooth": 0.1,
     "loss_scale": 1024,
     "save_checkpoint": True,
     "save_checkpoint_epochs": 1,
diff --git a/example/mobilenetv2_imagenet2012/dataset.py b/example/mobilenetv2_imagenet2012/dataset.py
index 9df34d51dc..46f5a1770c 100644
--- a/example/mobilenetv2_imagenet2012/dataset.py
+++ b/example/mobilenetv2_imagenet2012/dataset.py
@@ -53,8 +53,8 @@ def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
 
     # define map operations
     decode_op = C.Decode()
-    resize_crop_op = C.RandomResizedCrop(resize_height, scale=(0.2, 1.0))
-    horizontal_flip_op = C.RandomHorizontalFlip()
+    resize_crop_op = C.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
+    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)
 
     resize_op = C.Resize((256, 256))
     center_crop = C.CenterCrop(resize_width)
diff --git a/example/mobilenetv2_imagenet2012/eval.py b/example/mobilenetv2_imagenet2012/eval.py
index 6c51fc042b..397b3a37c3 100644
--- a/example/mobilenetv2_imagenet2012/eval.py
+++ b/example/mobilenetv2_imagenet2012/eval.py
@@ -38,8 +38,6 @@ context.set_context(enable_loop_sink=True)
 context.set_context(enable_mem_reuse=True)
 
 if __name__ == '__main__':
-    context.set_context(enable_hccl=False)
-
     loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
     net = mobilenet_v2()
 
diff --git a/example/mobilenetv2_imagenet2012/train.py b/example/mobilenetv2_imagenet2012/train.py
index d97eab5f04..c12f2ef9c0 100644
--- a/example/mobilenetv2_imagenet2012/train.py
+++ b/example/mobilenetv2_imagenet2012/train.py
@@ -28,6 +28,10 @@ from mindspore.model_zoo.mobilenet import mobilenet_v2
 from mindspore.parallel._auto_parallel_context import auto_parallel_context
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
+from mindspore.nn.loss.loss import _Loss
+from mindspore.ops import operations as P
+from mindspore.ops import functional as F
+from mindspore.common import dtype as mstype
 
 from mindspore.train.model import Model, ParallelMode
 
@@ -54,6 +58,35 @@ context.set_context(enable_task_sink=True)
 context.set_context(enable_loop_sink=True)
 context.set_context(enable_mem_reuse=True)
 
+class CrossEntropyWithLabelSmooth(_Loss):
+    """
+    CrossEntropyWith LabelSmooth.
+
+    Args:
+        smooth_factor (float): smooth factor, default=0.
+        num_classes (int): num classes
+
+    Returns:
+        None.
+
+    Examples:
+        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
+    """
+
+    def __init__(self, smooth_factor=0., num_classes=1000):
+        super(CrossEntropyWithLabelSmooth, self).__init__()
+        self.onehot = P.OneHot()
+        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
+        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
+        self.ce = nn.SoftmaxCrossEntropyWithLogits()
+        self.mean = P.ReduceMean(False)
+        self.cast = P.Cast()
+
+    def construct(self, logit, label):
+        one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1], self.on_value, self.off_value)
+        out_loss = self.ce(logit, one_hot_label)
+        out_loss = self.mean(out_loss, 0)
+        return out_loss
 
 class Monitor(Callback):
     """
@@ -63,7 +96,7 @@ class Monitor(Callback):
         lr_init (numpy array): train lr
 
     Returns:
-        None.
+        None
 
     Examples:
         >>> Monitor(100,lr_init=Tensor([0.05]*100).asnumpy())
@@ -122,7 +155,10 @@ if __name__ == '__main__':
     for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Dense):
             cell.add_flags_recursive(fp32=True)
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+    if config.label_smooth > 0:
+        loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth, num_classes=config.num_classes)
+    else:
+        loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
 
     print("train args: ", args_opt, "\ncfg: ", config,
           "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

From d43ad79b50f1159868a5965264a29de0158e9b15 Mon Sep 17 00:00:00 2001
From: Zhang Qinghua <zhangqinghua3@huawei.com>
Date: Mon, 27 Apr 2020 15:49:26 +0800
Subject: [PATCH 183/242] Optimize the manager's collectors, which listen for
 graph and node changes.

1. Remove the records of user graphs;
2. Remove the records of user value nodes;
3. Remove the records of user cnodes;
4. Add the records of users, and the API to access the users of graph, value node, and cnode;
5. Fix issue: a user cnode record may point to its own graph when the user (caller) and the used graph (callee) are combined;
6. Fix issue: the record of user graphs is never updated after its first creation.
---
 mindspore/ccsrc/ir/func_graph.cc          |  17 +--
 mindspore/ccsrc/ir/func_graph.h           |   8 +-
 mindspore/ccsrc/ir/func_graph_cloner.cc   |  16 ++-
 mindspore/ccsrc/ir/manager.cc             | 154 ++++++++++------------
 mindspore/ccsrc/ir/manager.h              | 135 ++++++++++---------
 mindspore/ccsrc/optimizer/irpass/inline.h |   6 +-
 mindspore/ccsrc/vm/transform.cc           |   3 +-
 7 files changed, 167 insertions(+), 172 deletions(-)

diff --git a/mindspore/ccsrc/ir/func_graph.cc b/mindspore/ccsrc/ir/func_graph.cc
index 8a58f320f1..40417a33da 100644
--- a/mindspore/ccsrc/ir/func_graph.cc
+++ b/mindspore/ccsrc/ir/func_graph.cc
@@ -263,18 +263,15 @@ const FuncGraphSet &FuncGraph::func_graphs_used_total() {
   return used;
 }
 
-const FuncGraphCounterMap &FuncGraph::func_graph_users() {
-  auto mng = manager_.lock();
-  MS_EXCEPTION_IF_NULL(mng);
-  auto &users = mng->func_graph_users();
-  return users[shared_from_base<FuncGraph>()];
-}
-
-const AnfNodeCounterMap &FuncGraph::func_graph_user_cnodes() {
+const CNodeIndexCounterMap &FuncGraph::func_graph_cnodes_index() {
   auto mng = manager_.lock();
+  if (mng == nullptr) {
+    MS_LOG(EXCEPTION) << "BUG: no manager for this func graph: " << ToString()
+                      << " NodeInfo: " << trace::GetDebugInfo(debug_info());
+  }
   MS_EXCEPTION_IF_NULL(mng);
-  auto &users = mng->func_graph_user_cnodes();
-  return users[shared_from_base<FuncGraph>()];
+  auto &cnode = mng->func_graph_cnodes_index();
+  return cnode[shared_from_base<FuncGraph>()];
 }
 
 FuncGraphPtr FuncGraph::parent() {
diff --git a/mindspore/ccsrc/ir/func_graph.h b/mindspore/ccsrc/ir/func_graph.h
index 9c3752cd81..bca5759807 100644
--- a/mindspore/ccsrc/ir/func_graph.h
+++ b/mindspore/ccsrc/ir/func_graph.h
@@ -37,6 +37,7 @@ namespace mindspore {
 using BaseRefCounterMap = OrderedMap<BaseRef, int, BaseRefHash>;
 using FuncGraphCounterMap = OrderedMap<FuncGraphPtr, int>;
 using AnfNodeCounterMap = OrderedMap<AnfNodePtr, int>;
+using CNodeIndexCounterMap = OrderedMap<CNodeIndexPairPtr, int, CNodeIndexHasher, CNodeIndexEqual>;
 
 const char FUNC_GRAPH_FLAG_IGNORE_VALUES[] = "ignore_values";
 const char FUNC_GRAPH_FLAG_DEFER_INLINE[] = "defer_inline";
@@ -203,11 +204,8 @@ class FuncGraph : public FuncGraphBase {
   // get all func graphs nested used by this func graph
   const FuncGraphSet &func_graphs_used_total();
 
-  // get all users of this func graph
-  const FuncGraphCounterMap &func_graph_users();
-
-  // get all user cnodes of this func graph
-  const AnfNodeCounterMap &func_graph_user_cnodes();
+  // get all user value nodes of this func graph
+  const CNodeIndexCounterMap &func_graph_cnodes_index();
 
   // Return the parent of this graph.
   FuncGraphPtr parent();
diff --git a/mindspore/ccsrc/ir/func_graph_cloner.cc b/mindspore/ccsrc/ir/func_graph_cloner.cc
index c086b8d7d1..c8012276f1 100644
--- a/mindspore/ccsrc/ir/func_graph_cloner.cc
+++ b/mindspore/ccsrc/ir/func_graph_cloner.cc
@@ -182,9 +182,11 @@ void Cloner::CloneFuncGraphValueNodes(const FuncGraphPtr &func_graph, const Func
   }
   target_func_graph->set_return(return_node);
 
-  auto &value_nodes = manager_->func_graph_valuenodes()[func_graph];
-  for (auto &value_node : value_nodes) {
-    CloneValueNode(value_node.first, target_func_graph);
+  auto &cnodes = manager_->func_graph_cnodes_index()[func_graph];
+  for (auto &cnode : cnodes) {
+    auto parent = cnode.first->first->cast<CNodePtr>();
+    auto valuenode = parent->input(cnode.first->second);
+    CloneValueNode(valuenode, target_func_graph);
   }
 }
 
@@ -386,8 +388,8 @@ void Cloner::LiftParameters(const FuncGraphPtr &func_graph_user, const FuncGraph
   if (lift_params.empty()) {
     return;
   }
-  for (auto &user : func_graph_user->func_graph_users()) {
-    LiftParameters(user.first, func_graph_user, lift_params);
+  for (auto &cnode : func_graph_user->func_graph_cnodes_index()) {
+    LiftParameters(cnode.first->first->func_graph(), func_graph_user, lift_params);
   }
 }
 
@@ -395,8 +397,8 @@ void Cloner::Lift() {
   for (auto &func_graph_params : repl_func_graph_params_) {
     auto &func_graph = func_graph_params.first;
     auto &params = func_graph_params.second;
-    for (auto &user : func_graph->func_graph_users()) {
-      LiftParameters(user.first, func_graph, params);
+    for (auto &cnode : func_graph->func_graph_cnodes_index()) {
+      LiftParameters(cnode.first->first->func_graph(), func_graph, params);
     }
   }
 }
diff --git a/mindspore/ccsrc/ir/manager.cc b/mindspore/ccsrc/ir/manager.cc
index 150e68ef4d..1ed747eefd 100644
--- a/mindspore/ccsrc/ir/manager.cc
+++ b/mindspore/ccsrc/ir/manager.cc
@@ -78,13 +78,16 @@ void FuncGraphManager::Reset() {
   node_users_ = NodeUsersMap();
 
   signals_ = std::make_shared<Signals>();
+  // FuncGraph --> AnfNode
   nodes_ = std::make_shared<NodesCollector>(this);
+
+  // FuncGraph --> {AnfNode, Count}
   valuenodes_ = std::make_shared<ValueNodesCollector>(this);
   free_variables_direct_ = std::make_shared<FVDirectCollector>(this);
-  func_graph_valuenodes_ = std::make_shared<FuncGraphValueNodesCollector>(this);
+  func_graph_cnodes_index_ = std::make_shared<FuncGraphUsersCNodeIndexCollector>(this);
+
+  // FuncGraph --> {FuncGraph, Count}
   func_graphs_used_ = std::make_shared<FuncGraphsUsedCollector>(this);
-  func_graph_users_ = std::make_shared<FuncGraphUsersCollector>(this);
-  func_graph_user_cnodes_ = std::make_shared<FuncGraphUserNodesCollector>(this);
   func_graph_child_direct_ = std::make_shared<FuncGraphChildDirect>(this);
   func_graph_parents_direct_ = std::make_shared<FuncGraphParentsDirectCollector>(this);
   func_graph_j_direct_ = std::make_shared<FuncGraphJDirectCollector>(this);
@@ -300,9 +303,9 @@ void FuncGraphManager::MaybeDropFuncGraphs(const FuncGraphSet &func_graphs, bool
       MS_LOG(DEBUG) << "Cannot drop as roots contains func graph: " << func_graph->ToString();
       continue;
     }
-    MS_EXCEPTION_IF_NULL(func_graph_users_);
-    auto &users = func_graph_users_->count_func_graphs_map()[func_graph];
-    if (!users.empty() && !ignore_users) {
+    MS_EXCEPTION_IF_NULL(func_graph_cnodes_index_);
+    auto &users_cnode_index = func_graph_cnodes_index_->count_nodes_map()[func_graph];
+    if (!users_cnode_index.empty() && !ignore_users) {
       MS_LOG(DEBUG) << "Cannot drop as users not empty: " << func_graph->ToString();
       continue;
     }
@@ -472,10 +475,6 @@ void FuncGraphManager::MoveAllCNodeDropGraph(FuncGraphPtr source, FuncGraphPtr t
       node->set_scope(scope);
     }
   }
-  for (auto &used : source->func_graphs_used()) {
-    (void)func_graph_users_->Inc(used.first, target, used.second);
-    (void)this->func_graph_users()[used.first].erase(source);
-  }
   for (auto &child : this->func_graph_child_direct()[source]) {
     (void)func_graph_parents_direct_->Inc(child.first, target, child.second);
     (void)this->func_graph_parents_direct()[child.first].erase(source);
@@ -661,7 +660,9 @@ DepCollector::DepCollector(const FuncGraphManager *const manager) : FuncGraphAna
 
 void DepCollector::OnDropEdge(AnfNodePtr node, int index, AnfNodePtr inp) { OnModEdge(node, index, inp, kDecEdge); }
 
-bool CounterAnfNodeCollector::Inc(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count = 1) {
+template <typename ValueT, class CollectorHash, class CollectorEqual>
+bool CounterAnfNodeCollector<ValueT, CollectorHash, CollectorEqual>::Inc(const FuncGraphPtr &func_graph,
+                                                                         const ValueT &key, int count) {
   auto &d = count_nodes_map_[func_graph];
   if (d.count(key) == 0) {
     d[key] = count;
@@ -672,7 +673,9 @@ bool CounterAnfNodeCollector::Inc(const FuncGraphPtr &func_graph, const AnfNodeP
   return false;
 }
 
-bool CounterAnfNodeCollector::Dec(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count = 1) {
+template <typename ValueT, class CollectorHash, class CollectorEqual>
+bool CounterAnfNodeCollector<ValueT, CollectorHash, CollectorEqual>::Dec(const FuncGraphPtr &func_graph,
+                                                                         const ValueT &key, int count) {
   MS_EXCEPTION_IF_NULL(func_graph);
   auto &d = count_nodes_map_[func_graph];
   if (d.count(key) != 0) {
@@ -682,7 +685,7 @@ bool CounterAnfNodeCollector::Dec(const FuncGraphPtr &func_graph, const AnfNodeP
     } else {
       d[key] -= count;
       if (d[key] < 0) {
-        MS_LOG(EXCEPTION) << "Count of key '" << key->ToString()
+        MS_LOG(EXCEPTION) << "Count of key '" << key
                           << "' dec from 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info());
       }
     }
@@ -690,52 +693,15 @@ bool CounterAnfNodeCollector::Dec(const FuncGraphPtr &func_graph, const AnfNodeP
   return false;
 }
 
-bool CounterAnfNodeCollector::Mod(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count) {
+template <typename ValueT, class CollectorHash, class CollectorEqual>
+bool CounterAnfNodeCollector<ValueT, CollectorHash, CollectorEqual>::Mod(const FuncGraphPtr &func_graph,
+                                                                         const ValueT &key, int count) {
   if (count > 0) {
     return Inc(func_graph, key, count);
   } else if (count < 0) {
     return Dec(func_graph, key, -count);
   } else {
-    MS_LOG(EXCEPTION) << "Count of key '" << key->ToString()
-                      << "' cannot be 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info());
-  }
-}
-
-bool CounterFuncGraphCollector::Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) {
-  auto &d = count_func_graphs_map_[func_graph];
-  if (d.count(key) == 0) {
-    d[key] = count;
-    return true;
-  } else {
-    d[key] += count;
-  }
-  return false;
-}
-
-bool CounterFuncGraphCollector::Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) {
-  auto &d = count_func_graphs_map_[func_graph];
-  if (d.count(key) != 0) {
-    if (d[key] == count) {
-      (void)d.erase(key);
-      return true;
-    } else {
-      d[key] -= count;
-      if (d[key] < 0) {
-        MS_LOG(EXCEPTION) << "Count of key '" << key->ToString()
-                          << "' dec from 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info());
-      }
-    }
-  }
-  return false;
-}
-
-bool CounterFuncGraphCollector::Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count) {
-  if (count > 0) {
-    return Inc(func_graph, key, count);
-  } else if (count < 0) {
-    return Dec(func_graph, key, -count);
-  } else {
-    MS_LOG(EXCEPTION) << "Count of key '" << key->ToString()
+    MS_LOG(EXCEPTION) << "Count of key '" << key
                       << "' cannot be 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info());
   }
 }
@@ -754,16 +720,21 @@ void ValueNodesCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) {
   (void)count_nodes_map_.erase(src);
 }
 
-// if inp is a graph ValueNode, this graph's FuncGraphValueNodesCollector's value is inp self
-void FuncGraphValueNodesCollector::OnModEdge(AnfNodePtr, int, AnfNodePtr inp, EdgeProcessDirection direction) {
+void FuncGraphUsersCNodeIndexCollector::OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp,
+                                                  EdgeProcessDirection direction) {
+  MS_EXCEPTION_IF_NULL(node);
   if (IsValueNode<FuncGraph>(inp)) {
-    (void)Mod(GetValueNode<FuncGraphPtr>(inp), inp, direction);
+    (void)Mod(GetValueNode<FuncGraphPtr>(inp), std::make_shared<CNodeIndexPair>(std::make_pair(node, index)),
+              direction);
   }
 }
 
-void FuncGraphValueNodesCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) {
+void FuncGraphUsersCNodeIndexCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) {
   for (auto &it : count_nodes_map_[src]) {
-    (void)Inc(dst, it.first, it.second);
+    // Skip user CNodes that already belong to dst, so dst is not recorded as a user of itself.
+    if (dst != it.first->first->func_graph()) {
+      (void)Inc(dst, it.first, it.second);
+    }
   }
   (void)count_nodes_map_.erase(src);
 }
@@ -794,6 +765,45 @@ static FuncGraphPtr ParentProxy(const FuncGraphPtr &fg) {
   return gn;
 }
 
+bool CounterFuncGraphCollector::Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) {
+  auto &d = count_func_graphs_map_[func_graph];
+  if (d.count(key) == 0) {
+    d[key] = count;
+    return true;
+  } else {
+    d[key] += count;
+  }
+  return false;
+}
+
+bool CounterFuncGraphCollector::Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count = 1) {
+  auto &d = count_func_graphs_map_[func_graph];
+  if (d.count(key) != 0) {
+    if (d[key] == count) {
+      (void)d.erase(key);
+      return true;
+    } else {
+      d[key] -= count;
+      if (d[key] < 0) {
+        MS_LOG(EXCEPTION) << "Count of key '" << key->ToString()
+                          << "' dec from 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info());
+      }
+    }
+  }
+  return false;
+}
+
+bool CounterFuncGraphCollector::Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count) {
+  if (count > 0) {
+    return Inc(func_graph, key, count);
+  } else if (count < 0) {
+    return Dec(func_graph, key, -count);
+  } else {
+    MS_LOG(EXCEPTION) << "Count of key '" << key->ToString()
+                      << "' cannot be 0. NodeInfo: " << trace::GetDebugInfo(func_graph->debug_info());
+  }
+}
+
 void FuncGraphChildDirect::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) {
   MS_EXCEPTION_IF_NULL(node);
   MS_EXCEPTION_IF_NULL(inp);
@@ -859,32 +869,6 @@ void FuncGraphsUsedCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst)
   (void)count_func_graphs_map_.erase(src);
 }
 
-void FuncGraphUsersCollector::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) {
-  MS_EXCEPTION_IF_NULL(node);
-  if (IsValueNode<FuncGraph>(inp)) {
-    (void)Mod(GetValueNode<FuncGraphPtr>(inp), node->func_graph(), direction);
-  }
-}
-
-void FuncGraphUsersCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr) {
-  // all graph use in src need to change to dst, so add dst user
-  (void)count_func_graphs_map_.erase(src);
-}
-
-void FuncGraphUserNodesCollector::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) {
-  MS_EXCEPTION_IF_NULL(node);
-  if (IsValueNode<FuncGraph>(inp)) {
-    (void)Mod(GetValueNode<FuncGraphPtr>(inp), node, direction);
-  }
-}
-
-void FuncGraphUserNodesCollector::OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) {
-  for (auto &it : count_nodes_map_[src]) {
-    (void)Inc(dst, it.first, it.second);
-  }
-  (void)count_nodes_map_.erase(src);
-}
-
 void FuncGraphJDirectCollector::OnModEdge(AnfNodePtr node, int, AnfNodePtr inp, EdgeProcessDirection direction) {
   if (IsValueNode<FuncGraph>(inp) && IsPrimitiveCNode(node, prim::kPrimJ)) {
     (void)Mod(node->func_graph(), GetValueNode<FuncGraphPtr>(inp), direction);
diff --git a/mindspore/ccsrc/ir/manager.h b/mindspore/ccsrc/ir/manager.h
index 54c1e8a692..7f36b53205 100644
--- a/mindspore/ccsrc/ir/manager.h
+++ b/mindspore/ccsrc/ir/manager.h
@@ -100,8 +100,12 @@ struct Signals {
 
 enum EdgeProcessDirection { kDecEdge = -1, kIncEdge = 1 };
 
+using CNodeIndexPair = std::pair<AnfNodePtr, int>;
+using CNodeIndexPairPtr = std::shared_ptr<CNodeIndexPair>;
+
 using FuncGraphToFuncGraphCounterMap = OrderedMap<FuncGraphPtr, OrderedMap<FuncGraphPtr, int>>;
-using FuncGraphToAnfNodeCounterMap = OrderedMap<FuncGraphPtr, OrderedMap<AnfNodePtr, int>>;
+template <typename ValueT, class CollectorHash = std::hash<ValueT>, class CollectorEqual = std::equal_to<ValueT>>
+using FuncGraphToAnfNodeCounterMap = OrderedMap<FuncGraphPtr, OrderedMap<ValueT, int, CollectorHash, CollectorEqual>>;
 
 // analysis base class
 class FuncGraphAnalysis {
@@ -174,46 +178,56 @@ class NodesCollector final : public DepCollector {
   void OnDropNode(AnfNodePtr n) override;
 };
 
-class CounterFuncGraphCollector : public DepCollector {
- public:
-  explicit CounterFuncGraphCollector(const FuncGraphManager *m) : DepCollector(m) {}
-  ~CounterFuncGraphCollector() override = default;
-  FuncGraphToFuncGraphCounterMap &count_func_graphs_map() { return count_func_graphs_map_; }
-  // inherit from FuncGraphAnalysis
-  size_t size() const override { return count_func_graphs_map_.size(); }
-  void OnAddFuncGraph(FuncGraphPtr fg) final { count_func_graphs_map_[fg] = OrderedMap<FuncGraphPtr, int>(); }
-  void OnDropFuncGraph(FuncGraphPtr fg) final { (void)count_func_graphs_map_.erase(fg); }
-  bool Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count);
-  bool Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count);
-  bool Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count);
-
-  FuncGraphToFuncGraphCounterMap count_func_graphs_map_;
+struct CNodeIndexHasher {
+  std::size_t operator()(const CNodeIndexPairPtr pair) const {
+    MS_EXCEPTION_IF_NULL(pair);
+    MS_EXCEPTION_IF_NULL(pair->first);
+    return hash_combine(pair->first->hash(), std::hash<int>()(pair->second));
+  }
+};
 
- protected:
-  void ExtraReset() override { count_func_graphs_map_.clear(); }
+struct CNodeIndexEqual {
+  bool operator()(const CNodeIndexPairPtr lhs, const CNodeIndexPairPtr rhs) const {
+    if (lhs == nullptr || rhs == nullptr) {
+      return false;
+    }
+    if (lhs == rhs) {
+      return true;
+    }
+    if (lhs->first != rhs->first) {
+      return false;
+    }
+    if (lhs->second != rhs->second) {
+      return false;
+    }
+    return true;
+  }
 };
 
+template <typename ValueT, class CollectorHash = std::hash<ValueT>, class CollectorEqual = std::equal_to<ValueT>>
 class CounterAnfNodeCollector : public DepCollector {
  public:
   explicit CounterAnfNodeCollector(const FuncGraphManager *m) : DepCollector(m) {}
   ~CounterAnfNodeCollector() override = default;
-  FuncGraphToAnfNodeCounterMap &count_nodes_map() { return count_nodes_map_; }
+  FuncGraphToAnfNodeCounterMap<ValueT, CollectorHash, CollectorEqual> &count_nodes_map() { return count_nodes_map_; }
 
   size_t size() const override { return count_nodes_map_.size(); }
-  void OnAddFuncGraph(FuncGraphPtr fg) final { count_nodes_map_[fg] = OrderedMap<AnfNodePtr, int>(); }
+  void OnAddFuncGraph(FuncGraphPtr fg) final {
+    count_nodes_map_[fg] = OrderedMap<ValueT, int, CollectorHash, CollectorEqual>();
+  }
   void OnDropFuncGraph(FuncGraphPtr fg) final { (void)count_nodes_map_.erase(fg); }
 
-  bool Inc(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count);
-  bool Dec(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count);
-  bool Mod(const FuncGraphPtr &func_graph, const AnfNodePtr &key, int count);
+  bool Inc(const FuncGraphPtr &func_graph, const ValueT &key, int count);
+  bool Dec(const FuncGraphPtr &func_graph, const ValueT &key, int count);
+  bool Mod(const FuncGraphPtr &func_graph, const ValueT &key, int count);
 
-  FuncGraphToAnfNodeCounterMap count_nodes_map_;
+  FuncGraphToAnfNodeCounterMap<ValueT, CollectorHash, CollectorEqual> count_nodes_map_;
 
  protected:
   void ExtraReset() override { count_nodes_map_.clear(); }
 };
 
-class ValueNodesCollector final : public CounterAnfNodeCollector {
+class ValueNodesCollector final : public CounterAnfNodeCollector<AnfNodePtr> {
  public:
   explicit ValueNodesCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {}
   ~ValueNodesCollector() override = default;
@@ -223,17 +237,19 @@ class ValueNodesCollector final : public CounterAnfNodeCollector {
   void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override;
 };
 
-class FuncGraphValueNodesCollector final : public CounterAnfNodeCollector {
+// Record each (CNode, input index) pair through which a function graph is referenced.
+class FuncGraphUsersCNodeIndexCollector final
+    : public CounterAnfNodeCollector<CNodeIndexPairPtr, CNodeIndexHasher, CNodeIndexEqual> {
  public:
-  explicit FuncGraphValueNodesCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {}
-  ~FuncGraphValueNodesCollector() override = default;
+  explicit FuncGraphUsersCNodeIndexCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {}
+  ~FuncGraphUsersCNodeIndexCollector() override = default;
   void OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) override;
 
  protected:
   void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override;
 };
 
-class FVDirectCollector final : public CounterAnfNodeCollector {
+class FVDirectCollector final : public CounterAnfNodeCollector<AnfNodePtr> {
  public:
   explicit FVDirectCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {}
   ~FVDirectCollector() override = default;
@@ -243,6 +259,25 @@ class FVDirectCollector final : public CounterAnfNodeCollector {
   void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override;
 };
 
+class CounterFuncGraphCollector : public DepCollector {
+ public:
+  explicit CounterFuncGraphCollector(const FuncGraphManager *m) : DepCollector(m) {}
+  ~CounterFuncGraphCollector() override = default;
+  FuncGraphToFuncGraphCounterMap &count_func_graphs_map() { return count_func_graphs_map_; }
+  // inherit from FuncGraphAnalysis
+  size_t size() const override { return count_func_graphs_map_.size(); }
+  void OnAddFuncGraph(FuncGraphPtr fg) final { count_func_graphs_map_[fg] = OrderedMap<FuncGraphPtr, int>(); }
+  void OnDropFuncGraph(FuncGraphPtr fg) final { (void)count_func_graphs_map_.erase(fg); }
+  bool Inc(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count);
+  bool Dec(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count);
+  bool Mod(const FuncGraphPtr &func_graph, const FuncGraphPtr &key, int count);
+
+  FuncGraphToFuncGraphCounterMap count_func_graphs_map_;
+
+ protected:
+  void ExtraReset() override { count_func_graphs_map_.clear(); }
+};
+
 class FuncGraphChildDirect final : public CounterFuncGraphCollector {
  public:
   explicit FuncGraphChildDirect(const FuncGraphManager *m) : CounterFuncGraphCollector(m) {}
@@ -279,28 +314,6 @@ class FuncGraphsUsedCollector final : public CounterFuncGraphCollector {
   void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override;
 };
 
-// graph's all user graphs: key is g, value is graphs who used g
-class FuncGraphUsersCollector final : public CounterFuncGraphCollector {
- public:
-  explicit FuncGraphUsersCollector(const FuncGraphManager *m) : CounterFuncGraphCollector(m) {}
-  void OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) override;
-  ~FuncGraphUsersCollector() override = default;
-
- protected:
-  void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override;
-};
-
-// graph's all user cnodes: key is g, value is cnodes who used g
-class FuncGraphUserNodesCollector final : public CounterAnfNodeCollector {
- public:
-  explicit FuncGraphUserNodesCollector(const FuncGraphManager *m) : CounterAnfNodeCollector(m) {}
-  void OnMoveAllCNode(FuncGraphPtr src, FuncGraphPtr dst) override;
-  ~FuncGraphUserNodesCollector() override = default;
-
- protected:
-  void OnModEdge(AnfNodePtr node, int index, AnfNodePtr inp, EdgeProcessDirection direction) override;
-};
-
 class FuncGraphJDirectCollector final : public CounterFuncGraphCollector {
  public:
   explicit FuncGraphJDirectCollector(const FuncGraphManager *m) : CounterFuncGraphCollector(m) {}
@@ -433,7 +446,9 @@ class ScopeComputer final : public DepComputer {
 
 using FVTotalMap = OrderedMap<FuncGraphPtr, OrderedMap<BaseRef, int, BaseRefHash>>;
 
-class FVTotalComputer final : public DepComputer, public CounterAnfNodeCollector, public CounterFuncGraphCollector {
+class FVTotalComputer final : public DepComputer,
+                              public CounterAnfNodeCollector<AnfNodePtr>,
+                              public CounterFuncGraphCollector {
  public:
   explicit FVTotalComputer(const FuncGraphManager *m)
       : DepComputer(m), CounterAnfNodeCollector(m), CounterFuncGraphCollector(m) {}
@@ -549,18 +564,18 @@ class FuncGraphManager : public std::enable_shared_from_this<FuncGraphManager> {
 
   FuncGraphToAnfNodeMap &nodes() const { return nodes_->nodes_analysis_; }
 
-  FuncGraphToAnfNodeCounterMap &valuenodes() const { return valuenodes_->count_nodes_map_; }
+  FuncGraphToAnfNodeCounterMap<AnfNodePtr> &valuenodes() const { return valuenodes_->count_nodes_map_; }
 
-  FuncGraphToAnfNodeCounterMap &free_variables_direct() const { return free_variables_direct_->count_nodes_map_; }
+  FuncGraphToAnfNodeCounterMap<AnfNodePtr> &free_variables_direct() const {
+    return free_variables_direct_->count_nodes_map_;
+  }
 
-  FuncGraphToAnfNodeCounterMap &func_graph_valuenodes() const { return func_graph_valuenodes_->count_nodes_map_; }
+  FuncGraphToAnfNodeCounterMap<CNodeIndexPairPtr, CNodeIndexHasher, CNodeIndexEqual> &func_graph_cnodes_index() const {
+    return func_graph_cnodes_index_->count_nodes_map_;
+  }
 
   FuncGraphToFuncGraphCounterMap &func_graphs_used() const { return func_graphs_used_->count_func_graphs_map_; }
 
-  FuncGraphToFuncGraphCounterMap &func_graph_users() const { return func_graph_users_->count_func_graphs_map_; }
-
-  FuncGraphToAnfNodeCounterMap &func_graph_user_cnodes() const { return func_graph_user_cnodes_->count_nodes_map_; }
-
   FuncGraphToFuncGraphCounterMap &func_graph_child_direct() const {
     return func_graph_child_direct_->count_func_graphs_map_;
   }
@@ -598,10 +613,8 @@ class FuncGraphManager : public std::enable_shared_from_this<FuncGraphManager> {
   std::shared_ptr<NodesCollector> nodes_;
   std::shared_ptr<ValueNodesCollector> valuenodes_;
   std::shared_ptr<FVDirectCollector> free_variables_direct_;
-  std::shared_ptr<FuncGraphValueNodesCollector> func_graph_valuenodes_;
+  std::shared_ptr<FuncGraphUsersCNodeIndexCollector> func_graph_cnodes_index_;
   std::shared_ptr<FuncGraphsUsedCollector> func_graphs_used_;
-  std::shared_ptr<FuncGraphUsersCollector> func_graph_users_;
-  std::shared_ptr<FuncGraphUserNodesCollector> func_graph_user_cnodes_;
   std::shared_ptr<FuncGraphChildDirect> func_graph_child_direct_;
   std::shared_ptr<FuncGraphParentsDirectCollector> func_graph_parents_direct_;
   std::shared_ptr<FuncGraphJDirectCollector> func_graph_j_direct_;
diff --git a/mindspore/ccsrc/optimizer/irpass/inline.h b/mindspore/ccsrc/optimizer/irpass/inline.h
index a7b6b975bb..8ebd0f6eb7 100644
--- a/mindspore/ccsrc/optimizer/irpass/inline.h
+++ b/mindspore/ccsrc/optimizer/irpass/inline.h
@@ -81,10 +81,10 @@ bool IsTrivial(const FuncGraphPtr &fg, AnfNodePtr) {
 }
 
 bool IsUniqueUse(const FuncGraphPtr &fg, AnfNodePtr) {
-  auto &users = fg->func_graph_users();
+  auto &cnodes = fg->func_graph_cnodes_index();
   int n_use =
-    std::accumulate(users.begin(), users.end(), 0,
-                    [](int sum, const std::pair<const FuncGraphPtr, int> &item) { return sum + item.second; });
+    std::accumulate(cnodes.begin(), cnodes.end(), 0,
+                    [](int sum, const std::pair<const CNodeIndexPairPtr, int> &item) { return sum + item.second; });
   return n_use == 1;
 }
 
diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc
index b14bf54869..9147f75fb2 100644
--- a/mindspore/ccsrc/vm/transform.cc
+++ b/mindspore/ccsrc/vm/transform.cc
@@ -486,7 +486,8 @@ void CompileGraph::AddExternal(const LinConvertResult &result) {
 }
 
 void TraverseGraphMap(
-  const FuncGraphManagerPtr &manager_ptr, FuncGraphTransaction *const tr, const FuncGraphToAnfNodeCounterMap &cts,
+  const FuncGraphManagerPtr &manager_ptr, FuncGraphTransaction *const tr,
+  const FuncGraphToAnfNodeCounterMap<AnfNodePtr> &cts,
   const std::function<std::shared_ptr<FuncGraph>(const PrimitivePtr, const AbstractFunctionPtr)> &get_prim_graph) {
   MS_EXCEPTION_IF_NULL(manager_ptr);
   MS_EXCEPTION_IF_NULL(tr);

From 63f3a2caacf4263d0b33e9f7a15ef1ff3a966ea5 Mon Sep 17 00:00:00 2001
From: limingqi107 <limingqi@huawei.com>
Date: Wed, 29 Apr 2020 10:38:08 +0800
Subject: [PATCH 184/242] gpu optimize some return values of dynamic memory
 pool

---
 .../ccsrc/device/gpu/gpu_kernel_runtime.cc    | 22 ++++++++++++-------
 .../ccsrc/device/gpu/gpu_memory_manager.cc    |  2 +-
 mindspore/ccsrc/device/kernel_runtime.cc      | 15 ++++++++++---
 mindspore/ccsrc/device/memory_manager.cc      | 13 ++++++++---
 mindspore/ccsrc/device/memory_manager.h       |  4 ++--
 .../mem_reuse/mem_dynamic_allocator.cc        | 22 +++++++++++--------
 6 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
index 2d53097dd8..17817ebeba 100644
--- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc
@@ -225,23 +225,24 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
     MS_EXCEPTION_IF_NULL(input);
     input->addr = device_address->ptr_;
     input->size = device_address->size_;
-    kernel_inputs->push_back(input);
+    kernel_inputs->emplace_back(input);
   }
-
   auto output_sizes = kernel_mod.GetOutputSizeList();
   for (size_t i = 0; i < output_sizes.size(); ++i) {
     auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i);
     MS_EXCEPTION_IF_NULL(device_address);
     if (device_address->ptr_ == nullptr) {
-      mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
+      auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
+      if (!ret) {
+        MS_LOG(EXCEPTION) << "Malloc device memory failed.";
+      }
     }
     kernel::AddressPtr output = std::make_shared<kernel::Address>();
     MS_EXCEPTION_IF_NULL(output);
     output->addr = device_address->ptr_;
     output->size = output_sizes[i];
-    kernel_outputs->push_back(output);
+    kernel_outputs->emplace_back(output);
   }
-
   auto workspace_sizes = kernel_mod.GetWorkspaceSizeList();
   for (size_t i = 0; i < workspace_sizes.size(); ++i) {
     if (workspace_sizes[i] == 0) {
@@ -249,12 +250,14 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod
       continue;
     }
     auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]);
-    MS_EXCEPTION_IF_NULL(device_ptr);
+    if (!device_ptr) {
+      MS_LOG(EXCEPTION) << "Malloc device memory failed.";
+    }
     kernel::AddressPtr workspace = std::make_shared<kernel::Address>();
     MS_EXCEPTION_IF_NULL(workspace);
     workspace->addr = device_ptr;
     workspace->size = workspace_sizes[i];
-    kernel_workspaces->push_back(workspace);
+    kernel_workspaces->emplace_back(workspace);
   }
 }
 
@@ -334,7 +337,10 @@ void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, boo
       }
     }
   }
-  mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
+  auto ret = mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list);
+  if (!ret) {
+    MS_LOG(EXCEPTION) << "Malloc device memory failed.";
+  }
 }
 
 void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel,
diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc
index 6e81130b9c..9a63921add 100644
--- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc
+++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc
@@ -40,7 +40,7 @@ void GPUMemoryManager::MallocDeviceMemory() {
   if (context_ptr->enable_dynamic_mem_pool()) {
     auto device_addr = MallocMemFromMemPool(1);
     if (!device_addr) {
-      MS_LOG(ERROR) << "Dynamic memory pool init error.";
+      MS_LOG(EXCEPTION) << "Dynamic memory pool init error.";
     }
   } else {
     // Need to reserve 20% space for dynamic memory
diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc
index d1a068b584..d3fccc11fd 100644
--- a/mindspore/ccsrc/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/device/kernel_runtime.cc
@@ -180,7 +180,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector<tensor::TensorPtr>
       auto device_address =
         CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id);
       MS_EXCEPTION_IF_NULL(device_address);
-      mem_manager_->MallocMemFromMemPool(device_address, tensor_size);
+      auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size);
+      if (!ret) {
+        MS_LOG(EXCEPTION) << "Malloc device memory failed.";
+      }
       AnfAlgo::SetOutputAddr(device_address, index, item.get());
     }
   }
@@ -209,7 +212,10 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
     auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
     auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type);
     MS_EXCEPTION_IF_NULL(device_address);
-    mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
+    auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
+    if (!ret) {
+      MS_LOG(EXCEPTION) << "Malloc device memory failed.";
+    }
     AnfAlgo::SetOutputAddr(device_address, i, kernel.get());
   }
 }
@@ -224,7 +230,10 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) {
     for (size_t i = 0; i < workspace_lists.size(); ++i) {
       auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown);
       MS_EXCEPTION_IF_NULL(device_address);
-      mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]);
+      auto ret = mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]);
+      if (!ret) {
+        MS_LOG(EXCEPTION) << "Malloc device memory failed.";
+      }
       AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get());
     }
   }
diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc
index 8dd8dfb5e0..d2a38038c6 100644
--- a/mindspore/ccsrc/device/memory_manager.cc
+++ b/mindspore/ccsrc/device/memory_manager.cc
@@ -141,11 +141,14 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
   }
 }
 
-void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) {
+bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) {
   auto device_ptr = MallocMemFromMemPool(size);
-  MS_EXCEPTION_IF_NULL(device_ptr);
+  if (!device_ptr) {
+    return false;
+  }
   address->ptr_ = device_ptr;
   address->from_mem_pool_ = true;
+  return true;
 }
 
 void *MemoryManager::MallocMemFromMemPool(size_t size) {
@@ -168,9 +171,12 @@ void MemoryManager::FreeMemFromMemPool(void *device_ptr) {
   }
 }
 
-void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
+bool MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
                                                    std::vector<size_t> size_list) {
   auto device_ptr_list = MallocContinuousMemFromMemPool(total_size, size_list);
+  if (device_ptr_list.size() == 0) {
+    return false;
+  }
   if (addr_list.size() != device_ptr_list.size()) {
     MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list.";
   }
@@ -180,6 +186,7 @@ void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList ad
     addr_list[i]->ptr_ = device_ptr_list[i];
     addr_list[i]->from_mem_pool_ = true;
   }
+  return true;
 }
 
 std::vector<void *> MemoryManager::MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list) {
diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h
index dae0861506..be250e0f3f 100644
--- a/mindspore/ccsrc/device/memory_manager.h
+++ b/mindspore/ccsrc/device/memory_manager.h
@@ -46,11 +46,11 @@ class MemoryManager {
   uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size);
   virtual uint8_t *MallocMem(int flag, size_t size);
 
-  virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size);
+  virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size);
   virtual void *MallocMemFromMemPool(size_t size);
   virtual void FreeMemFromMemPool(const DeviceAddressPtr address);
   virtual void FreeMemFromMemPool(void *device_ptr);
-  virtual void MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
+  virtual bool MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size,
                                               std::vector<size_t> size_list);
   virtual std::vector<void *> MallocContinuousMemFromMemPool(size_t total_size, std::vector<size_t> size_list);
 
diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc
index b7280f52ae..a2dfce2241 100644
--- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc
+++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc
@@ -38,9 +38,12 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size) {
 
 std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t total_size,
                                                                           std::vector<size_t> size_list) {
+  std::vector<DeviceMemPtr> device_addr_list;
   // Pre-alloc the one whole piece memory.
   auto device_addr = AllocTensorMem(total_size);
-  MS_EXCEPTION_IF_NULL(device_addr);
+  if (!device_addr) {
+    return device_addr_list;
+  }
   // Remove the pre-alloc memory.
   auto mem_block = FindMemBlock(device_addr);
   MS_EXCEPTION_IF_NULL(mem_block);
@@ -54,7 +57,6 @@ std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t
   (void)mem_block->block_all_mem_buf_map_.erase(iter);
   // Split the pre-alloc memory into continuous memory by the size list.
   DynamicMemBufPtr continuous_mem_buf;
-  std::vector<DeviceMemPtr> device_addr_list;
   auto buf_addr = device_addr;
   for (size_t i = 0; i < size_list.size(); i++) {
     continuous_mem_buf = std::make_shared<DynamicMemBuf>(buf_addr, kMemBufUsed, size_list[i]);
@@ -102,13 +104,16 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) {
 
 DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) {
   size_t alloc_mem_size = CalMemBlockAllocSize(size);
-
+  if (alloc_mem_size == 0) {
+    return nullptr;
+  }
   // Add new memory block
   DeviceMemPtr device_addr = nullptr;
   auto real_alloc_size = AllocDeviceMem(alloc_mem_size, &device_addr);
   if (real_alloc_size < size) {
-    MS_LOG(EXCEPTION) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size["
-                      << size << "].";
+    MS_LOG(WARNING) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" << size
+                    << "].";
+    return nullptr;
   }
   auto mem_block = std::make_shared<DynamicMemBlock>(device_addr, real_alloc_size);
   MS_EXCEPTION_IF_NULL(mem_block);
@@ -135,10 +140,10 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) {
 size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) {
   auto device_free_mem_size = free_mem_size();
   if (device_free_mem_size < size) {
-    MS_LOG(EXCEPTION) << "Memory not enough: current free memory size[" << device_free_mem_size
-                      << "] is smaller than required size[" << size << "].";
+    MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size
+                    << "] is smaller than required size[" << size << "].";
+    return 0;
   }
-
   auto alloc_mem_size = mem_alloc_unit_size();
   // Growing at twice of alloc size
   while (alloc_mem_size < size) {
@@ -156,7 +161,6 @@ void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &me
   MS_EXCEPTION_IF_NULL(mem_buf);
   auto mem_block = FindMemBlock(mem_buf->device_addr_);
   MS_EXCEPTION_IF_NULL(mem_block);
-
   // Divide new memory buf
   size_t newbuf_size = mem_buf->size_ - size;
   mem_buf->size_ = size;

From 8ca1f87a49567d90f83850e268b095a843cf69d0 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Wed, 29 Apr 2020 02:56:42 -0400
Subject: [PATCH 185/242] fix globalbatchnorm bug

---
 mindspore/nn/layer/normalization.py | 63 +++++++++++++----------------
 1 file changed, 27 insertions(+), 36 deletions(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 66f17e3f38..dd4ac67273 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -116,53 +116,44 @@ class _BatchNorm(Cell):
         group_list = [list(i) for i in world_rank_list]
         return group_list
 
-    def _global_sync(self, x):
-        """calculate global batch normalization output"""
+    def _shape_infer(self, x):
+        """global batch normalization shape and axes infer"""
         if len(self.shape(x)) == 4:
-            axes = (0, 2, 3)
+            axes = (0,2,3)
             re_shape = (1, self.num_features, 1, 1)
-            x_mean = self.reduce_mean(x, axes)
-            x_mean_square = self.reduce_mean(self.square(x), axes)
-            global_batch_mean = self.all_reduce(x_mean) / self.group
-            global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
-            global_mean = global_batch_mean
-            global_var = global_batch_mean_square - self.square(global_mean)
-            var_sqrt = self.sqrt(global_var + self.eps)
-            mean_first = (x - global_mean) / var_sqrt
-            y = mean_first * self.reshape(self.gamma, re_shape) + self.reshape(self.beta, re_shape)
-
-            mean_sub = self.sub_mean(self.reshape(self.moving_mean, re_shape), global_mean)
-            tmp_mean = self.mul_mean(mean_sub, self.cast(self.momentum, self.dtype(mean_sub)))
-            mean_sub2 = self.sub_var(self.reshape(self.moving_mean, re_shape), global_var)
-            tmp_variance = self.mul_var(mean_sub2, self.cast(self.momentum, self.dtype(mean_sub2)))
-            y = F.depend(y, self.assign_sub_mean(self.reshape(self.moving_mean, re_shape), tmp_mean))
-            y = F.depend(y, self.assign_sub_var(self.reshape(self.moving_variance, re_shape), tmp_variance))
         else:
             axes = (0,)
             re_shape = (1, self.num_features)
-            x_mean = self.reduce_mean(x, axes)
-            x_mean_square = self.reduce_mean(self.square(x), axes)
-            global_batch_mean = self.all_reduce(x_mean) / self.group
-            global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
-            global_mean = global_batch_mean
-            global_var = global_batch_mean_square - self.square(global_mean)
-            var_sqrt = self.sqrt(global_var + self.eps)
-            mean_first = (x - global_mean) / var_sqrt
-            y = mean_first * self.gamma + self.beta
-
-            mean_sub = self.sub_mean(self.moving_mean, global_mean)
-            temp_mean = self.mul_mean(mean_sub, self.cast(self.momentum, self.dtype(mean_sub)))
-            mean_sub2 = self.sub_var(self.moving_variance, global_var)
-            temp_variance = self.mul_var(mean_sub2, self.cast(self.momentum, self.dtype(mean_sub2)))
-            y = F.depend(y, self.assign_sub_mean(self.reshape(self.moving_mean, re_shape), temp_mean))
-            y = F.depend(y, self.assign_sub_var(self.reshape(self.moving_variance, re_shape), temp_variance))
+        return axes, re_shape
+
+    def _global_sync(self, x, axes, re_shape):
+        """calculate global batch normalization output"""
+        axes = (0, 2, 3)
+        re_shape = (1, self.num_features, 1, 1)
+        x_mean = self.reduce_mean(x, axes)
+        x_mean_square = self.reduce_mean(self.square(x), axes)
+        global_batch_mean = self.all_reduce(x_mean) / self.group
+        global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
+        global_mean = global_batch_mean
+        global_var = global_batch_mean_square - self.square(global_mean)
+        var_sqrt = self.sqrt(global_var + self.eps)
+        mean_first = (x - global_mean) / var_sqrt
+        y = mean_first * self.reshape(self.gamma, re_shape) + self.reshape(self.beta, re_shape)
+
+        mean_sub = self.sub_mean(self.reshape(self.moving_mean, re_shape), global_mean)
+        tmp_mean = self.mul_mean(mean_sub, self.cast(self.momentum, self.dtype(mean_sub)))
+        mean_sub2 = self.sub_var(self.reshape(self.moving_mean, re_shape), global_var)
+        tmp_variance = self.mul_var(mean_sub2, self.cast(self.momentum, self.dtype(mean_sub2)))
+        y = F.depend(y, self.assign_sub_mean(self.reshape(self.moving_mean, re_shape), tmp_mean))
+        y = F.depend(y, self.assign_sub_var(self.reshape(self.moving_variance, re_shape), tmp_variance))
         return y
 
     def construct(self, x):
         if self.training and self.use_batch_statistics:
             if self.is_ge_backend:
                 if self.is_global:
-                    y = self._global_sync(x)
+                    axes, re_shape = self._shape_infer(x)
+                    y = self._global_sync(x, axes, re_shape)
                 else:
                     y, batch_mean, batch_var, _, _ = \
                         self.bn_train(x,

From 8261cfd01902be5a0f4f14a16ca51d0938dd6d3f Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Wed, 29 Apr 2020 03:00:53 -0400
Subject: [PATCH 186/242] fix globalbatchnorm bug

---
 mindspore/nn/layer/normalization.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index dd4ac67273..6e92369550 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -128,8 +128,6 @@ class _BatchNorm(Cell):
 
     def _global_sync(self, x, axes, re_shape):
         """calculate global batch normalization output"""
-        axes = (0, 2, 3)
-        re_shape = (1, self.num_features, 1, 1)
         x_mean = self.reduce_mean(x, axes)
         x_mean_square = self.reduce_mean(self.square(x), axes)
         global_batch_mean = self.all_reduce(x_mean) / self.group

From b9d85bc51102bcdc39e68b1e6d7d3f105a376e4c Mon Sep 17 00:00:00 2001
From: zhangzhenghai <zhangzhenghai@huawei.com>
Date: Wed, 29 Apr 2020 15:04:26 +0800
Subject: [PATCH 187/242] update RELEASE.md.

---
 RELEASE.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index 416efd824a..6857ede676 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -60,8 +60,8 @@
     * Fix support for fp16 kernels on nvidia 1080Ti([!571](https://gitee.com/mindspore/mindspore/pulls/571)).
     * Fix parsing of tuple type parameters ([!316](https://gitee.com/mindspore/mindspore/pulls/316)).
 * Data processing
-    * Fix TypeErrors about can't pickle mindspore._c_dataengine.DEPipeline objects([!434](https://gitee.com/mindspore/mindspore/pulls/434))
-    * Add TFRecord file verification([!406](https://gitee.com/mindspore/mindspore/pulls/406))
+    * Fix TypeErrors about can't pickle mindspore._c_dataengine.DEPipeline objects([!434](https://gitee.com/mindspore/mindspore/pulls/434)).
+    * Add TFRecord file verification([!406](https://gitee.com/mindspore/mindspore/pulls/406)).
 
 ## Contributors
 Thanks goes to these wonderful people:

From 0de0aee54e6632f400a18e85dc4c25d6ce1beda4 Mon Sep 17 00:00:00 2001
From: huanghui <huanghui44@huawei.com>
Date: Tue, 28 Apr 2020 21:37:02 +0800
Subject: [PATCH 188/242] Add BatchNormGrad split pass

---
 .../ir_fission/batch_norm_grad_split.cc       | 132 ++++++++++++++++++
 .../ascend/ir_fission/batch_norm_grad_split.h |  33 +++++
 mindspore/ccsrc/utils/utils.h                 |   2 +
 .../ir_fission/batch_norm_grad_split_test.cc  |  59 ++++++++
 .../pre_activate/batch_norm_grad_split.py     |  61 ++++++++
 5 files changed, 287 insertions(+)
 create mode 100644 mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc
 create mode 100644 mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h
 create mode 100644 tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_split_test.cc
 create mode 100644 tests/ut/cpp/python_input/gtest_input/pre_activate/batch_norm_grad_split.py

diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc
new file mode 100644
index 0000000000..cb8670dd00
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.cc
@@ -0,0 +1,132 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h"
+
+#include <vector>
+#include <string>
+#include <memory>
+
+#include "utils/utils.h"
+#include "utils/context/ms_context.h"
+#include "common/utils.h"
+#include "pre_activate/common/helper.h"
+#include "device/kernel_info.h"
+#include "session/anf_runtime_algorithm.h"
+
+namespace mindspore {
+namespace opt {
+namespace {
+void CreateOutputsOfUpdateGrad(const FuncGraphPtr &graph, const CNodePtr &bn_grad_node,
+                               std::vector<AnfNodePtr> *bn_update_grad_outputs) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(bn_grad_node);
+  auto bn_grad_inputs = bn_grad_node->inputs();
+  if (bn_grad_inputs.size() < kBNGradInputNum) {
+    MS_LOG(EXCEPTION) << "BNGrad has wrong inputs size";
+  }
+  std::vector<AnfNodePtr> bn_update_grad_inputs = {
+    NewValueNode(std::make_shared<Primitive>(kBNTrainingUpdateGradOpName)), bn_grad_inputs[1], bn_grad_inputs[2],
+    bn_grad_inputs[4], bn_grad_inputs[5]};
+  auto bn_update_grad = graph->NewCNode(bn_update_grad_inputs);
+  MS_EXCEPTION_IF_NULL(bn_update_grad);
+  bn_update_grad->set_kernel_info(std::make_shared<device::KernelInfo>());
+  bn_update_grad->set_scope(bn_grad_node->scope());
+  // The two outputs of the new node reuse the inferred types/shapes of bn_grad_node's outputs 1 and 2.
+  auto types = {AnfAlgo::GetOutputInferDataType(bn_grad_node, 1), AnfAlgo::GetOutputInferDataType(bn_grad_node, 2)};
+  auto shapes = {AnfAlgo::GetOutputInferShape(bn_grad_node, 1), AnfAlgo::GetOutputInferShape(bn_grad_node, 2)};
+  AnfAlgo::SetOutputInferTypeAndShape(types, shapes, bn_update_grad.get());
+  // Copy epsilon over; the helper then fills bn_update_grad_outputs (presumably one getitem node per output).
+  AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn_grad_node, bn_update_grad);
+  CreateMultipleOutputsOfAnfNode(graph, bn_update_grad, kBNTrainingUpdateGradOutputNum, bn_update_grad_outputs);
+}
+
+void CreateOutputsOfReduceGrad(const FuncGraphPtr &graph, const CNodePtr &bn_grad_node,
+                               const std::vector<AnfNodePtr> &bn_update_grad_outputs,
+                               std::vector<AnfNodePtr> *bn_reduce_grad_outputs) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(bn_grad_node);
+  auto bn_grad_inputs = bn_grad_node->inputs();
+  if (bn_grad_inputs.size() < kBNGradInputNum) {
+    MS_LOG(EXCEPTION) << "BNGrad has wrong inputs size";
+  }
+  if (bn_update_grad_outputs.size() != kBNTrainingUpdateGradOutputNum) {
+    MS_LOG(EXCEPTION) << "bn_update_grad_outputs has wrong size";
+  }
+  std::vector<AnfNodePtr> bn_reduce_grad_inputs = {
+    NewValueNode(std::make_shared<Primitive>(kBNTrainingReduceGradOpName)),
+    bn_grad_inputs[1],
+    bn_grad_inputs[2],
+    bn_update_grad_outputs[0],
+    bn_update_grad_outputs[1],
+    bn_grad_inputs[3],
+    bn_grad_inputs[4],
+    bn_grad_inputs[5]};
+  auto bn_reduce_grad = graph->NewCNode(bn_reduce_grad_inputs);
+  MS_EXCEPTION_IF_NULL(bn_reduce_grad);
+  bn_reduce_grad->set_kernel_info(std::make_shared<device::KernelInfo>());
+  bn_reduce_grad->set_scope(bn_grad_node->scope());
+  // Reuse the inferred type/shape of bn_grad_node's output 0 for the single output of the new node.
+  auto types = {AnfAlgo::GetOutputInferDataType(bn_grad_node, 0)};
+  auto shapes = {AnfAlgo::GetOutputInferShape(bn_grad_node, 0)};
+  AnfAlgo::SetOutputInferTypeAndShape(types, shapes, bn_reduce_grad.get());
+  // Carry over the epsilon attribute, then hand the node back through the output vector.
+  AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn_grad_node, bn_reduce_grad);
+  (*bn_reduce_grad_outputs).push_back(bn_reduce_grad);
+}
+
+}  // namespace
+const BaseRef BatchNormGradSplit::DefinePattern() const {
+  VarPtr Xs = std::make_shared<SeqVar>();
+  auto prim = std::make_shared<Primitive>(kBatchNormGradOpName);
+  return VectorRef({prim, Xs});
+}
+
+const AnfNodePtr BatchNormGradSplit::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                             const EquivPtr &) const {
+  MS_EXCEPTION_IF_NULL(node);
+  MS_EXCEPTION_IF_NULL(func_graph);
+  auto cnode = node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
+  auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
+  MS_EXCEPTION_IF_NULL(primitive);
+  if (!primitive->HasAttr(kAttrIsTraining)) {
+    MS_LOG(INFO) << "Op BatchNormGrad must have attrs of is_training";
+    return nullptr;
+  }
+  if (!AnfAlgo::GetNodeAttr<bool>(cnode, kAttrIsTraining)) {
+    MS_LOG(INFO) << "is_training must be true";
+    return nullptr;
+  }
+  // Training-mode BatchNormGrad only from here on. First build the BNTrainingUpdateGrad node and its outputs.
+  std::vector<AnfNodePtr> bn_update_grad_outputs;
+  CreateOutputsOfUpdateGrad(func_graph, cnode, &bn_update_grad_outputs);
+  if (bn_update_grad_outputs.size() != kBNTrainingUpdateGradOutputNum) {
+    MS_LOG(EXCEPTION) << "bn_update_grad_outputs has wrong size";
+  }
+  // Then build BNTrainingReduceGrad, which consumes the update-grad outputs and yields a single node.
+  std::vector<AnfNodePtr> bn_reduce_grad_outputs;
+  CreateOutputsOfReduceGrad(func_graph, cnode, bn_update_grad_outputs, &bn_reduce_grad_outputs);
+  if (bn_reduce_grad_outputs.size() != kSingleOutputNum) {
+    MS_LOG(EXCEPTION) << "bn_reduce_grad_outputs has wrong size";
+  }
+  // Return a make_tuple of (reduce_grad, update_grad[0], update_grad[1]) as the result of this pass.
+  std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple), bn_reduce_grad_outputs[0],
+                                               bn_update_grad_outputs[0], bn_update_grad_outputs[1]};
+  auto make_tuple = func_graph->NewCNode(make_tuple_inputs);
+  return make_tuple;
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h
new file mode 100644
index 0000000000..e539fdb27c
--- /dev/null
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fission/batch_norm_grad_split.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_
+#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_
+
+#include "pre_activate/common/optimizer.h"
+#include "pre_activate/common/helper.h"
+
+namespace mindspore {
+namespace opt {
+class BatchNormGradSplit : public PatternProcessPass {
+ public:
+  explicit BatchNormGradSplit(bool multigraph = true) : PatternProcessPass("batch_norm_grad_split", multigraph) {}
+  ~BatchNormGradSplit() override = default;
+  const BaseRef DefinePattern() const override;
+  const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
+};
+}  // namespace opt
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_BATCH_NORM_GRAD_SPLIT_H_
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 59d7f27c11..5b9ab0a1a0 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -107,6 +107,7 @@ constexpr auto kLambNextMVOpName = "LambNextMV";
 constexpr auto kConfusionTransposeDOpName = "ConfusionTransposeD";
 constexpr auto kAdamApplyOneWithDecayOpName = "AdamApplyOneWithDecay";
 constexpr auto kBatchNormOpName = "BatchNorm";
+constexpr auto kBatchNormGradOpName = "BatchNormGrad";
 constexpr auto kAdamApplyOneOpName = "AdamApplyOne";
 constexpr auto kDropoutGenMask = "DropoutGenMask";
 constexpr auto kResizeNearestNeighborGrad = "ResizeNearestNeighborGrad";
@@ -162,6 +163,7 @@ constexpr auto kAttrLabelForInsertStreamActive = "label_for_insert_stream_active
 constexpr auto kAttrFusion = "fusion";
 constexpr auto kAttrGroup = "group";
 constexpr auto kAttrOp = "op";
+constexpr auto kAttrIsTraining = "is_training";
 
 // attr value
 constexpr auto kValueTargetSwitch = "target_switch";
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_split_test.cc
new file mode 100644
index 0000000000..68c327ade1
--- /dev/null
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_split_test.cc
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "common/backend_common_test.h"
+#include "common/py_func_graph_fetcher.h"
+#include "operator/ops.h"
+#include "ir/meta_tensor.h"
+#include "debug/anf_ir_dump.h"
+#include "utils/utils.h"
+#include "pre_activate/common/optimizer.h"
+#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h"
+#include "session/anf_runtime_algorithm.h"
+
+namespace mindspore {
+namespace opt {
+class TestHWBatchNormGradSplit : public BackendCommon {
+ public:
+  TestHWBatchNormGradSplit() : get_py_fun_("gtest_input.pre_activate.batch_norm_grad_split", true) {}
+
+ public:
+  UT::PyFuncGraphFetcher get_py_fun_;
+};
+
+TEST_F(TestHWBatchNormGradSplit, test_split) {
+  get_py_fun_.SetDoResolve(true);
+  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_batch_norm_grad_split", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp_x{1, 64, 112, 112};
+  std::vector<int> shp_b{64};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_x);
+  auto b_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_b);
+  AbstractBasePtrList args_spec_list{x_abstract, x_abstract, b_abstract, b_abstract, b_abstract, b_abstract};
+  auto kernel_graph = GetKernelGraph(g, args_spec_list);
+  EXPECT_NE(kernel_graph, nullptr);
+
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  auto pass = std::make_shared<opt::BatchNormGradSplit>();
+  pm->AddPass(pass);
+  optimizer->AddPassManager(pm);
+  auto new_graph = optimizer->Optimize(kernel_graph);
+
+  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_batch_norm_grad_split", "after");
+  EXPECT_TRUE(CheckEqualGraph(g_after, new_graph));
+}
+}  // namespace opt
+}  // namespace mindspore
diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/batch_norm_grad_split.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/batch_norm_grad_split.py
new file mode 100644
index 0000000000..dc783f1fbd
--- /dev/null
+++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/batch_norm_grad_split.py
@@ -0,0 +1,61 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from mindspore.ops import operations as P
+from mindspore.ops.operations import _grad_ops as G
+from mindspore.ops import Primitive
+
+batch_norm_grad = G.BatchNormGrad(is_training=True)
+bn_training_update_grad = Primitive('BNTrainingUpdateGrad')
+bn_training_reduce_grad = Primitive('BNTrainingReduceGrad')
+make_tuple = Primitive('make_tuple')
+tuple_getitem = Primitive('tuple_getitem')
+
+class FnDict:
+    def __init__(self):
+        self.fnDict = {}
+
+    def __call__(self, fn):
+        self.fnDict[fn.__name__] = fn
+
+    def __getitem__(self, name):
+        return self.fnDict[name]
+
+def test_batch_norm_grad_split(tag):
+    fns = FnDict()
+
+    @fns
+    def before(i0, i1, i2, i3, i4, i5):
+        bn_grad_output = batch_norm_grad(i0, i1, i2, i3, i4, i5)
+        item0 = tuple_getitem(bn_grad_output, 0)
+        item1 = tuple_getitem(bn_grad_output, 1)
+        item2 = tuple_getitem(bn_grad_output, 2)
+        output = make_tuple(item0, item1, item2)
+        return output
+
+    @fns
+    def after(i0, i1, i2, i3, i4, i5):
+        bn_update_grad_output = bn_training_update_grad(i0, i1, i3, i4)
+        update_item0 = tuple_getitem(bn_update_grad_output, 0)
+        update_item1 = tuple_getitem(bn_update_grad_output, 1)
+        bn_reduce_grad_output = bn_training_reduce_grad(i0, i1, update_item0, update_item1, i2, i3, i4)
+        output = make_tuple(bn_reduce_grad_output, update_item0, update_item1)
+        item0 = tuple_getitem(output, 0)
+        item1 = tuple_getitem(output, 1)
+        item2 = tuple_getitem(output, 2)
+        output = make_tuple(item0, item1, item2)
+        return make_tuple(output)
+
+    return fns[tag]

From 6c9a54afa12ecc722bd29d3a728a3923205f0c03 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Wed, 29 Apr 2020 03:34:58 -0400
Subject: [PATCH 189/242] fix globalbatchnorm bug

---
 mindspore/nn/layer/normalization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 6e92369550..2a1ca28ed4 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -119,7 +119,7 @@ class _BatchNorm(Cell):
     def _shape_infer(self, x):
         """global batch normalization shape and axes infer"""
         if len(self.shape(x)) == 4:
-            axes = (0,2,3)
+            axes = (0, 2, 3)
             re_shape = (1, self.num_features, 1, 1)
         else:
             axes = (0,)

From a22a4a57c224f0d2e2aa1da8628e5c8674ff4b9e Mon Sep 17 00:00:00 2001
From: Zhang Qinghua <zhangqinghua3@huawei.com>
Date: Wed, 29 Apr 2020 15:36:21 +0800
Subject: [PATCH 190/242] Modify UT case after adjust the collectors of
 manager.

---
 tests/ut/cpp/ir/manager_test.cc | 49 +++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/tests/ut/cpp/ir/manager_test.cc b/tests/ut/cpp/ir/manager_test.cc
index a7a19a7d24..8816277c49 100644
--- a/tests/ut/cpp/ir/manager_test.cc
+++ b/tests/ut/cpp/ir/manager_test.cc
@@ -127,12 +127,18 @@ class NestingSpecs {
       return;
     }
 
-    auto counter_p = dynamic_pointer_cast<CounterAnfNodeCollector>(results);
+    auto counter_p = dynamic_pointer_cast<CounterAnfNodeCollector<AnfNodePtr>>(results);
     if (counter_p != nullptr) {
       CheckAnfNodeCounter(counter_p);
       return;
     }
 
+    auto counter_pair = dynamic_pointer_cast<CounterAnfNodeCollector<CNodeIndexPairPtr>>(results);
+    if (counter_pair != nullptr) {
+      CheckCNodeIndexPairCounter(counter_pair);
+      return;
+    }
+
     auto nodes = dynamic_pointer_cast<NodesCollector>(results);
     if (nodes != nullptr) {
       CheckNodes(nodes);
@@ -226,7 +232,7 @@ class NestingSpecs {
 
   // Add CheckNesting function
 
-  void CheckAnfNodeCounter(std::shared_ptr<CounterAnfNodeCollector> results) {
+  void CheckAnfNodeCounter(std::shared_ptr<CounterAnfNodeCollector<AnfNodePtr>> results) {
     std::map<std::string, std::set<std::string>> clean_results;
     for (auto& iter : results->count_nodes_map()) {
       auto key = iter.first;
@@ -252,6 +258,32 @@ class NestingSpecs {
     ASSERT_EQ(clean_results, expected_);
   }
 
+  void CheckCNodeIndexPairCounter(std::shared_ptr<CounterAnfNodeCollector<CNodeIndexPairPtr>> results) {
+    std::map<std::string, std::set<std::string>> clean_results;
+    for (auto& iter : results->count_nodes_map()) {
+      auto key = iter.first;
+      auto value = iter.second;
+      if (key == nullptr) {
+        continue;
+      }
+      std::string k = Name(key);
+      // Gather the non-empty names of the first element of every index pair counted under this key.
+      std::set<std::string> v;
+      for (auto& node : value) {
+        auto fg = node.first->first;
+        if (!Name(fg).empty()) {
+          v.insert(Name(fg));
+        }
+      }
+      // Keys whose name set ended up empty are left out of the comparison.
+      if (!v.empty()) {
+        clean_results[k] = v;
+      }
+    }
+    // The collected name sets must equal the expectation held in expected_.
+    ASSERT_EQ(clean_results, expected_);
+  }
+
   void CheckGraphCounter(std::shared_ptr<CounterFuncGraphCollector> results) {
     std::map<std::string, std::set<std::string>> clean_results;
     for (auto& iter : results->count_func_graphs_map()) {
@@ -447,9 +479,8 @@ void TestManager::CheckAnalysisSize(std::shared_ptr<FuncGraphManager> mng) {
   ASSERT_EQ(size, mng->free_variables_total().size());
   ASSERT_EQ(size, mng->valuenodes().size());
   ASSERT_EQ(size, mng->free_variables_direct().size());
-  ASSERT_EQ(size, mng->func_graph_valuenodes().size());
+  ASSERT_EQ(size, mng->func_graph_cnodes_index().size());
   ASSERT_EQ(size, mng->func_graph_parents_direct().size());
-  ASSERT_EQ(size, mng->func_graph_users().size());
   ASSERT_EQ(size, mng->func_graphs_used().size());
 }
 
@@ -508,10 +539,6 @@ TEST_F(TestManager, test_nested_manual) {
   ASSERT_EQ(1, graphs_used[f].size());
   ASSERT_EQ(0, graphs_used[g].size());
 
-  auto graph_users = mng->func_graph_users();
-  ASSERT_EQ(0, graph_users[f].size());
-  ASSERT_EQ(1, graph_users[g].size());
-
   auto fv_direct = mng->free_variables_direct();
   ASSERT_EQ(0, fv_direct[f].size());
   ASSERT_EQ(1, fv_direct[g].size());
@@ -520,9 +547,9 @@ TEST_F(TestManager, test_nested_manual) {
   ASSERT_EQ(0, fv_total[f].size());
   ASSERT_EQ(1, fv_total[g].size());
 
-  auto graph_valuenodes = mng->func_graph_valuenodes();
-  ASSERT_EQ(0, graph_valuenodes[f].size());
-  ASSERT_EQ(1, graph_valuenodes[g].size());
+  auto cnodes = mng->func_graph_cnodes_index();
+  ASSERT_EQ(0, cnodes[f].size());
+  ASSERT_EQ(1, cnodes[g].size());
 }
 
 TEST_F(TestManager, test_deep_nested2_manual) {

From 7f602016f4948e6298061a7675ab034d3611eb91 Mon Sep 17 00:00:00 2001
From: "wangnan39@huawei.com" <wangnan39@huawei.com>
Date: Wed, 29 Apr 2020 10:25:04 +0800
Subject: [PATCH 191/242] add parameter verification for rmsprop, and modify
 default value in annotation

---
 mindspore/nn/optim/adam.py               | 13 +++--
 mindspore/nn/optim/optimizer.py          | 18 +++----
 mindspore/nn/optim/rmsprop.py            | 36 +++++++-------
 mindspore/nn/optim/sgd.py                |  8 +--
 tests/ut/python/nn/optim/test_rmsprop.py | 62 ++++++++++++++++++++++++
 5 files changed, 98 insertions(+), 39 deletions(-)
 create mode 100644 tests/ut/python/nn/optim/test_rmsprop.py

diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 055eaae7c6..87c46380f6 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -145,9 +145,12 @@ class Adam(Optimizer):
                                                         When the learning_rate is float or learning_rate is a Tensor
                                                         but the dims of the Tensor is 0, use fixed learning rate.
                                                         Other cases are not supported. Default: 1e-3.
-        beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0).
-        beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0).
-        eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
+        beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). Default:
+                       0.9.
+        beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). Default:
+                       0.999.
+        eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default:
+                     1e-8.
         use_locking (bool): Whether to enable a lock to protect updating variable tensors.
             If True, updating of the var, m, and v tensors will be protected by a lock.
             If False, the result is unpredictable. Default: False.
@@ -155,8 +158,8 @@ class Adam(Optimizer):
             If True, updates the gradients using NAG.
             If False, updates the gradients without using NAG. Default: False.
         weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
-        loss_scale (float): A floating point value for the loss scale. Default: 1.0.
-            Should be equal to or greater than 1.
+        loss_scale (float): A floating point value for the loss scale. Should be equal to or greater than 1. Default:
+                            1.0.
         decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
                                  lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
 
diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py
index bab539461e..34abc2b1c2 100755
--- a/mindspore/nn/optim/optimizer.py
+++ b/mindspore/nn/optim/optimizer.py
@@ -46,8 +46,8 @@ class Optimizer(Cell):
         learning_rate (float): A floating point value for the learning rate. Should be greater than 0.
         parameters (list): A list of parameter, which will be updated. The element in `parameters`
             should be class mindspore.Parameter.
-        weight_decay (float): A floating point value for the weight decay. If the type of `weight_decay`
-            input is int, it will be convertd to float. Default: 0.0.
+        weight_decay (float): A floating point value for the weight decay. It should be equal to or greater than 0.
+            If the type of `weight_decay` input is int, it will be converted to float. Default: 0.0.
         loss_scale (float): A floating point value for the loss scale. It should be greater than 0. If the
             type of `loss_scale` input is int, it will be convertd to float. Default: 1.0.
         decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default: lambda
@@ -87,21 +87,15 @@ class Optimizer(Cell):
 
         if isinstance(weight_decay, int):
             weight_decay = float(weight_decay)
-
-        validator.check_float_legal_value('weight_decay', weight_decay, None)
+        validator.check_value_type("weight_decay", weight_decay, [float], None)
+        validator.check_number_range("weight_decay", weight_decay, 0.0, float("inf"), Rel.INC_LEFT, None)
 
         if isinstance(loss_scale, int):
             loss_scale = float(loss_scale)
+        validator.check_value_type("loss_scale", loss_scale, [float], None)
+        validator.check_number_range("loss_scale", loss_scale, 0.0, float("inf"), Rel.INC_NEITHER, None)
 
-        validator.check_float_legal_value('loss_scale', loss_scale, None)
-
-        if loss_scale <= 0.0:
-            raise ValueError("Loss scale should be greater than 0, but got {}".format(loss_scale))
         self.loss_scale = loss_scale
-
-        if weight_decay < 0.0:
-            raise ValueError("Weight decay should be equal or greater than 0, but got {}".format(weight_decay))
-
         self.learning_rate = Parameter(learning_rate, name="learning_rate")
         self.parameters = ParameterTuple(parameters)
         self.reciprocal_scale = 1.0 / loss_scale
diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py
index a8f118b709..b1271587b4 100644
--- a/mindspore/nn/optim/rmsprop.py
+++ b/mindspore/nn/optim/rmsprop.py
@@ -15,6 +15,7 @@
 """rmsprop"""
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore._checkparam import Validator as validator
+from mindspore._checkparam import Rel
 from .optimizer import Optimizer
 
 rmsprop_opt = C.MultitypeFuncGraph("rmsprop_opt")
@@ -91,14 +92,16 @@ class RMSProp(Optimizer):
                                                         take the i-th value as the learning rate.
                                                         When the learning_rate is float or learning_rate is a Tensor
                                                         but the dims of the Tensor is 0, use fixed learning rate.
-                                                        Other cases are not supported.
-        decay (float): Decay rate.
-        momentum (float): Hyperparameter of type float, means momentum for the moving average.
-        epsilon (float): Term added to the denominator to improve numerical stability. Should be greater than 0.
+                                                        Other cases are not supported. Default: 0.1.
+        decay (float): Decay rate. Should be equal to or greater than 0. Default: 0.9.
+        momentum (float): Hyperparameter of type float, means momentum for the moving average. Should be equal to or
+                          greater than 0. Default: 0.0.
+        epsilon (float): Term added to the denominator to improve numerical stability. Should be greater than
+                         0. Default: 1e-10.
         use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False.
-        centered (bool): If True, gradients are normalized by the estimated variance of the gradient. Default: False
-        loss_scale (float): A floating point value for the loss scale. Default: 1.0.
-        weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
+        centered (bool): If True, gradients are normalized by the estimated variance of the gradient. Default: False.
+        loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0.
+        weight_decay (float): Weight decay (L2 penalty). Should be equal to or greater than 0. Default: 0.0.
         decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
                                  lambda x: 'beta' not in x.name and 'gamma' not in x.name.
 
@@ -118,17 +121,15 @@ class RMSProp(Optimizer):
                  use_locking=False, centered=False, loss_scale=1.0, weight_decay=0.0,
                  decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
         super(RMSProp, self).__init__(learning_rate, params, weight_decay, loss_scale, decay_filter)
-
-        if isinstance(momentum, float) and momentum < 0.0:
-            raise ValueError("momentum should be at least 0.0, but got momentum {}".format(momentum))
-
-        if decay < 0.0:
-            raise ValueError("decay should be at least 0.0, but got dampening {}".format(decay))
-        self.decay = decay
-        self.epsilon = epsilon
-
+        validator.check_value_type("decay", decay, [float], self.cls_name)
+        validator.check_number_range("decay", decay, 0.0, float("inf"), Rel.INC_LEFT, self.cls_name)
+        validator.check_value_type("momentum", momentum, [float], self.cls_name)
+        validator.check_number_range("momentum", momentum, 0.0, float("inf"), Rel.INC_LEFT, self.cls_name)
+        validator.check_value_type("epsilon", epsilon, [float], self.cls_name)
+        validator.check_number_range("epsilon", epsilon, 0.0, float("inf"), Rel.INC_NEITHER, self.cls_name)
         validator.check_value_type("use_locking", use_locking, [bool], self.cls_name)
         validator.check_value_type("centered", centered, [bool], self.cls_name)
+
         self.centered = centered
         if centered:
             self.opt = P.ApplyCenteredRMSProp(use_locking)
@@ -137,11 +138,10 @@ class RMSProp(Optimizer):
             self.opt = P.ApplyRMSProp(use_locking)
 
         self.momentum = momentum
-
         self.ms = self.parameters.clone(prefix="mean_square", init='zeros')
         self.moment = self.parameters.clone(prefix="moment", init='zeros')
         self.hyper_map = C.HyperMap()
-
+        self.epsilon = epsilon
         self.decay = decay
 
     def construct(self, gradients):
diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py
index bf2ed21d50..388fe5db47 100755
--- a/mindspore/nn/optim/sgd.py
+++ b/mindspore/nn/optim/sgd.py
@@ -49,12 +49,12 @@ class SGD(Optimizer):
                                                         When the learning_rate is float or learning_rate is a Tensor
                                                         but the dims of the Tensor is 0, use fixed learning rate.
                                                         Other cases are not supported. Default: 0.1.
-        momentum (float): A floating point value the momentum. Default: 0.
-        dampening (float): A floating point value of dampening for momentum. Default: 0.
-        weight_decay (float): Weight decay (L2 penalty). Default: 0.
+        momentum (float): A floating point value for the momentum. Default: 0.0.
+        dampening (float): A floating point value of dampening for momentum. Default: 0.0.
+        weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
         nesterov (bool): Enables the Nesterov momentum. Default: False.
         loss_scale (float): A floating point value for the loss scale, which should be larger
-        than 0.0. Default: 1.0.
+                            than 0.0. Default: 1.0.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`.
diff --git a/tests/ut/python/nn/optim/test_rmsprop.py b/tests/ut/python/nn/optim/test_rmsprop.py
new file mode 100644
index 0000000000..647f1e8d45
--- /dev/null
+++ b/tests/ut/python/nn/optim/test_rmsprop.py
@@ -0,0 +1,62 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+""" test rmsprop """
+import numpy as np
+import pytest
+import mindspore.nn as nn
+from mindspore.common.api import _executor
+from mindspore import Tensor, Parameter
+from mindspore.nn import TrainOneStepCell, WithLossCell
+from mindspore.ops import operations as P
+from mindspore.nn.optim import RMSProp
+
+
+class Net(nn.Cell):
+    """ Net definition """
+    def __init__(self):
+        super(Net, self).__init__()
+        self.weight = Parameter(Tensor(np.ones([64, 10]).astype(np.float32)), name="weight")
+        self.bias = Parameter(Tensor(np.ones([10]).astype((np.float32))), name="bias")
+        self.matmul = P.MatMul()
+        self.biasAdd = P.BiasAdd()
+
+    def construct(self, x):
+        x = self.biasAdd(self.matmul(x, self.weight), self.bias)
+        return x
+
+
+def test_rmsprop_compile():
+    """ test_rmsprop_compile """
+    inputs = Tensor(np.ones([1, 64]).astype(np.float32))
+    label = Tensor(np.zeros([1, 10]).astype(np.float32))
+    net = Net()
+    net.set_train()
+
+    loss = nn.SoftmaxCrossEntropyWithLogits()
+    optimizer = RMSProp(net.trainable_params(), learning_rate=0.1)
+
+    net_with_loss = WithLossCell(net, loss)
+    train_network = TrainOneStepCell(net_with_loss, optimizer)
+    _executor.compile(train_network, inputs, label)
+
+
+def test_rmsprop_e():
+    net = Net()
+    with pytest.raises(ValueError):
+        RMSProp(net.get_parameters(), momentum=-0.1, learning_rate=0.1)
+
+    with pytest.raises(TypeError):
+        RMSProp(net.get_parameters(), momentum=1, learning_rate=0.1)
+

From 1cfcfccd71fcd099053f4a0fdef4449977511cc4 Mon Sep 17 00:00:00 2001
From: caifubi <caifubi1@huawei.com>
Date: Wed, 29 Apr 2020 15:18:30 +0800
Subject: [PATCH 192/242] Add debug log for task name and task id

---
 .../device/ascend/ascend_kernel_runtime.cc    | 19 ++++++++++++++++++-
 .../device/ascend/ascend_kernel_runtime.h     |  1 +
 .../device/ascend/profiling/profiling_utils.h |  2 ++
 .../device/ascend/tasksink/task_generator.cc  |  4 +---
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
index 10517299cc..d9b3e6ebe4 100644
--- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.cc
@@ -343,6 +343,22 @@ bool AscendKernelRuntime::LoadTask(const session::KernelGraph *graph) {
   return true;
 }
 
+void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
+  auto task_ids = ge::model_runner::ModelRunner::Instance().GetTaskIdList(graph_id);
+  auto graph_task_names = ProfilingUtils::graph_kernel_name();
+  auto iter = graph_task_names.find(graph_id);
+  if (iter != graph_task_names.end()) {
+    const auto &task_names = iter->second;
+    if (task_ids.size() != task_names.size()) {
+      MS_LOG(WARNING) << "Task_ids and task_names size not match";
+      return;
+    }
+    for (size_t i = 0; i < task_ids.size(); ++i) {
+      MS_LOG(INFO) << "Task_id:" << task_ids[i] << " task_name:" << task_names[i];
+    }
+  }
+}
+
 bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
   MS_LOG(INFO) << "RunTask start. GraphId:" << graph->graph_id();
@@ -363,7 +379,8 @@ bool AscendKernelRuntime::RunTask(const session::KernelGraph *graph) {
 
   bool status = ge::model_runner::ModelRunner::Instance().RunModel(graph->graph_id(), input_tensors, output_tensors);
   if (!status) {
-    MS_LOG(INFO) << "run task failed";
+    MS_LOG(ERROR) << "run task failed";
+    DebugTaskIdName(graph->graph_id());
     return false;
   }
   return true;
diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
index 5d0f61d0a6..920e28cb87 100644
--- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
+++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
@@ -57,6 +57,7 @@ class AscendKernelRuntime : public KernelRuntime {
   void ReleaseDeviceRes() override;
   bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const;
   bool CheckGraphIdValid(GraphId graph_id) const;
+  static void DebugTaskIdName(GraphId graph_id) ;
 
   rtContext_t rt_context_{nullptr};
   bool initialized_{false};
diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
index 59909c1f2f..6986eaab54 100644
--- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
+++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h
@@ -101,6 +101,8 @@ class ProfilingUtils {
                                 NotNull<session::KernelGraph *> graph_ptr,
                                 NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
 
+  static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name() { return graph_kernel_name_; }
+
   inline static constexpr char kProfiling[] = "Profiling";
   inline static constexpr char kNotify[] = "notify";
   inline static constexpr char kProfilerTraceId[] = "profiler_trace_id";
diff --git a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
index 7b2a7dad9f..bdcc178b54 100644
--- a/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
+++ b/mindspore/ccsrc/device/ascend/tasksink/task_generator.cc
@@ -147,9 +147,7 @@ bool TaskGenerator::LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list,
     }
     current_op_index++;
   }
-  if (ProfilingManager::GetInstance().IsProfiling()) {
-    ProfilingUtils::SetGraphKernelName(graph_id, kernel_name_list);
-  }
+  ProfilingUtils::SetGraphKernelName(graph_id, kernel_name_list);
   return true;
 }
 }  // namespace tasksink

From 064d370a61708be83677a7bb4031771e0addca29 Mon Sep 17 00:00:00 2001
From: buxue <yiren19920727@163.com>
Date: Wed, 29 Apr 2020 10:36:16 +0800
Subject: [PATCH 193/242] fix bug of Acosh bprop and Elu bprop

---
 mindspore/_checkparam.py                 | 2 +-
 mindspore/ops/_grad/grad_math_ops.py     | 2 +-
 mindspore/ops/_grad/grad_nn_ops.py       | 2 +-
 mindspore/ops/operations/nn_ops.py       | 4 +++-
 tests/ut/python/ops/test_nn_ops_check.py | 2 +-
 5 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py
index 78288ad090..ef0fe4d868 100644
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@@ -128,7 +128,7 @@ class Validator:
 
     @staticmethod
     def check_number(arg_name, arg_value, value, rel, prim_name):
-        """Integer value judgment."""
+        """Number value judgment."""
         rel_fn = Rel.get_fns(rel)
         if not rel_fn(arg_value, value):
             rel_str = Rel.get_strs(rel).format(value)
diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py
index 2f39fe8745..c20cacbc34 100755
--- a/mindspore/ops/_grad/grad_math_ops.py
+++ b/mindspore/ops/_grad/grad_math_ops.py
@@ -727,7 +727,7 @@ def get_bprop_acosh(self):
     input_grad = G.AcoshGrad()
 
     def bprop(x, out, dout):
-        dx = input_grad(x, dout)
+        dx = input_grad(out, dout)
         return (dx,)
     return bprop
 
diff --git a/mindspore/ops/_grad/grad_nn_ops.py b/mindspore/ops/_grad/grad_nn_ops.py
index fc94544176..3964fef5d2 100755
--- a/mindspore/ops/_grad/grad_nn_ops.py
+++ b/mindspore/ops/_grad/grad_nn_ops.py
@@ -281,7 +281,7 @@ def get_bprop_elu(self):
     input_grad = G.EluGrad()
 
     def bprop(x, out, dout):
-        dx = input_grad(dout, x)
+        dx = input_grad(dout, out)
         return (dx,)
 
     return bprop
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 48762a88bd..d4d15a0ed5 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -308,7 +308,8 @@ class Elu(PrimitiveWithInfer):
     The data type of input tensor should be float.
 
     Args:
-        alpha (float): The coefficient of negative factor whose type is float. Default: 1.0.
+        alpha (float): The coefficient of negative factor whose type is float,
+            only support '1.0' currently. Default: 1.0.
 
     Inputs:
         - **input_x** (Tensor) - The input tensor whose data type should be float.
@@ -328,6 +329,7 @@ class Elu(PrimitiveWithInfer):
     def __init__(self, alpha=1.0):
         """Init Elu"""
         validator.check_value_type("alpha", alpha, [float], self.name)
+        validator.check_number("alpha", alpha, 1.0, Rel.EQ, self.name)
 
     def infer_shape(self, input_x):
         return input_x
diff --git a/tests/ut/python/ops/test_nn_ops_check.py b/tests/ut/python/ops/test_nn_ops_check.py
index c2a751aa0c..4060bb2e15 100755
--- a/tests/ut/python/ops/test_nn_ops_check.py
+++ b/tests/ut/python/ops/test_nn_ops_check.py
@@ -123,7 +123,7 @@ raise_set = [
         'skip': ['backward']}),
     # input is Tensor(int32)
     ('Elu1', {
-        'block': (P.Elu(alpha=0.9), {'exception': TypeError, 'error_keywords': ['Elu']}),
+        'block': (P.Elu(), {'exception': TypeError, 'error_keywords': ['Elu']}),
         'desc_inputs': [Tensor(np.ones([3, 4]).astype(np.int32))],
         'skip': ['backward']}),
 

From 7b81ca68dc8ee17a6daf624e3eb215ec3cd48f92 Mon Sep 17 00:00:00 2001
From: zhaojichen <zhaojichen1@huawei.com>
Date: Wed, 29 Apr 2020 04:53:23 -0400
Subject: [PATCH 194/242] fix globalbatchnorm bug

---
 mindspore/nn/layer/normalization.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 2a1ca28ed4..7a102b0bbe 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -116,15 +116,7 @@ class _BatchNorm(Cell):
         group_list = [list(i) for i in world_rank_list]
         return group_list
 
-    def _shape_infer(self, x):
-        """global batch normalization shape and axes infer"""
-        if len(self.shape(x)) == 4:
-            axes = (0, 2, 3)
-            re_shape = (1, self.num_features, 1, 1)
-        else:
-            axes = (0,)
-            re_shape = (1, self.num_features)
-        return axes, re_shape
+
 
     def _global_sync(self, x, axes, re_shape):
         """calculate global batch normalization output"""
@@ -150,7 +142,7 @@ class _BatchNorm(Cell):
         if self.training and self.use_batch_statistics:
             if self.is_ge_backend:
                 if self.is_global:
-                    axes, re_shape = self._shape_infer(x)
+                    axes, re_shape = _shape_infer(F.shape(x), self.num_features)
                     y = self._global_sync(x, axes, re_shape)
                 else:
                     y, batch_mean, batch_var, _, _ = \
@@ -189,6 +181,17 @@ def _channel_check(channel, num_channel):
     if channel != num_channel:
         raise ValueError("the input channel is not equal with num_channel")
 
+@constexpr
+def _shape_infer(x_shape, num_feature):
+    """global batch normalization shape and axes infer"""
+    if len(x_shape) == 4:
+        axes = (0, 2, 3)
+        re_shape = (1, num_feature, 1, 1)
+    else:
+        axes = (0,)
+        re_shape = (1, num_feature)
+    return axes, re_shape
+
 class BatchNorm1d(_BatchNorm):
     r"""
     Batch normalization layer over a 2D input.

From d5adfa52100405774c840f2ec9266a83a02ecc27 Mon Sep 17 00:00:00 2001
From: chujinjin <chujinjin52@huawei.com>
Date: Wed, 29 Apr 2020 10:48:15 +0800
Subject: [PATCH 195/242] add accuracy for resnet cifar

---
 tests/st/tbe_networks/test_resnet_cifar_1p.py | 198 ++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 tests/st/tbe_networks/test_resnet_cifar_1p.py

diff --git a/tests/st/tbe_networks/test_resnet_cifar_1p.py b/tests/st/tbe_networks/test_resnet_cifar_1p.py
new file mode 100644
index 0000000000..058ec3aeec
--- /dev/null
+++ b/tests/st/tbe_networks/test_resnet_cifar_1p.py
@@ -0,0 +1,198 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import pytest
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.ops import operations as P
+from mindspore.nn.optim.momentum import Momentum
+from mindspore.train.model import Model
+from mindspore import context
+import mindspore.common.dtype as mstype
+import os
+import numpy as np
+import mindspore.ops.functional as F
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+import mindspore.dataset as ds
+import mindspore.dataset.transforms.c_transforms as C
+import mindspore.dataset.transforms.vision.c_transforms as vision
+from resnet import resnet50
+import random
+import time
+
+random.seed(1)
+np.random.seed(1)
+ds.config.set_seed(1)
+
+data_home = "/home/workspace/mindspore_dataset"
+
+
+def create_dataset(repeat_num=1, training=True, batch_size=32):
+    data_dir = data_home + "/cifar-10-batches-bin"
+    if not training:
+        data_dir = data_home + "/cifar-10-verify-bin"
+    data_set = ds.Cifar10Dataset(data_dir)
+
+    resize_height = 224
+    resize_width = 224
+    rescale = 1.0 / 255.0
+    shift = 0.0
+
+    # define map operations
+    random_crop_op = vision.RandomCrop(
+        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
+    random_horizontal_op = vision.RandomHorizontalFlip()
+    # interpolation default BILINEAR
+    resize_op = vision.Resize((resize_height, resize_width))
+    rescale_op = vision.Rescale(rescale, shift)
+    normalize_op = vision.Normalize(
+        (0.4465, 0.4822, 0.4914), (0.2010, 0.1994, 0.2023))
+    changeswap_op = vision.HWC2CHW()
+    type_cast_op = C.TypeCast(mstype.int32)
+
+    c_trans = []
+    if training:
+        c_trans = [random_crop_op, random_horizontal_op]
+    c_trans += [resize_op, rescale_op, normalize_op,
+                changeswap_op]
+
+    # apply map operations on images
+    data_set = data_set.map(input_columns="label", operations=type_cast_op)
+    data_set = data_set.map(input_columns="image", operations=c_trans)
+
+    # apply shuffle operations
+    data_set = data_set.shuffle(buffer_size=1000)
+
+    # apply batch operations
+    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)
+
+    # apply repeat operations
+    data_set = data_set.repeat(repeat_num)
+
+    return data_set
+
+
+class CrossEntropyLoss(nn.Cell):
+    def __init__(self):
+        super(CrossEntropyLoss, self).__init__()
+        self.cross_entropy = P.SoftmaxCrossEntropyWithLogits()
+        self.mean = P.ReduceMean()
+        self.one_hot = P.OneHot()
+        self.one = Tensor(1.0, mstype.float32)
+        self.zero = Tensor(0.0, mstype.float32)
+
+    def construct(self, logits, label):
+        label = self.one_hot(label, F.shape(logits)[1], self.one, self.zero)
+        loss = self.cross_entropy(logits, label)[0]
+        loss = self.mean(loss, (-1,))
+        return loss
+
+
+class LossGet(Callback):
+    def __init__(self, per_print_times=1):
+        super(LossGet, self).__init__()
+        if not isinstance(per_print_times, int) or per_print_times < 0:
+            raise ValueError("print_step must be int and >= 0.")
+        self._per_print_times = per_print_times
+        self._loss = 0.0
+
+    def step_end(self, run_context):
+        cb_params = run_context.original_args()
+        loss = cb_params.net_outputs
+
+        if isinstance(loss, (tuple, list)):
+            if isinstance(loss[0], Tensor) and isinstance(loss[0].asnumpy(), np.ndarray):
+                loss = loss[0]
+
+        if isinstance(loss, Tensor) and isinstance(loss.asnumpy(), np.ndarray):
+            loss = np.mean(loss.asnumpy())
+
+        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
+
+        if isinstance(loss, float) and (np.isnan(loss) or np.isinf(loss)):
+            raise ValueError("epoch: {} step: {}. Invalid loss, terminating training."
+                             .format(cb_params.cur_epoch_num, cur_step_in_epoch))
+        if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0:
+            self._loss = loss
+            print("epoch: %s step: %s, loss is %s" % (cb_params.cur_epoch_num, cur_step_in_epoch, loss))
+
+    def get_loss(self):
+        return self._loss
+
+
+def train_process(device_id, epoch_size, num_classes, device_num, batch_size):
+    os.system("mkdir " + str(device_id))
+    os.chdir(str(device_id))
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(enable_task_sink=True, device_id=device_id)
+    context.set_context(enable_loop_sink=True)
+    context.set_context(enable_mem_reuse=True)
+    context.set_context(mode=context.GRAPH_MODE)
+    net = resnet50(batch_size, num_classes)
+    loss = CrossEntropyLoss()
+    opt = Momentum(filter(lambda x: x.requires_grad,
+                          net.get_parameters()), 0.01, 0.9)
+
+    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
+
+    dataset = create_dataset(epoch_size, training=True, batch_size=batch_size)
+    batch_num = dataset.get_dataset_size()
+    config_ck = CheckpointConfig(save_checkpoint_steps=batch_num, keep_checkpoint_max=1)
+    ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10_device_id_" + str(device_id), directory="./",
+                                 config=config_ck)
+    loss_cb = LossGet()
+    model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])
+
+
+def eval(batch_size, num_classes):
+    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(enable_task_sink=True, device_id=0)
+    context.set_context(enable_loop_sink=True)
+    context.set_context(enable_mem_reuse=True)
+
+    net = resnet50(batch_size, num_classes)
+    loss = CrossEntropyLoss()
+    opt = Momentum(filter(lambda x: x.requires_grad,
+                          net.get_parameters()), 0.01, 0.9)
+
+    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
+    checkpoint_path = "./train_resnet_cifar10_device_id_0-1_1562.ckpt"
+    param_dict = load_checkpoint(checkpoint_path)
+    load_param_into_net(net, param_dict)
+    net.set_train(False)
+    eval_dataset = create_dataset(1, training=False)
+    res = model.eval(eval_dataset)
+    print("result: ", res)
+    return res
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_resnet_cifar_1p():
+    device_num = 1
+    epoch_size = 1
+    num_classes = 10
+    batch_size = 32
+    device_id = 0
+    train_process(device_id, epoch_size, num_classes, device_num, batch_size)
+    time.sleep(3)
+    acc = eval(batch_size, num_classes)
+    os.chdir("../")
+    os.system("rm -rf " + str(device_id))
+    print("End training...")
+    assert (acc['acc'] > 0.35)

From 43208b91dbdc3c6651b78ce81d9dc594bc7fc9b7 Mon Sep 17 00:00:00 2001
From: laiyongqiang <laiyongqiang1@huawei.com>
Date: Wed, 29 Apr 2020 17:08:48 +0800
Subject: [PATCH 196/242] reorder getnext for getnext parallel

---
 mindspore/ccsrc/device/kernel_adjust.cc | 19 +++++++++++++++++++
 mindspore/ccsrc/device/kernel_adjust.h  |  1 +
 2 files changed, 20 insertions(+)

diff --git a/mindspore/ccsrc/device/kernel_adjust.cc b/mindspore/ccsrc/device/kernel_adjust.cc
index e8f38aa339..596cf6790d 100644
--- a/mindspore/ccsrc/device/kernel_adjust.cc
+++ b/mindspore/ccsrc/device/kernel_adjust.cc
@@ -55,6 +55,24 @@ void KernelAdjust::Reorder(const std::shared_ptr<session::KernelGraph> &kernel_g
   kernel_graph_ptr->set_execution_order(new_order_list);
 }
 
+void KernelAdjust::ReorderGetNext(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) {
+  MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
+  const std::vector<CNodePtr> &origin_cnode_list = kernel_graph_ptr->execution_order();
+  std::vector<CNodePtr> getnext_list;
+  std::vector<CNodePtr> other_list;
+  for (const auto &cnode : origin_cnode_list) {
+    if (AnfAlgo::GetCNodeName(cnode) == kGetNextOpName) {
+      getnext_list.emplace_back(cnode);
+    } else {
+      other_list.emplace_back(cnode);
+    }
+  }
+  std::vector<CNodePtr> new_order_list;
+  new_order_list.insert(new_order_list.end(), getnext_list.begin(), getnext_list.end());
+  new_order_list.insert(new_order_list.end(), other_list.begin(), other_list.end());
+  kernel_graph_ptr->set_execution_order(new_order_list);
+}
+
 bool KernelAdjust::NeedInsertSwitch() {
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
@@ -124,6 +142,7 @@ void KernelAdjust::InsertSwitchLoop(const std::shared_ptr<session::KernelGraph>
     return;
   }
   MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
+  ReorderGetNext(kernel_graph_ptr);
   std::map<std::string, mindspore::ParameterPtr> switch_loop_input;
   CreateSwitchOpParameters(kernel_graph_ptr, &switch_loop_input);
 
diff --git a/mindspore/ccsrc/device/kernel_adjust.h b/mindspore/ccsrc/device/kernel_adjust.h
index 3dced257c1..4c69641a34 100644
--- a/mindspore/ccsrc/device/kernel_adjust.h
+++ b/mindspore/ccsrc/device/kernel_adjust.h
@@ -63,6 +63,7 @@ class KernelAdjust {
   KernelAdjust() = default;
   ~KernelAdjust() = default;
 
+  void ReorderGetNext(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
   CNodePtr CreateRecvApplyKernel(const std::shared_ptr<session::KernelGraph> &graph_ptr, uint32_t event_id);
   CNodePtr CreateSendApplyKernel(const std::shared_ptr<session::KernelGraph> &graph_ptr, uint32_t event_id);
   uint32_t FindFirstStreamSwitchLabel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);

From 9fd9a1af71270e2eb8171cae6f215543020b1762 Mon Sep 17 00:00:00 2001
From: lianliguang <lianliguang@huawei.com>
Date: Wed, 29 Apr 2020 15:31:41 +0800
Subject: [PATCH 197/242] add warning that counts how many nodes used raised
 or reduced precision to select kernel info

---
 .../ccsrc/device/ascend/kernel_select_ascend.cc |  7 +++++--
 .../ccsrc/device/ascend/kernel_select_ascend.h  |  2 +-
 mindspore/ccsrc/session/ascend_session.cc       | 17 ++++++++++++++++-
 mindspore/ccsrc/utils/utils.h                   |  5 ++++-
 4 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
index d8779bc550..9e54adc635 100644
--- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
+++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.cc
@@ -342,7 +342,7 @@ void AddNodeAndKernelDataType(const CNodePtr &kernel_node, const kernel::KernelB
                               std::vector<int> *node_mix_precision_datatype_index) {
   MS_EXCEPTION_IF_NULL(node_mix_precision_datatype);
   bool add_node_datatype_flag = false;
-  if (node_mix_precision_datatype->size() == 0) {
+  if (node_mix_precision_datatype->empty()) {
     add_node_datatype_flag = true;
   }
   for (size_t input_index = 0; input_index < kernel_build_info.GetInputNum(); ++input_index) {
@@ -464,8 +464,9 @@ std::vector<std::shared_ptr<kernel::KernelBuildInfo>> FilterRaisedOrReducePrecis
 }
 }  // namespace
 
-void SelectKernelInfo(const CNodePtr &kernel_node) {
+int SelectKernelInfo(const CNodePtr &kernel_node) {
   std::vector<std::shared_ptr<kernel::KernelBuildInfo>> kernel_info_list;
+  int status = kStatusAllMatched;
   MS_EXCEPTION_IF_NULL(kernel_node);
   bool precision_reduce = false;
   std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info = nullptr;
@@ -486,11 +487,13 @@ void SelectKernelInfo(const CNodePtr &kernel_node) {
                               << "] cannot find valid kernel info, not supported the type" << buffer.str();
     } else {
       PrintRaiseOrReducePrecisionSelectedInfo(kernel_node, selected_kernel_info, precision_reduce);
+      status = precision_reduce ? kStatusReducePrecision : kStatusRaisePrecision;
     }
   }
   AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get());
   // Set format and data type for input tensor.
   SetTensorDeviceInfo(*selected_kernel_info, kernel_node);
+  return status;
 }
 
 bool CheckKernelAccuracySupported(const CNodePtr &kernel_node,
diff --git a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h b/mindspore/ccsrc/device/ascend/kernel_select_ascend.h
index 100cd8e1e1..af353815bf 100644
--- a/mindspore/ccsrc/device/ascend/kernel_select_ascend.h
+++ b/mindspore/ccsrc/device/ascend/kernel_select_ascend.h
@@ -21,7 +21,7 @@
 namespace mindspore {
 namespace device {
 namespace ascend {
-void SelectKernelInfo(const CNodePtr &kernel_node);
+int SelectKernelInfo(const CNodePtr &kernel_node);
 bool CheckKernelAccuracySupported(const CNodePtr &kernel_node, const kernel::KernelBuildInfoPtr &new_kernel_build_info);
 }  // namespace ascend
 }  // namespace device
diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc
index bd5fba6d4b..8d6e3f906a 100755
--- a/mindspore/ccsrc/session/ascend_session.cc
+++ b/mindspore/ccsrc/session/ascend_session.cc
@@ -312,10 +312,25 @@ py::tuple AscendSession::RunOp(const OpRunInfo &op_run_info, const GraphInfo &gr
 // compile graph steps
 void AscendSession::SelectKernel(const KernelGraph &kernel_graph) const {
   MS_LOG(INFO) << "Start!";
+  size_t raise_precision_count = 0;
+  size_t reduce_precision_count = 0;
   for (const auto &cnode : kernel_graph.execution_order()) {
-    device::ascend::SelectKernelInfo(cnode);
+    auto status = device::ascend::SelectKernelInfo(cnode);
+    if (status == kStatusRaisePrecision) {
+      raise_precision_count++;
+    } else if (status == kStatusReducePrecision) {
+      reduce_precision_count++;
+    }
     MS_LOG(INFO) << "Select ApplyKernel: " << cnode->DebugString();
   }
+  if (raise_precision_count > 0) {
+    MS_LOG(WARNING) << "There has " << raise_precision_count
+                    << " node/nodes used raise precision to selected the kernel!";
+  }
+  if (reduce_precision_count > 0) {
+    MS_LOG(WARNING) << "There has " << reduce_precision_count
+                    << " node/nodes used reduce precision to selected the kernel!";
+  }
   MS_LOG(INFO) << "Finish!";
 }
 
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 59d7f27c11..2624defced 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -184,7 +184,10 @@ constexpr auto kControlDependBehindIndex = 2;
 // index define of depend
 constexpr auto kRealInputIndexInDepend = 1;
 constexpr auto kDependAttachNodeIndex = 2;
-
+// status returned by kernel selection: precision reduced, precision raised, or all matched
+constexpr int kStatusReducePrecision = -1;
+constexpr int kStatusRaisePrecision = 1;
+constexpr int kStatusAllMatched = 0;
 // format
 constexpr auto kOpFormat_DEFAULT = "DefaultFormat";
 constexpr auto kOpFormat_NC1KHKWHWC0 = "NC1KHKWHWC0";

From b96df362f8dd00ae7152d9dbf94c4da63c7d0202 Mon Sep 17 00:00:00 2001
From: guohongzilong <2713219276@qq.com>
Date: Wed, 29 Apr 2020 17:55:12 +0800
Subject: [PATCH 198/242] add handling of Parameters nested in a tuple to the
 run_op function

---
 mindspore/ops/primitive.py                          |  4 ++++
 tests/ut/python/pynative_mode/ops/test_multitype.py | 11 +++++++++++
 2 files changed, 15 insertions(+)

diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py
index 24c81003bd..78e8778c52 100644
--- a/mindspore/ops/primitive.py
+++ b/mindspore/ops/primitive.py
@@ -329,6 +329,10 @@ def _run_op(obj, op_name, args):
         if hasattr(arg, '__parameter__'):
             op_inputs.append(arg.default_input)
             op_mask[i] = 1
+        elif isinstance(arg, tuple):
+            # Unwrap Parameters nested one level deep; other tuple elements pass through as-is.
+            args_ = tuple(x.default_input if hasattr(x, '__parameter__') else x for x in arg)
+            op_inputs.append(args_)
         else:
             op_inputs.append(arg)
     output = real_run_op(obj, op_name, tuple(op_inputs), tuple(op_mask))
diff --git a/tests/ut/python/pynative_mode/ops/test_multitype.py b/tests/ut/python/pynative_mode/ops/test_multitype.py
index 0073041b96..58fd31256d 100644
--- a/tests/ut/python/pynative_mode/ops/test_multitype.py
+++ b/tests/ut/python/pynative_mode/ops/test_multitype.py
@@ -16,6 +16,7 @@
 import numpy as np
 
 from mindspore.common.api import ms_function
+from mindspore.common.parameter import Parameter
 from mindspore.ops import Primitive
 from mindspore.ops import composite as C
 from mindspore.ops import operations as P
@@ -24,6 +25,7 @@ from ...ut_filter import non_graph_engine
 
 
 tensor_add = P.TensorAdd()
+op_add = P.AddN()
 scala_add = Primitive('scalar_add')
 add = C.MultitypeFuncGraph('add')
 
@@ -50,5 +52,14 @@ def test_multitype_tensor():
     mainf(tensor1, tensor2)
 
 
+@non_graph_engine
+def test_multitype_tuple():
+    tensor1 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32'))
+    params1 = Parameter(tensor1, name="params1")
+    tensor2 = Tensor(np.array([[1.2, 2.1], [2.2, 3.2]]).astype('float32'))
+    output = op_add((params1, tensor2))
+    assert output == Tensor(np.array([[2.4, 4.2], [4.4, 6.4]]).astype('float32'))
+
+
 def test_multitype_scalar():
     mainf(1, 2)

From e31db0e1f7eed759d55fdaa8d52cd73f22175c48 Mon Sep 17 00:00:00 2001
From: xiefangqi <xiefangqi2@huawei.com>
Date: Wed, 29 Apr 2020 17:51:02 +0800
Subject: [PATCH 199/242] minddata fix gpu issue

---
 mindspore/dataset/engine/iterators.py | 31 +++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py
index ebee204b37..2cf95aa086 100644
--- a/mindspore/dataset/engine/iterators.py
+++ b/mindspore/dataset/engine/iterators.py
@@ -17,6 +17,7 @@
 from abc import abstractmethod
 import copy
 import weakref
+from importlib import import_module
 
 from mindspore._c_dataengine import DEPipeline
 from mindspore._c_dataengine import OpName
@@ -24,14 +25,29 @@ from mindspore._c_dataengine import OpName
 from mindspore import log as logger
 from . import datasets as de
 
+try:
+    context = import_module("mindspore.context")
+except ModuleNotFoundError:
+    context = None
+
 ITERATORS_LIST = list()
 
 
 def _cleanup():
+    """Release every iterator that is still registered in ITERATORS_LIST."""
     for itr_ref in ITERATORS_LIST:
-        itr = itr_ref()
-        if itr is not None:
-            itr.release()
+        # Iterator.__init__ registers a strong reference when device_target is
+        # "GPU" and a weakref.ref otherwise.  Dispatch on what was actually
+        # stored rather than re-reading the context here: if device_target
+        # changed after registration, the lookup would pick the wrong branch
+        # and either call release() on a weakref.ref or call the Iterator
+        # object as if it were a weakref.
+        if isinstance(itr_ref, weakref.ref):
+            itr = itr_ref()
+        else:
+            itr = itr_ref
+        if itr is not None:
+            itr.release()
 
 
 def alter_tree(node):
@@ -85,7 +101,14 @@ class Iterator:
     """
 
     def __init__(self, dataset):
-        ITERATORS_LIST.append(weakref.ref(self))
+        if context:
+            device_type = context.get_context("device_target")
+            if device_type == "GPU":
+                ITERATORS_LIST.append(self)
+            else:
+                ITERATORS_LIST.append(weakref.ref(self))
+        else:
+            ITERATORS_LIST.append(weakref.ref(self))
         # create a copy of tree and work on it.
         self.dataset = copy.deepcopy(dataset)
         self.dataset = alter_tree(self.dataset)

From 84691e0cdf720435f2f1ea6253bb295a650e2fb6 Mon Sep 17 00:00:00 2001
From: lizhenyu <lizhenyu13@huawei.com>
Date: Wed, 29 Apr 2020 17:13:43 +0800
Subject: [PATCH 200/242] fix pylint and codedex warnings

---
 mindspore/_akg/gpu/squeeze_grad.py            |  4 ++--
 mindspore/_akg/message.py                     | 19 +++++-----------
 mindspore/_akg/op_build.py                    |  6 ++---
 mindspore/_akg/ops/math/mean.py               |  4 ++--
 .../_akg/ops/math/{sum.py => sum_value.py}    |  0
 mindspore/ccsrc/kernel/common_utils.cc        | 22 +++++++++----------
 .../ops/_op_impl/akg/gpu/squeeze_grad.py      |  1 -
 7 files changed, 24 insertions(+), 32 deletions(-)
 rename mindspore/_akg/ops/math/{sum.py => sum_value.py} (100%)

diff --git a/mindspore/_akg/gpu/squeeze_grad.py b/mindspore/_akg/gpu/squeeze_grad.py
index 8180ff9638..ae31de8e84 100644
--- a/mindspore/_akg/gpu/squeeze_grad.py
+++ b/mindspore/_akg/gpu/squeeze_grad.py
@@ -15,14 +15,14 @@
 """squeeze grad"""
 import _akg.topi as topi
 
-def SqueezeGrad(y_grad, x_shape, axis=None):
+
+def SqueezeGrad(y_grad, x_shape):
     """
     Computes gradients for squeeze op.
 
     Args:
         y_grad (tvm.tensor.Tensor): the gradient needed to be propagation.
         x_shape (Union[list, tuple]): output Tensor shape.
-        axis (Union[list, tuple, int, None], optional): eliminated axis by squeeze.
 
     Returns:
         tvm.tensor.Tensor: output gradient.
diff --git a/mindspore/_akg/message.py b/mindspore/_akg/message.py
index 4528771848..3d1f81f914 100644
--- a/mindspore/_akg/message.py
+++ b/mindspore/_akg/message.py
@@ -46,7 +46,8 @@ def compilewithjson(json_str):
         impl_path = os.path.realpath(kernel_info['impl_path'])
         if os.path.isfile(impl_path):
             custom_mod_name = Path(impl_path).resolve().stem
-            mod_spec = importlib.util.spec_from_file_location(custom_mod_name, impl_path)
+            mod_spec = importlib.util.spec_from_file_location(
+                custom_mod_name, impl_path)
             custom_mod = importlib.util.module_from_spec(mod_spec)
             mod_spec.loader.exec_module(custom_mod)
             op_func = getattr(custom_mod, op_name, None)
@@ -57,7 +58,8 @@ def compilewithjson(json_str):
             op_func = getattr(gpu, op_name, None)
 
     if op_func is None:
-        logging.error("this op not supported, please check op name %s", str(op_name))
+        logging.error(
+            "this op not supported, please check op name %s", str(op_name))
         return False
 
     args = {}
@@ -87,25 +89,16 @@ def compilewithjson(json_str):
 
     output = op_func(**args)
 
-    schedule_func = None
-    attrs = {}
     if isinstance(output, (list, tuple)):
         from inspect import isfunction
         tmp_outputs = []
         for elem in output:
-            if isfunction(elem):
-                schedule_func = elem
-            elif isinstance(elem, dict):
-                for key, value in elem.items():
-                    if key not in attrs or not attrs[key]:
-                        attrs[key] = value
-            else:
+            if not (isfunction(elem) or isinstance(elem, dict)):
                 tmp_outputs.append(elem)
 
         output = tmp_outputs
     else:
         output = [output]
 
-
     tsr = tsr + [i for i in output if TensorUtils.is_output_value(i)]
-    return op_build([op_name], output, tsr, schedule_func, processor, kernel_info['op'], attrs)
+    return op_build([op_name], output, tsr, processor, kernel_info['op'])
diff --git a/mindspore/_akg/op_build.py b/mindspore/_akg/op_build.py
index aa6a65cff1..92101f657e 100644
--- a/mindspore/_akg/op_build.py
+++ b/mindspore/_akg/op_build.py
@@ -25,8 +25,8 @@ from _akg import save_gpu_param as gpu_utils
 from _akg.utils import validation_check as vc_util
 
 
-@vc_util.check_input_type(list, (list, tuple), (list, tuple), (types.FunctionType, type(None)), str, str, dict)
-def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attrs):
+@vc_util.check_input_type(list, (list, tuple), (list, tuple), str, str)
+def op_build(opnames, computes, args, device, kernel_name):
     """op_build"""
     kernel_meta_path = "./cuda_meta_" + str(os.getpid()) + "/"
     if device == "cuda":
@@ -60,7 +60,7 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr
                     kernel_info = (ptx_code, json_file, kernel_name)
                     gpu_utils.save_gpu_params(s, args, kernel_info)
             os.chmod(ptx_file, 0o400)
-        except Exception:
+        except IOError:
             logging.error(traceback.format_exc())
             return None
         return True
diff --git a/mindspore/_akg/ops/math/mean.py b/mindspore/_akg/ops/math/mean.py
index 8764387d33..e8300f22fc 100644
--- a/mindspore/_akg/ops/math/mean.py
+++ b/mindspore/_akg/ops/math/mean.py
@@ -17,7 +17,7 @@ import _akg.topi
 import _akg.tvm
 from _akg.utils import format_transform as ft_util
 from _akg.utils import validation_check as vc_util
-from _akg.ops.math import sum
+from _akg.ops.math import sum_value
 
 
 @vc_util.check_input_type(_akg.tvm.tensor.Tensor, (list, tuple, int, type(None)), (bool, type(None)))
@@ -41,7 +41,7 @@ def mean(data, axis=None, keepdims=False):
     count = 1
     for i in axis:
         count *= shape[i]
-    output, _ = sum.sum_value(data, axis, keepdims)
+    output, _ = sum_value.sum_value(data, axis, keepdims)
     res = _akg.topi.divide(output, count)
 
     return res
diff --git a/mindspore/_akg/ops/math/sum.py b/mindspore/_akg/ops/math/sum_value.py
similarity index 100%
rename from mindspore/_akg/ops/math/sum.py
rename to mindspore/_akg/ops/math/sum_value.py
diff --git a/mindspore/ccsrc/kernel/common_utils.cc b/mindspore/ccsrc/kernel/common_utils.cc
index 8316116486..54980c2cb7 100644
--- a/mindspore/ccsrc/kernel/common_utils.cc
+++ b/mindspore/ccsrc/kernel/common_utils.cc
@@ -131,18 +131,18 @@ void KernelMeta::Initialize() {
 }
 
 void KernelMeta::RemoveKernelCache() {
-  if (access(kernel_meta_path_.c_str(), 0) == 0) {
-    DIR *dir = opendir(kernel_meta_path_.c_str());
-    MS_EXCEPTION_IF_NULL(dir);
-    struct dirent *entry;
-    while ((entry = readdir(dir)) != nullptr) {
-      std::string kernel_file = entry->d_name;
-      std::string kernel_file_realpath = kernel_meta_path_ + kernel_file;
-      (void)remove(kernel_file_realpath.c_str());
-    }
-    (void)closedir(dir);
-    (void)rmdir(kernel_meta_path_.c_str());
+  DIR *dir = opendir(kernel_meta_path_.c_str());
+  if (dir == nullptr) {
+    return;
+  }
+  struct dirent *entry;
+  while ((entry = readdir(dir)) != nullptr) {
+    std::string kernel_file = entry->d_name;
+    std::string kernel_file_realpath = kernel_meta_path_ + kernel_file;
+    (void)remove(kernel_file_realpath.c_str());
   }
+  (void)closedir(dir);
+  (void)rmdir(kernel_meta_path_.c_str());
 }
 
 std::string KernelMeta::Search(const std::string &kernel_name) const {
diff --git a/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py b/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py
index ef397ea0a7..17e45a327a 100644
--- a/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py
+++ b/mindspore/ops/_op_impl/akg/gpu/squeeze_grad.py
@@ -20,7 +20,6 @@ squeeze_grad_op_info = AkgRegOp("SqueezeGrad") \
     .input(0, "y_grad") \
     .output(0, "output") \
     .attr("x_shape", "required", "listInt") \
-    .attr("axis", "optional", "listInt") \
     .dtype_format(DataType.F16_Default, DataType.F16_Default) \
     .dtype_format(DataType.F32_Default, DataType.F32_Default) \
     .get_op_info()

From 5426899569535db22b4988d6fd5f3837e290ccf3 Mon Sep 17 00:00:00 2001
From: lihongkang <lihongkang1@huawei.com>
Date: Tue, 28 Apr 2020 19:24:33 +0800
Subject: [PATCH 201/242] update mindspore/ops/operations/other_ops.py.

update mindspore/ops/operations/other_ops.py.
---
 mindspore/ops/operations/other_ops.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py
index 5e66050d9a..f2c0fccca9 100644
--- a/mindspore/ops/operations/other_ops.py
+++ b/mindspore/ops/operations/other_ops.py
@@ -76,8 +76,13 @@ class BoundingBoxEncode(PrimitiveWithInfer):
         Tensor, encoded bounding boxes.
 
     Examples:
+        >>> anchor_box = Tensor([[4,1,2,1],[2,2,2,3]],mindspore.float32)
+        >>> groundtruth_box = Tensor([[3,1,2,2],[1,2,1,4]],mindspore.float32)
         >>> boundingbox_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0))
-        >>> delta_box = boundingbox_encode(anchor_box, groundtruth_box)
+        >>> boundingbox_encode(anchor_box, groundtruth_box)
+        [[5.0000000e-01  5.0000000e-01  -6.5504000e+04  6.9335938e-01]
+         [-1.0000000e+00  2.5000000e-01  0.0000000e+00  4.0551758e-01]]
+
     """
 
     @prim_attr_register
@@ -118,9 +123,14 @@ class BoundingBoxDecode(PrimitiveWithInfer):
         Tensor, decoded boxes.
 
     Examples:
+        >>> anchor_box = Tensor([[4,1,2,1],[2,2,2,3]],mindspore.float32)
+        >>> deltas = Tensor([[3,1,2,2],[1,2,1,4]],mindspore.float32)
         >>> boundingbox_decode = P.BoundingBoxDecode(means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0),
         >>>                                          max_shape=(768, 1280), wh_ratio_clip=0.016)
-        >>> bbox = boundingbox_decode(anchor_box, deltas)
+        >>> boundingbox_decode(anchor_box, deltas)
+        [[4.1953125  0.  0.  5.1953125]
+         [2.140625  0.  3.859375  60.59375]]
+
     """
 
     @prim_attr_register

From 34bfa2f7c9199c078665107e80f7ab9a2a5d4e48 Mon Sep 17 00:00:00 2001
From: jiangzhiwen <jiangzhiwen8@huawei.com>
Date: Wed, 29 Apr 2020 17:18:12 +0800
Subject: [PATCH 202/242] fix skip

---
 .../dataset/engine/datasetops/skip_op.cc      | 100 ++++++++----------
 .../ccsrc/dataset/engine/datasetops/skip_op.h |  19 +---
 tests/ut/cpp/dataset/skip_op_test.cc          |   2 +-
 tests/ut/python/dataset/test_skip.py          |  68 +++++++++++-
 4 files changed, 118 insertions(+), 71 deletions(-)

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc
index d851f2c699..a7b642d9d1 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.cc
@@ -16,6 +16,7 @@
 #include <iostream>
 #include <utility>
 
+#include "dataset/core/config_manager.h"
 #include "dataset/engine/data_buffer.h"
 #include "dataset/engine/datasetops/skip_op.h"
 #include "dataset/engine/db_connector.h"
@@ -26,7 +27,10 @@
 namespace mindspore {
 namespace dataset {
 // Builder constructor.  Creates the builder object.
-SkipOp::Builder::Builder(int32_t count) : build_max_skips_(count) {}
+SkipOp::Builder::Builder(int32_t count) : build_max_skips_(count) {
+  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
+  builder_op_connector_size_ = cfg->op_connector_size();
+}
 
 Status SkipOp::Builder::SanityCheck() const {
   if (build_max_skips_ < 0) {
@@ -39,12 +43,13 @@ Status SkipOp::Builder::SanityCheck() const {
 // The builder "build" method creates the final object.
 Status SkipOp::Builder::Build(std::shared_ptr<SkipOp> *ptr) {
   RETURN_IF_NOT_OK(SanityCheck());
-  *ptr = std::make_shared<SkipOp>(build_max_skips_);
+  *ptr = std::make_shared<SkipOp>(build_max_skips_, builder_op_connector_size_);
   return Status::OK();
 }
 
 // Constructor of the SkipOp.
-SkipOp::SkipOp(int32_t count) : PipelineOp(0), max_skips_(count), skip_count_(0) {}
+SkipOp::SkipOp(int32_t count, int32_t op_connector_size)
+    : PipelineOp(op_connector_size), max_skips_(count), skip_count_(0) {}
 
 // Destructor
 SkipOp::~SkipOp() {}
@@ -59,49 +64,6 @@ void SkipOp::Print(std::ostream &out, bool show_all) const {
       << "\nCurrent skip count: " << skip_count_ << "\nMax skip count: " << max_skips_;
 }
 
-// Since the buffer may contain multi rows, this function will drop the rows
-// that need to skip in it, and then return the buffer.
-Status SkipOp::GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) {
-  if (child_.empty()) {
-    RETURN_STATUS_UNEXPECTED("SkipOp can't be the leaf node.");
-  }
-
-  std::unique_ptr<DataBuffer> buf;
-  RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true));
-
-  // Drop first max_skips_ rows
-  while (skip_count_ < max_skips_) {
-    if (buf->eoe() || buf->eof()) {
-      break;
-    }
-
-    // Consider the rows of buffer more than 1
-    TensorRow drop_row;
-    int row_num = buf->NumRows();
-    int drop_num = row_num + skip_count_ < max_skips_ ? row_num : max_skips_ - skip_count_;
-    skip_count_ += drop_num;
-    for (int i = 0; i < drop_num; i++) {
-      RETURN_IF_NOT_OK(buf->PopRow(&drop_row));
-    }
-    if (buf->NumRows() == 0) {
-      RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true));
-    }
-  }
-
-  // Handling eoe
-  if (buf->eoe()) {
-    RETURN_IF_NOT_OK(EoeReceived(worker_id));
-  }
-
-  // Handling eof
-  if (buf->eof()) {
-    RETURN_IF_NOT_OK(EofReceived(worker_id));
-  }
-
-  *p_buffer = std::move(buf);
-  return Status::OK();
-}
-
 // Base-class override for handling cases when an eoe is received.
 Status SkipOp::EoeReceived(int32_t worker_id) {
   skip_count_ = 0;
@@ -109,13 +71,45 @@ Status SkipOp::EoeReceived(int32_t worker_id) {
   return Status::OK();
 }
 
-// Class functor operator () override.
-// Most dataset ops operate by launching a thread (see ExecutionTree).
-// However, the SkipOp is defined as a inlined operator, so it is invalid to
-// launch the functor since this op runs inlined inside another operator.  The
-// function is overloaded to ensure that it is not called by mistake (it will
-// generate an error).
-Status SkipOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. SkipOp is an inlined operator."); }
+// Main entry point of SkipOp: drops up to max_skips_ leading rows between eoe markers and forwards the rest
+Status SkipOp::operator()() {
+  TaskManager::FindMe()->Post();
+  std::unique_ptr<DataBuffer> curr_buffer;
+  RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
+  while (!curr_buffer->eof()) {
+    // Reset the skipped-row counter before processing the buffers up to the next eoe
+    skip_count_ = 0;
+    while (!curr_buffer->eoe()) {
+      // Drop leading rows. NOTE(review): if this stops on eoe/eof, that buffer is still forwarded below - confirm
+      while (skip_count_ < max_skips_) {
+        if (curr_buffer->eoe() || curr_buffer->eof()) {
+          break;
+        }
+        // A buffer may hold several rows: drop all of them, or only as many as are still owed
+        TensorRow drop_row;
+        int row_num = curr_buffer->NumRows();
+        int drop_num = row_num + skip_count_ < max_skips_ ? row_num : max_skips_ - skip_count_;
+        skip_count_ += drop_num;
+        for (int i = 0; i < drop_num; i++) {
+          RETURN_IF_NOT_OK(curr_buffer->PopRow(&drop_row));
+        }
+        if (curr_buffer->NumRows() == 0) {
+          RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
+        }
+      }
+      RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(curr_buffer)));
+      RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
+    }
+    // The inner loop ended on an eoe buffer: emit a fresh eoe downstream, then keep reading until eof
+    MS_LOG(DEBUG) << "Skip operator EOE Received.";
+    RETURN_IF_NOT_OK(out_connector_->Add(0, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE)));
+    RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
+  }
+
+  MS_LOG(DEBUG) << "Skip operator EOF Received.";
+  RETURN_IF_NOT_OK(out_connector_->Add(0, std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF)));
+  return Status::OK();
+}
 
 // Base-class override for handling cases when an eof is received.
 Status SkipOp::EofReceived(int32_t worker_id) {
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h
index 0ae520c3ad..a16b82ed21 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h
@@ -42,6 +42,7 @@ class SkipOp : public PipelineOp {
 
    private:
     int32_t build_max_skips_;
+    int32_t builder_op_connector_size_;
 
     Status SanityCheck() const;
   };
@@ -49,7 +50,7 @@ class SkipOp : public PipelineOp {
   // Constructor of the SkipOp.
   // @note The builder class should be used to call it
   // @param count - The number of skips to do
-  explicit SkipOp(int32_t count);
+  explicit SkipOp(int32_t count, int32_t op_connector_size);
 
   // Destructor
   ~SkipOp();
@@ -60,23 +61,11 @@ class SkipOp : public PipelineOp {
   void Print(std::ostream &out, bool show_all) const override;
 
   // Class functor operator () override.
-  // Most dataset ops operate by launching a thread (see ExecutionTree).
-  // However, the SkipOp is defined as a inlined operator, so it is invalid to launch the
-  // functor since this op runs inlined inside another operator.  The function is overloaded to
-  // ensure that it is not called by mistake (it will generate an error).
+  // All dataset ops operate by launching a thread (see ExecutionTree). This class functor will
+  // provide the master loop that drives the logic for performing the work
   // @return Status - The error code return
   Status operator()() override;
 
-  // This function returns the buffer that is at the top of our output connector. The caller is
-  // typically our parent node, when the parent is asking us to provide the next buffer of data.
-  // Since SkipOp is an inlined op, getting a buffer from us will simply bounce you to get
-  // a buffer from our child.
-  // @param p_buffer - output pointer to the buffer that it will fetch.
-  // @param worker_id - The worker id
-  // @param retry_if_eoe Set this flag to true to allow calling pop() again after the first pop() returns EOE.
-  // @return Status - The error code return
-  Status GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) override;
-
   // Base-class override for handling cases when an eoe is received.
   // @param worker_id - The worker id
   Status EoeReceived(int32_t worker_id) override;
diff --git a/tests/ut/cpp/dataset/skip_op_test.cc b/tests/ut/cpp/dataset/skip_op_test.cc
index c2168b24d4..697745512d 100644
--- a/tests/ut/cpp/dataset/skip_op_test.cc
+++ b/tests/ut/cpp/dataset/skip_op_test.cc
@@ -47,7 +47,7 @@ TEST_F(MindDataTestSkipOp, TestSkipOpFuntions) {
   ASSERT_TRUE(rc.IsOk());
 
   // SkipOp
-  std::shared_ptr<SkipOp> skip_op = std::make_shared<SkipOp>(5);
+  std::shared_ptr<SkipOp> skip_op = std::make_shared<SkipOp>(5, 2);
   rc = my_tree->AssociateNode(skip_op);
   ASSERT_TRUE(rc.IsOk());
 
diff --git a/tests/ut/python/dataset/test_skip.py b/tests/ut/python/dataset/test_skip.py
index 59893f6ded..ccbf40a55b 100644
--- a/tests/ut/python/dataset/test_skip.py
+++ b/tests/ut/python/dataset/test_skip.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 import numpy as np
 
 import mindspore.dataset.transforms.vision.c_transforms as vision
@@ -51,7 +50,7 @@ def generator_md():
 
 
 def test_generator_skip():
-    ds1 = ds.GeneratorDataset(generator_md, ["data"])
+    ds1 = ds.GeneratorDataset(generator_md, ["data"], num_parallel_workers=4)
 
     # Here ds1 should be [3, 4]
     ds1 = ds1.skip(3)
@@ -60,6 +59,7 @@ def test_generator_skip():
     for data in ds1:
         buf.append(data[0][0])
     assert len(buf) == 2
+    assert buf == [3, 4]
 
 
 def test_skip_1():
@@ -72,6 +72,7 @@ def test_skip_1():
     for data in ds1:
         buf.append(data[0][0])
     assert len(buf) == 0
+    assert buf == []
 
 
 def test_skip_2():
@@ -84,6 +85,7 @@ def test_skip_2():
     for data in ds1:
         buf.append(data[0][0])
     assert len(buf) == 5
+    assert buf == [0, 1, 2, 3, 4]
 
 
 def test_skip_repeat_1():
@@ -99,6 +101,7 @@ def test_skip_repeat_1():
     for data in ds1:
         buf.append(data[0][0])
     assert len(buf) == 7
+    assert buf == [3, 4, 0, 1, 2, 3, 4]
 
 
 def test_skip_repeat_2():
@@ -114,6 +117,7 @@ def test_skip_repeat_2():
     for data in ds1:
         buf.append(data[0][0])
     assert len(buf) == 4
+    assert buf == [3, 4, 3, 4]
 
 
 def test_skip_repeat_3():
@@ -132,6 +136,62 @@ def test_skip_repeat_3():
     for data in ds1:
         buf.append(data[0][0])
     assert len(buf) == 6
+    assert buf == [3, 4, 3, 4, 3, 4]
+
+def test_skip_take_1():
+    ds1 = ds.GeneratorDataset(generator_md, ["data"])
+
+    # Here ds1 should be [0, 1, 2, 3]
+    ds1 = ds1.take(4)
+
+    # Here ds1 should be [2, 3]
+    ds1 = ds1.skip(2)
+
+    buf = []
+    for data in ds1:
+        buf.append(data[0][0])
+    assert len(buf) == 2
+    assert buf == [2, 3]
+
+def test_skip_take_2():
+    ds1 = ds.GeneratorDataset(generator_md, ["data"])
+
+    # Here ds1 should be [2, 3, 4]
+    ds1 = ds1.skip(2)
+
+    # Here ds1 should be [2, 3]
+    ds1 = ds1.take(2)
+
+    buf = []
+    for data in ds1:
+        buf.append(data[0][0])
+    assert len(buf) == 2
+    assert buf == [2, 3]
+
+
+def generator_1d():
+    for i in range(64):
+        yield (np.array([i]), )
+
+def test_skip_filter_1():
+    dataset = ds.GeneratorDataset(generator_1d, ['data'])
+    dataset = dataset.skip(5)
+    dataset = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4)
+
+    buf = []
+    for item in dataset:
+        buf.append(item[0][0])
+    assert buf == [5, 6, 7, 8, 9, 10]
+
+def test_skip_filter_2():
+    dataset = ds.GeneratorDataset(generator_1d, ['data'])
+    dataset = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4)
+    dataset = dataset.skip(5)
+
+    buf = []
+    for item in dataset:
+        buf.append(item[0][0])
+    assert buf == [5, 6, 7, 8, 9, 10]
 
 
 if __name__ == "__main__":
@@ -142,3 +202,7 @@ if __name__ == "__main__":
     test_skip_repeat_1()
     test_skip_repeat_2()
     test_skip_repeat_3()
+    test_skip_take_1()
+    test_skip_take_2()
+    test_skip_filter_1()
+    test_skip_filter_2()

From 44186d7338ff67b818eb19545c9677950d6b2bbb Mon Sep 17 00:00:00 2001
From: Xiaoda Zhang <zhangxiaoda@huawei.com>
Date: Wed, 29 Apr 2020 19:47:59 +0800
Subject: [PATCH 203/242] fix some code-style warnings

---
 mindspore/ccsrc/parallel/step_parallel.cc                        | 1 -
 .../parallel/strategy_checkpoint/parallel_strategy_checkpoint.h  | 1 -
 2 files changed, 2 deletions(-)

diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index 62fb96c297..a96df98e5d 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -1427,7 +1427,6 @@ void ExtractInformation(const std::vector<AnfNodePtr> &all_nodes) {
       std::string strategy_key_name = cnode->scope()->name() + std::string(CONNSYMBOL) + instance_name;
       bool load_strategy_from_ckpt =
         StrategyCheckpoint::GetInstance().LoadCheckPointOn() && stra_map.find(strategy_key_name) != stra_map.end();
-
       if (!StrategyFound(attrs) && !load_strategy_from_ckpt) {
         MS_LOG(INFO) << "ExtractInformation: the strategy of node " << node->ToString() << " prim " << prim->name()
                      << " is empty, using batch parallel";
diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
index 0cf6229fa3..a758a9e7bb 100644
--- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
+++ b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
@@ -25,7 +25,6 @@
 
 namespace mindspore {
 namespace parallel {
-
 using StrategyMap = std::unordered_map<std::string, StrategyPtr>;
 class StrategyCheckpoint {
  public:

From e4517964cb33b148135e405071e316f146d6752e Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Wed, 29 Apr 2020 20:33:00 +0800
Subject: [PATCH 204/242] support relative and full paths

---
 .../run_distribute_train.sh                   | 26 ++++++++++++++-----
 .../run_standalone_train.sh                   | 16 +++++++++---
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/example/resnet101_imagenet2012/run_distribute_train.sh b/example/resnet101_imagenet2012/run_distribute_train.sh
index 5165f58cab..ecdcd66859 100755
--- a/example/resnet101_imagenet2012/run_distribute_train.sh
+++ b/example/resnet101_imagenet2012/run_distribute_train.sh
@@ -20,23 +20,35 @@ then
 exit 1
 fi
 
-if [ ! -f $1 ]
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+PATH1=$(get_real_path $1)
+PATH2=$(get_real_path $2)
+echo $PATH1
+echo $PATH2
+
+if [ ! -f $PATH1 ]
 then 
-    echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
+    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file"
 exit 1
 fi 
 
-if [ ! -d $2 ]
+if [ ! -d $PATH2 ]
 then 
-    echo "error: DATASET_PATH=$2 is not a directory"
+    echo "error: DATASET_PATH=$PATH2 is not a directory"
 exit 1
 fi 
 
 ulimit -u unlimited
 export DEVICE_NUM=8
 export RANK_SIZE=8
-export MINDSPORE_HCCL_CONFIG_PATH=$1
-export RANK_TABLE_FILE=$1
+export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
+export RANK_TABLE_FILE=$PATH1
 
 for((i=0; i<${DEVICE_NUM}; i++))
 do
@@ -49,6 +61,6 @@ do
     cd ./train_parallel$i || exit
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
     env > env.log
-    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log &
+    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log &
     cd ..
 done
diff --git a/example/resnet101_imagenet2012/run_standalone_train.sh b/example/resnet101_imagenet2012/run_standalone_train.sh
index 9ba5742515..dde018b8eb 100755
--- a/example/resnet101_imagenet2012/run_standalone_train.sh
+++ b/example/resnet101_imagenet2012/run_standalone_train.sh
@@ -20,9 +20,19 @@ then
 exit 1
 fi
 
-if [ ! -d $1 ]
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+PATH1=$(get_real_path $1)
+echo $PATH1
+
+if [ ! -d $PATH1 ]
 then 
-    echo "error: DATASET_PATH=$1 is not a directory"
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
 exit 1
 fi 
 
@@ -42,5 +52,5 @@ cp *.sh ./train
 cd ./train || exit
 echo "start training for device $DEVICE_ID"
 env > env.log
-python train.py --do_train=True --dataset_path=$1 &> log &
+python train.py --do_train=True --dataset_path=$PATH1 &> log &
 cd ..

From 66e7a3684625b8ce6f324f05466cea817c5d4e55 Mon Sep 17 00:00:00 2001
From: zhaozhenlong <zhaozhenlong1@huawei.com>
Date: Tue, 28 Apr 2020 10:49:36 +0800
Subject: [PATCH 205/242] ImageGradients check 4d

fix DiagPart constant folding issue

fix argmin output type check

fix atan2 doc error

fix remove FusedBatchNorm and its grad
---
 mindspore/ccsrc/operator/ops.cc            |  2 ++
 mindspore/ccsrc/operator/ops.h             |  2 ++
 mindspore/ccsrc/transform/convert.cc       |  2 --
 mindspore/ccsrc/transform/op_declare.cc    | 22 ----------------------
 mindspore/ccsrc/transform/op_declare.h     |  4 ----
 mindspore/nn/layer/image.py                |  9 +++++----
 mindspore/ops/operations/array_ops.py      | 11 ++++++++---
 mindspore/ops/operations/math_ops.py       |  2 +-
 tests/ut/cpp/transform/convert_test.cc     |  8 ++++----
 tests/ut/python/nn/test_image_gradients.py |  8 ++++++++
 tests/ut/python/ops/test_array_ops.py      | 16 +++++++++++++---
 tests/ut/python/ops/test_math_ops.py       |  1 +
 12 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/mindspore/ccsrc/operator/ops.cc b/mindspore/ccsrc/operator/ops.cc
index 6510ef79ea..0a6fb0b3f6 100755
--- a/mindspore/ccsrc/operator/ops.cc
+++ b/mindspore/ccsrc/operator/ops.cc
@@ -174,6 +174,8 @@ const PrimitivePtr kPrimAvgPoolGrad = std::make_shared<Primitive>("AvgPoolGrad")
 const PrimitivePtr kPrimFusedBatchNorm = std::make_shared<Primitive>("FusedBatchNorm");
 const PrimitivePtr kPrimConv2D = std::make_shared<Primitive>("Conv2D");
 const PrimitivePtr kPrimFusedBatchNormGrad = std::make_shared<Primitive>("FusedBatchNormGrad");
+const PrimitivePtr kPrimBatchNorm = std::make_shared<Primitive>("BatchNorm");
+const PrimitivePtr kPrimBatchNormGrad = std::make_shared<Primitive>("BatchNormGrad");
 const PrimitivePtr kPrimReluGrad = std::make_shared<Primitive>("ReluGrad");
 const PrimitivePtr kPrimConv2DBackpropInput = std::make_shared<Primitive>("Conv2DBackpropInput");
 const PrimitivePtr kPrimConv2DBackpropFilter = std::make_shared<Primitive>("Conv2DBackpropFilter");
diff --git a/mindspore/ccsrc/operator/ops.h b/mindspore/ccsrc/operator/ops.h
index b37d068d94..8c63660c3e 100755
--- a/mindspore/ccsrc/operator/ops.h
+++ b/mindspore/ccsrc/operator/ops.h
@@ -175,6 +175,8 @@ extern const PrimitivePtr kPrimTanhGrad;
 extern const PrimitivePtr kPrimPooling;
 extern const PrimitivePtr kPrimPoolingGrad;
 extern const PrimitivePtr kPrimFusedBatchNorm;
+extern const PrimitivePtr kPrimBatchNorm;
+extern const PrimitivePtr kPrimBatchNormGrad;
 extern const PrimitivePtr kPrimConv2D;
 extern const PrimitivePtr kPrimMaxPool;
 extern const PrimitivePtr kPrimMaxPoolGrad;
diff --git a/mindspore/ccsrc/transform/convert.cc b/mindspore/ccsrc/transform/convert.cc
index e7ea44b555..fb98d16c26 100644
--- a/mindspore/ccsrc/transform/convert.cc
+++ b/mindspore/ccsrc/transform/convert.cc
@@ -221,7 +221,6 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
     {prim::kPrimAssign->name(), ADPT_DESC(Assign)},
     {prim::kPrimStateSetItem->name(), ADPT_DESC(Assign)},
     {prim::kPrimReluGrad->name(), ADPT_DESC(ReluGrad)},
-    {prim::kPrimFusedBatchNormGrad->name(), ADPT_DESC(FusedBatchNormGrad)},
     {prim::kPrimBiasAddGrad->name(), ADPT_DESC(BiasAddGrad)},
     {prim::kPrimConv2D->name(), ADPT_DESC(Conv2D)},
     {prim::kPrimConv2DBackpropInput->name(), ADPT_DESC(Conv2DBackpropInputD)},
@@ -229,7 +228,6 @@ std::unordered_map<std::string, OpAdapterDescPtr> &DfGraphConvertor::get_adpt_ma
     {prim::kPrimDepthwiseConv2dNative->name(), ADPT_DESC(DepthwiseConv2D)},
     {prim::kPrimDepthwiseConv2dNativeBackpropFilter->name(), ADPT_DESC(DepthwiseConv2DBackpropFilterD)},
     {prim::kPrimDepthwiseConv2dNativeBackpropInput->name(), ADPT_DESC(DepthwiseConv2DBackpropInputD)},
-    {prim::kPrimFusedBatchNorm->name(), ADPT_DESC(FusedBatchNorm, BatchNorm)},
     {string(kNameBatchNorm), ADPT_DESC(BatchNorm)},
     {string(kNameBatchNormGrad), ADPT_DESC(BatchNormGrad)},
     {string(kNameReshape), ADPT_DESC(Reshape)},
diff --git a/mindspore/ccsrc/transform/op_declare.cc b/mindspore/ccsrc/transform/op_declare.cc
index b1195cfb1c..8159204155 100644
--- a/mindspore/ccsrc/transform/op_declare.cc
+++ b/mindspore/ccsrc/transform/op_declare.cc
@@ -703,28 +703,6 @@ INPUT_MAP(ReluGrad) = {{1, INPUT_DESC(gradients)}, {2, INPUT_DESC(features)}};
 ATTR_MAP(ReluGrad) = EMPTY_ATTR_MAP;
 OUTPUT_MAP(ReluGrad) = {{0, OUTPUT_DESC(backprops)}};
 
-// FusedBatchNorm
-INPUT_MAP(FusedBatchNorm) = {
-  {1, INPUT_DESC(x)}, {2, INPUT_DESC(scale)}, {3, INPUT_DESC(b)}, {4, INPUT_DESC(mean)}, {5, INPUT_DESC(variance)}};
-ATTR_MAP(FusedBatchNorm) = {{"mode", ATTR_DESC(mode, AnyTraits<int64_t>())},
-                            {"momentum", ATTR_DESC(moving_average_fraction, AnyTraits<float>())},
-                            {"epsilon", ATTR_DESC(epsilon, AnyTraits<float>())}};
-OUTPUT_MAP(FusedBatchNorm) = {{0, OUTPUT_DESC(y)},
-                              {1, OUTPUT_DESC(running_mean)},
-                              {2, OUTPUT_DESC(running_variance)},
-                              {3, OUTPUT_DESC(save_mean)},
-                              {4, OUTPUT_DESC(save_inv_variance)}};
-
-// FusedBatchNromGrad
-INPUT_MAP(FusedBatchNormGrad) = {{1, INPUT_DESC(dy)},
-                                 {2, INPUT_DESC(x)},
-                                 {3, INPUT_DESC(scale)},
-                                 {4, INPUT_DESC(save_mean)},
-                                 {5, INPUT_DESC(save_inv_variance)}};
-ATTR_MAP(FusedBatchNormGrad) = {{"momentum", ATTR_DESC(momentum, AnyTraits<float>())},
-                                {"epsilon", ATTR_DESC(epsilon, AnyTraits<float>())}};
-OUTPUT_MAP(FusedBatchNormGrad) = {{0, OUTPUT_DESC(dx)}, {1, OUTPUT_DESC(bn_scale)}, {2, OUTPUT_DESC(bn_bias)}};
-
 // BiasAddGrad
 INPUT_MAP(BiasAddGrad) = {{1, INPUT_DESC(x)}};
 ATTR_MAP(BiasAddGrad) = {{"data_format", ATTR_DESC(data_format, AnyTraits<std::string>())}};
diff --git a/mindspore/ccsrc/transform/op_declare.h b/mindspore/ccsrc/transform/op_declare.h
index a2dc16c285..21cac35121 100755
--- a/mindspore/ccsrc/transform/op_declare.h
+++ b/mindspore/ccsrc/transform/op_declare.h
@@ -82,10 +82,6 @@ DECLARE_OP_USE_OUTPUT(HcomAllGather)
 DECLARE_OP_ADAPTER(Variable)
 DECLARE_OP_ADAPTER(ReluGrad)
 DECLARE_OP_USE_OUTPUT(ReluGrad)
-DECLARE_OP_ADAPTER(FusedBatchNorm)
-DECLARE_OP_USE_OUTPUT(FusedBatchNorm)
-DECLARE_OP_ADAPTER(FusedBatchNormGrad)
-DECLARE_OP_USE_OUTPUT(FusedBatchNormGrad)
 DECLARE_OP_ADAPTER(BiasAddGrad)
 DECLARE_OP_USE_OUTPUT(BiasAddGrad)
 DECLARE_OP_ADAPTER(MaxPoolWithArgmax)
diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py
index 3e139a2db5..f06c5fd30a 100644
--- a/mindspore/nn/layer/image.py
+++ b/mindspore/nn/layer/image.py
@@ -58,6 +58,7 @@ class ImageGradients(Cell):
         super(ImageGradients, self).__init__()
 
     def construct(self, images):
+        _check_input_4d(F.shape(images), "images", self.cls_name)
         batch_size, depth, height, width = P.Shape()(images)
         dy = images[:, :, 1:, :] - images[:, :, :height - 1, :]
         dy_last = P.Fill()(P.DType()(images), (batch_size, depth, 1, width), 0)
@@ -151,8 +152,8 @@ class SSIM(Cell):
         self.mean = P.DepthwiseConv2dNative(channel_multiplier=1, kernel_size=filter_size)
 
     def construct(self, img1, img2):
-        _check_input_4d(F.shape(img1), "img1", "SSIM")
-        _check_input_4d(F.shape(img2), "img2", "SSIM")
+        _check_input_4d(F.shape(img1), "img1", self.cls_name)
+        _check_input_4d(F.shape(img2), "img2", self.cls_name)
         P.SameTypeShape()(img1, img2)
         max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val)
         img1 = _convert_img_dtype_to_float32(img1, self.max_val)
@@ -244,8 +245,8 @@ class PSNR(Cell):
         self.max_val = max_val
 
     def construct(self, img1, img2):
-        _check_input_4d(F.shape(img1), "img1", "PSNR")
-        _check_input_4d(F.shape(img2), "img2", "PSNR")
+        _check_input_4d(F.shape(img1), "img1", self.cls_name)
+        _check_input_4d(F.shape(img2), "img2", self.cls_name)
         P.SameTypeShape()(img1, img2)
         max_val = _convert_img_dtype_to_float32(self.max_val, self.max_val)
         img1 = _convert_img_dtype_to_float32(img1, self.max_val)
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index abffde1865..b69b083e03 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -1016,6 +1016,7 @@ class Argmin(PrimitiveWithInfer):
         """init Argmin"""
         self.init_prim_io_names(inputs=['x'], outputs=['output'])
         validator.check_value_type("axis", axis, [int], self.name)
+        validator.check_type_name("output_type", output_type, [mstype.int32, mstype.int64], self.name)
         self.axis = axis
         self.add_prim_attr('output_type', output_type)
 
@@ -1726,7 +1727,9 @@ class Diag(PrimitiveWithInfer):
     def infer_value(self, x):
         if x is None:
             return None
-        validator.check_integer("input x rank", len(x.shape()), 1, Rel.EQ, self.name)
+        # do constant-folding only when x rank is 1
+        if len(x.shape()) != 1:
+            return None
         ret = np.diag(x.asnumpy())
         return Tensor(ret)
 
@@ -1752,7 +1755,7 @@ class DiagPart(PrimitiveWithInfer):
         >>>                   [0, 0, 3, 0],
         >>>                   [0, 0, 0, 4]])
         >>> diag_part = P.DiagPart()
-        >>> diag_part(x)
+        >>> diag_part(input_x)
         [1, 2, 3, 4]
     """
 
@@ -1776,7 +1779,9 @@ class DiagPart(PrimitiveWithInfer):
     def infer_value(self, x):
         if x is None:
             return None
-        validator.check("x rank", len(x.shape()), "", 2, Rel.EQ, self.name)
+        # do constant-folding only when x rank is 2
+        if len(x.shape()) != 2:
+            return None
         ret = np.diag(x.asnumpy())
         return Tensor(ret)
 
diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py
index 1dfe93136b..a634ebbb71 100644
--- a/mindspore/ops/operations/math_ops.py
+++ b/mindspore/ops/operations/math_ops.py
@@ -2037,7 +2037,7 @@ class Atan2(_MathBinaryOp):
     r"""
     Returns arctangent of input_x/input_y element-wise.
 
-    It returns :math:`\theta\ \in\ (-\frac{\pi}{2}, \frac{\pi}{2})`
+    It returns :math:`\theta\ \in\ [-\pi, \pi]`
     such that :math:`x = r*\sin(\theta), y = r*\cos(\theta)`, where :math:`r = \sqrt{x^2 + y^2}`.
 
     Inputs:
diff --git a/tests/ut/cpp/transform/convert_test.cc b/tests/ut/cpp/transform/convert_test.cc
index 4388312592..0f47499665 100644
--- a/tests/ut/cpp/transform/convert_test.cc
+++ b/tests/ut/cpp/transform/convert_test.cc
@@ -147,13 +147,13 @@ TEST_F(TestConvert, TestReluOps) {
 }
 
 TEST_F(TestConvert, TestConvertBatchNorm) {
-  PrimitivePtr fused_batch_norm = prim::kPrimFusedBatchNorm;
-  fused_batch_norm->AddAttr("epsilon", MakeValue(0.001f));
-  fused_batch_norm->AddAttr("momentum", MakeValue(0.1f));
+  PrimitivePtr batch_norm = prim::kPrimBatchNorm;
+  batch_norm->AddAttr("epsilon", MakeValue(0.001f));
+  batch_norm->AddAttr("momentum", MakeValue(0.1f));
 
   FuncGraphPtr anf_graph = std::make_shared<FuncGraph>();
   std::vector<AnfNodePtr> inputs;
-  inputs.push_back(NewValueNode(fused_batch_norm));
+  inputs.push_back(NewValueNode(batch_norm));
   for (unsigned int i = 0; i < 5; i++) {
     inputs.push_back(anf_graph->add_parameter());
   }
diff --git a/tests/ut/python/nn/test_image_gradients.py b/tests/ut/python/nn/test_image_gradients.py
index a2b9495443..e268ceb9d9 100644
--- a/tests/ut/python/nn/test_image_gradients.py
+++ b/tests/ut/python/nn/test_image_gradients.py
@@ -14,6 +14,7 @@
 # ============================================================================
 """ test image gradients """
 import numpy as np
+import pytest
 import mindspore.nn as nn
 import mindspore.context as context
 import mindspore.common.dtype as mstype
@@ -47,3 +48,10 @@ def test_compile_multi_channel():
                              [[[10,20],[30,40]], [[50,60],[70,80]]]]), dtype=dtype)
     net = Net()
     _executor.compile(net, image)
+
+def test_invalid_5d_input():
+    dtype = mstype.float32
+    image = Tensor(np.random.random([4, 1, 16, 16, 1]), dtype=dtype)
+    net = Net()
+    with pytest.raises(ValueError):
+        _executor.compile(net, image)
\ No newline at end of file
diff --git a/tests/ut/python/ops/test_array_ops.py b/tests/ut/python/ops/test_array_ops.py
index 01e7e32d50..61b8d48fea 100644
--- a/tests/ut/python/ops/test_array_ops.py
+++ b/tests/ut/python/ops/test_array_ops.py
@@ -14,16 +14,15 @@
 # ============================================================================
 """ test array ops """
 import functools
+import pytest
 import numpy as np
 import mindspore as ms
 from mindspore import Tensor
 from mindspore.nn import Cell
 from mindspore.ops import operations as P
-from mindspore.ops import functional as F
-from mindspore.ops import composite as C
 from mindspore.ops import prim_attr_register
+from mindspore.common import dtype as mstype
 from mindspore.ops.primitive import Primitive, PrimitiveWithInfer
-from mindspore.common.dtype import get_py_obj_dtype
 from mindspore._c_expression import signature_dtype as sig_dtype
 from mindspore._c_expression import signature_rw as sig_rw
 from mindspore._c_expression import signature_kind as sig_kind
@@ -96,6 +95,17 @@ def test_select():
     expect = np.array([[1, 8, 9], [10, 5, 6]])
     assert np.all(output.asnumpy() == expect)
 
+def test_argmin_invalid_output_type():
+    P.Argmin(-1, mstype.int64)
+    P.Argmin(-1, mstype.int32)
+    with pytest.raises(TypeError):
+        P.Argmin(-1, mstype.float32)
+    with pytest.raises(TypeError):
+        P.Argmin(-1, mstype.float64)
+    with pytest.raises(TypeError):
+        P.Argmin(-1, mstype.uint8)
+    with pytest.raises(TypeError):
+        P.Argmin(-1, mstype.bool_)
 
 class CustomOP(PrimitiveWithInfer):
     __mindspore_signature__ = (sig_dtype.T, sig_dtype.T, sig_dtype.T1,
diff --git a/tests/ut/python/ops/test_math_ops.py b/tests/ut/python/ops/test_math_ops.py
index b866c7c556..a4a645a7ef 100755
--- a/tests/ut/python/ops/test_math_ops.py
+++ b/tests/ut/python/ops/test_math_ops.py
@@ -17,6 +17,7 @@ import functools
 import numpy as np
 import mindspore as ms
 import mindspore.nn as nn
+from mindspore.common.api import _executor
 from mindspore.common import dtype as mstype
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore import Tensor

From 615b60d59098bb3713875fe23c381cd7e88b422a Mon Sep 17 00:00:00 2001
From: gengdongjie <gengdongjie@huawei.com>
Date: Wed, 29 Apr 2020 11:41:10 +0800
Subject: [PATCH 206/242] 1. remove unused variable in resnet.py 2. add
 relative path support in resnet50 example 3. optimize allreduce split strategy

---
 .../resnet50_cifar10/run_distribute_train.sh  | 23 ++++++++++++++-----
 .../resnet50_cifar10/run_standalone_train.sh  | 16 ++++++++++---
 example/resnet50_cifar10/train.py             |  4 ++--
 mindspore/model_zoo/resnet.py                 |  4 ++--
 4 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/example/resnet50_cifar10/run_distribute_train.sh b/example/resnet50_cifar10/run_distribute_train.sh
index e78e2bf104..e4bdd775b3 100755
--- a/example/resnet50_cifar10/run_distribute_train.sh
+++ b/example/resnet50_cifar10/run_distribute_train.sh
@@ -20,22 +20,33 @@ then
 exit 1
 fi
 
-if [ ! -f $1 ]
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+
+PATH1=$(get_real_path $1)
+PATH2=$(get_real_path $2)
+
+if [ ! -f "$PATH1" ]
 then 
-    echo "error: DMINDSPORE_HCCL_CONFIG_PATH=$1 is not a file"
+    echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file"
 exit 1
 fi 
 
-if [ ! -d $2 ]
+if [ ! -d "$PATH2" ]
 then 
-    echo "error: DATASET_PATH=$2 is not a directory"
+    echo "error: DATASET_PATH=$PATH2 is not a directory"
 exit 1
 fi 
 
 ulimit -u unlimited
 export DEVICE_NUM=8
 export RANK_SIZE=8
-export MINDSPORE_HCCL_CONFIG_PATH=$1
+export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
 
 for((i=0; i<${DEVICE_NUM}; i++))
 do
@@ -48,6 +59,6 @@ do
     cd ./train_parallel$i || exit
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
     env > env.log
-    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$2 &> log &
+    python train.py --do_train=True --run_distribute=True --device_num=$DEVICE_NUM --dataset_path=$PATH2 &> log &
     cd ..
 done
diff --git a/example/resnet50_cifar10/run_standalone_train.sh b/example/resnet50_cifar10/run_standalone_train.sh
index 90423630aa..cb08cde6c9 100755
--- a/example/resnet50_cifar10/run_standalone_train.sh
+++ b/example/resnet50_cifar10/run_standalone_train.sh
@@ -20,9 +20,19 @@ then
 exit 1
 fi
 
-if [ ! -d $1 ]
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+
+PATH1=$(get_real_path $1)
+
+if [ ! -d "$PATH1" ]
 then 
-    echo "error: DATASET_PATH=$1 is not a directory"
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
 exit 1
 fi 
 
@@ -41,5 +51,5 @@ cp *.sh ./train
 cd ./train || exit
 echo "start training for device $DEVICE_ID"
 env > env.log
-python train.py --do_train=True --dataset_path=$1 &> log &
+python train.py --do_train=True --dataset_path=$PATH1 &> log &
 cd ..
diff --git a/example/resnet50_cifar10/train.py b/example/resnet50_cifar10/train.py
index 0a3ad9dc5a..c39d1bcf88 100755
--- a/example/resnet50_cifar10/train.py
+++ b/example/resnet50_cifar10/train.py
@@ -57,12 +57,12 @@ if __name__ == '__main__':
     if not args_opt.do_eval and args_opt.run_distribute:
         context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                           mirror_mean=True)
-        auto_parallel_context().set_all_reduce_fusion_split_indices([140])
+        auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
         init()
 
     epoch_size = config.epoch_size
     net = resnet50(class_num=config.class_num)
-    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
 
 
     if args_opt.do_train:
diff --git a/mindspore/model_zoo/resnet.py b/mindspore/model_zoo/resnet.py
index 3055026718..001e1db0cf 100755
--- a/mindspore/model_zoo/resnet.py
+++ b/mindspore/model_zoo/resnet.py
@@ -168,7 +168,7 @@ class ResNet(nn.Cell):
         self.conv1 = _conv7x7(3, 64, stride=2)
         self.bn1 = _bn(64)
         self.relu = P.ReLU()
-        self.maxpool = P.MaxPoolWithArgmax(padding="same", ksize=3, strides=2)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
 
         self.layer1 = self._make_layer(block,
                                        layer_nums[0],
@@ -227,7 +227,7 @@ class ResNet(nn.Cell):
         x = self.conv1(x)
         x = self.bn1(x)
         x = self.relu(x)
-        c1, argmax = self.maxpool(x)
+        c1 = self.maxpool(x)
 
         c2 = self.layer1(c1)
         c3 = self.layer2(c2)

From 73bd2e9afb7918f346375be3e923565ed3a2f7bf Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Wed, 29 Apr 2020 22:06:57 +0800
Subject: [PATCH 207/242] modify weight init

---
 example/resnet101_imagenet2012/train.py    |   8 +-
 example/resnet101_imagenet2012/var_init.py | 192 ---------------------
 2 files changed, 5 insertions(+), 195 deletions(-)
 delete mode 100755 example/resnet101_imagenet2012/var_init.py

diff --git a/example/resnet101_imagenet2012/train.py b/example/resnet101_imagenet2012/train.py
index ca74262890..3d0a23f93a 100755
--- a/example/resnet101_imagenet2012/train.py
+++ b/example/resnet101_imagenet2012/train.py
@@ -34,7 +34,6 @@ from mindspore.communication.management import init
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from crossentropy import CrossEntropy
-from var_init import default_recurisive_init, KaimingNormal
 
 random.seed(1)
 np.random.seed(1)
@@ -68,8 +67,11 @@ if __name__ == '__main__':
     default_recurisive_init(net)
     for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = weight_init.initializer(KaimingNormal(a=math.sqrt(5),
-                                                                              mode='fan_out', nonlinearity='relu'),
+            cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
+                                                                cell.weight.default_input.shape(),
+                                                                cell.weight.default_input.dtype())
+        if isinstance(cell, nn.Dense):
+            cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
                                                                 cell.weight.default_input.shape(),
                                                                 cell.weight.default_input.dtype())
     if not config.label_smooth:
diff --git a/example/resnet101_imagenet2012/var_init.py b/example/resnet101_imagenet2012/var_init.py
deleted file mode 100755
index 34d8664a49..0000000000
--- a/example/resnet101_imagenet2012/var_init.py
+++ /dev/null
@@ -1,192 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""weight initial"""
-import math
-import numpy as np
-from mindspore.common import initializer as init
-import mindspore.nn as nn
-from mindspore import Tensor
-
-def calculate_gain(nonlinearity, param=None):
-    r"""Return the recommended gain value for the given nonlinearity function.
-    The values are as follows:
-    ================= ====================================================
-    nonlinearity      gain
-    ================= ====================================================
-    Linear / Identity :math:`1`
-    Conv{1,2,3}D      :math:`1`
-    Sigmoid           :math:`1`
-    Tanh              :math:`\frac{5}{3}`
-    ReLU              :math:`\sqrt{2}`
-    Leaky Relu        :math:`\sqrt{\frac{2}{1 + \text{negative\_slope}^2}}`
-    ================= ====================================================
-    Args:
-        nonlinearity: the non-linear function (`nn.functional` name)
-        param: optional parameter for the non-linear function
-    """
-    linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d']
-    gain = 0
-    if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
-        gain = 1
-    elif nonlinearity == 'tanh':
-        gain = 5.0 / 3
-    elif nonlinearity == 'relu':
-        gain = math.sqrt(2.0)
-    elif nonlinearity == 'leaky_relu':
-        if param is None:
-            negative_slope = 0.01
-        elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float):
-            # True/False are instances of int, hence check above
-            negative_slope = param
-        else:
-            raise ValueError("negative_slope {} not a valid number".format(param))
-        gain = math.sqrt(2.0 / (1 + negative_slope ** 2))
-    else:
-        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
-    return gain
-
-def _calculate_correct_fan(array, mode):
-    mode = mode.lower()
-    valid_modes = ['fan_in', 'fan_out']
-    if mode not in valid_modes:
-        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
-    fan_in, fan_out = _calculate_fan_in_and_fan_out(array)
-    return fan_in if mode == 'fan_in' else fan_out
-
-def kaiming_uniform_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-    r"""Fills the input `Tensor` with values according to the method
-    described in `Delving deep into rectifiers: Surpassing human-level
-    performance on ImageNet classification` - He, K. et al. (2015), using a
-    uniform distribution. The resulting tensor will have values sampled from
-    :math:`\mathcal{U}(-\text{bound}, \text{bound})` where
-    .. math::
-        \text{bound} = \text{gain} \times \sqrt{\frac{3}{\text{fan\_mode}}}
-    Also known as He initialization.
-
-    Args:
-        array: an n-dimensional `tensor`
-        a: the negative slope of the rectifier used after this layer (only
-        used with ``'leaky_relu'``)
-        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
-            preserves the magnitude of the variance of the weights in the
-            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
-            backwards pass.
-        nonlinearity: the non-linear function (`nn.functional` name),
-            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
-    fan = _calculate_correct_fan(array, mode)
-    gain = calculate_gain(nonlinearity, a)
-    std = gain / math.sqrt(fan)
-    bound = math.sqrt(3.0) * std  # Calculate uniform bounds from standard deviation
-    return np.random.uniform(-bound, bound, array.shape)
-
-def kaiming_normal_(array, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-    r"""Fills the input `Tensor` with values according to the method
-    described in `Delving deep into rectifiers: Surpassing human-level
-    performance on ImageNet classification` - He, K. et al. (2015), using a
-    normal distribution. The resulting tensor will have values sampled from
-    :math:`\mathcal{N}(0, \text{std}^2)` where
-    .. math::
-        \text{std} = \frac{\text{gain}}{\sqrt{\text{fan\_mode}}}
-    Also known as He initialization.
-
-    Args:
-        array: an n-dimensional `tensor`
-        a: the negative slope of the rectifier used after this layer (only
-        used with ``'leaky_relu'``)
-        mode: either ``'fan_in'`` (default) or ``'fan_out'``. Choosing ``'fan_in'``
-            preserves the magnitude of the variance of the weights in the
-            forward pass. Choosing ``'fan_out'`` preserves the magnitudes in the
-            backwards pass.
-        nonlinearity: the non-linear function (`nn.functional` name),
-            recommended to use only with ``'relu'`` or ``'leaky_relu'`` (default).
-    """
-    fan = _calculate_correct_fan(array, mode)
-    gain = calculate_gain(nonlinearity, a)
-    std = gain / math.sqrt(fan)
-    return np.random.normal(0, std, array.shape)
-
-def _calculate_fan_in_and_fan_out(array):
-    """calculate the fan_in and fan_out for input array"""
-    dimensions = len(array.shape)
-    if dimensions < 2:
-        raise ValueError("Fan in and fan out can not be computed for array with fewer than 2 dimensions")
-    num_input_fmaps = array.shape[1]
-    num_output_fmaps = array.shape[0]
-    receptive_field_size = 1
-    if dimensions > 2:
-        receptive_field_size = array[0][0].size
-    fan_in = num_input_fmaps * receptive_field_size
-    fan_out = num_output_fmaps * receptive_field_size
-    return fan_in, fan_out
-
-def assignment(arr, num):
-    """Assign the value of num to arr"""
-    if arr.shape == ():
-        arr = arr.reshape((1))
-        arr[:] = num
-        arr = arr.reshape(())
-    else:
-        if isinstance(num, np.ndarray):
-            arr[:] = num[:]
-        else:
-            arr[:] = num
-    return arr
-
-class KaimingUniform(init.Initializer):
-    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-        super(KaimingUniform, self).__init__()
-        self.a = a
-        self.mode = mode
-        self.nonlinearity = nonlinearity
-    def _initialize(self, arr):
-        tmp = kaiming_uniform_(arr, self.a, self.mode, self.nonlinearity)
-        assignment(arr, tmp)
-
-class KaimingNormal(init.Initializer):
-    def __init__(self, a=0, mode='fan_in', nonlinearity='leaky_relu'):
-        super(KaimingNormal, self).__init__()
-        self.a = a
-        self.mode = mode
-        self.nonlinearity = nonlinearity
-    def _initialize(self, arr):
-        tmp = kaiming_normal_(arr, self.a, self.mode, self.nonlinearity)
-        assignment(arr, tmp)
-
-def default_recurisive_init(custom_cell):
-    """weight init for conv2d and dense"""
-    for _, cell in custom_cell.cells_and_names():
-        if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape(),
-                                                         cell.weight.default_input.dtype())
-            if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
-                bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
-                                                                   cell.bias.default_input.shape()),
-                                                 cell.bias.default_input.dtype())
-        elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape(),
-                                                         cell.weight.default_input.dtype())
-            if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
-                bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound,
-                                                                   cell.bias.default_input.shape()),
-                                                 cell.bias.default_input.dtype())
-        elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
-            pass

From d853f94e2d90e1a14c07756f789a6be7c8a38954 Mon Sep 17 00:00:00 2001
From: ervinzhang <yizhi.zhang@huawei.com>
Date: Wed, 29 Apr 2020 09:34:51 -0400
Subject: [PATCH 208/242] fix docstring

---
 mindspore/dataset/engine/datasets.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 3d8b5b2ed0..5504cc3362 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -208,7 +208,6 @@ class Dataset:
         Add a blocking condition to the input Dataset
 
         Args:
-            input_dataset (Dataset): Input dataset to apply flow control
             num_batch (int): the number of batches without blocking at the start of each epoch
             condition_name (str): The condition name that is used to toggle sending next row
             callback (function): The callback funciton that will be invoked when sync_update is called
@@ -921,10 +920,13 @@ class Dataset:
 
     def sync_update(self, condition_name, num_batch=None, data=None):
         """
-        condition_name (str): The condition name that is used to toggle sending next row
-        num_batch (int or None): The number of batches(rows) that are released
-                         when pass_rows is None, will update the same number as sync_wait specified
-        data (dict or None): The data passed to the callback
+        Release a blocking condition and trigger callback with given data
+
+        Args:
+            condition_name (str): The condition name that is used to toggle sending next row
+            num_batch (int or None): The number of batches(rows) that are released
+                When num_batch is None, it will default to the number specified by the sync_wait operator
+            data (dict or None): The data passed to the callback
         """
         notifiers_dict = self.get_sync_notifiers()
         if condition_name not in notifiers_dict:

From 270bf831a9e0a2570a74e3ee6f36886f25c9fc56 Mon Sep 17 00:00:00 2001
From: Jesse Lee <jesse.lee@huawei.com>
Date: Wed, 29 Apr 2020 10:26:00 -0400
Subject: [PATCH 209/242] Random Data Op

---
 mindspore/ccsrc/dataset/api/de_pipeline.cc    |  41 ++
 mindspore/ccsrc/dataset/api/de_pipeline.h     |   3 +
 .../ccsrc/dataset/api/python_bindings.cc      |   2 +
 mindspore/ccsrc/dataset/engine/data_schema.cc |  19 +
 mindspore/ccsrc/dataset/engine/data_schema.h  |   7 +
 .../engine/datasetops/source/CMakeLists.txt   |   1 +
 .../datasetops/source/random_data_op.cc       | 414 ++++++++++++++++
 .../engine/datasetops/source/random_data_op.h | 271 +++++++++++
 mindspore/dataset/__init__.py                 |   2 +-
 mindspore/dataset/engine/datasets.py          |  51 ++
 mindspore/dataset/engine/iterators.py         |   2 +
 tests/ut/cpp/dataset/random_data_op_test.cc   | 457 ++++++++++++++++++
 .../dataset/testRandomData/datasetSchema.json |  14 +
 .../testRandomData/datasetSchema2.json        |  14 +
 .../ut/python/dataset/test_random_dataset.py  |  70 +++
 15 files changed, 1367 insertions(+), 1 deletion(-)
 create mode 100644 mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc
 create mode 100644 mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h
 create mode 100644 tests/ut/cpp/dataset/random_data_op_test.cc
 create mode 100644 tests/ut/data/dataset/testRandomData/datasetSchema.json
 create mode 100644 tests/ut/data/dataset/testRandomData/datasetSchema2.json
 create mode 100644 tests/ut/python/dataset/test_random_dataset.py

diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc
index c3dfeafe48..be133ea7a9 100644
--- a/mindspore/ccsrc/dataset/api/de_pipeline.cc
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc
@@ -28,6 +28,7 @@
 #include "dataset/engine/datasetops/source/manifest_op.h"
 #include "dataset/engine/datasetops/source/cifar_op.h"
 #include "dataset/engine/datasetops/source/celeba_op.h"
+#include "dataset/engine/datasetops/source/random_data_op.h"
 #include "dataset/engine/datasetops/source/text_file_op.h"
 #include "dataset/engine/datasetops/filter_op.h"
 #include "mindrecord/include/shard_category.h"
@@ -65,6 +66,7 @@ static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {{kStorage, &D
                                                                    {kCifar10, &DEPipeline::ParseCifar10Op},
                                                                    {kCifar100, &DEPipeline::ParseCifar100Op},
                                                                    {kCelebA, &DEPipeline::ParseCelebAOp},
+                                                                   {kRandomData, &DEPipeline::ParseRandomDataOp},
                                                                    {kTextFile, &DEPipeline::ParseTextFileOp}};
 
 DEPipeline::DEPipeline() : iterator_(nullptr) {
@@ -972,6 +974,45 @@ Status DEPipeline::ParseCifar100Op(const py::dict &args, std::shared_ptr<Dataset
   return Status::OK();
 }
 
+Status DEPipeline::ParseRandomDataOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
+  // Translate the python args into RandomDataOp builder settings.  num_samples is the only required argument.
+  RandomDataOp::Builder builder;
+
+  if (args["num_samples"].is_none()) {
+    std::string err_msg = "Error: num_samples is a required argument";
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+  std::vector<std::string> columns_to_load;
+  bool schema_exists = false;
+  // Optional arguments.  Keys not listed below are ignored; schema keys are only flagged here, loaded later.
+  for (auto arg : args) {
+    std::string key = py::str(arg.first);
+    py::handle value = arg.second;
+    if (key == "num_parallel_workers") {
+      (void)builder.SetNumWorkers(ToInt(value));
+    } else if (key == "schema_file_path" || key == "schema_json_string") {
+      schema_exists = true;  // NOTE(review): set on key presence alone, the value is not inspected here
+    } else if (key == "num_samples") {
+      (void)builder.SetTotalRows(ToInt(value));
+    } else if (key == "columns_list") {
+      columns_to_load = ToStringVector(value);  // only consumed by the schema load below
+    }
+  }
+  if (schema_exists) {
+    std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
+    if (args.contains("schema_file_path")) {  // the file path wins when both schema keys are present
+      RETURN_IF_NOT_OK(schema->LoadSchemaFile(ToString(args["schema_file_path"]), columns_to_load));
+    } else {
+      RETURN_IF_NOT_OK(schema->LoadSchemaString(ToString(args["schema_json_string"]), columns_to_load));
+    }
+    (void)builder.SetDataSchema(std::move(schema));
+  }
+  std::shared_ptr<RandomDataOp> op;
+  RETURN_IF_NOT_OK(builder.Build(&op));
+  *ptr = op;  // returned to the caller as a shared_ptr<DatasetOp>
+  return Status::OK();
+}
+
 int32_t DEPipeline::GetNumClasses() const { return num_classes_; }
 
 Status DEPipeline::ParseMnistOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.h b/mindspore/ccsrc/dataset/api/de_pipeline.h
index 7f9c6c459a..699348f157 100644
--- a/mindspore/ccsrc/dataset/api/de_pipeline.h
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.h
@@ -60,6 +60,7 @@ enum OpName {
   kCifar10,
   kCifar100,
   kCelebA,
+  kRandomData,
   kTextFile
 };
 
@@ -142,6 +143,8 @@ class DEPipeline {
 
   Status ParseCifar100Op(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
 
+  Status ParseRandomDataOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
+
   void PrintTree();
 
   int32_t GetNumClasses() const;
diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc
index dedee8e9b3..41a4143283 100644
--- a/mindspore/ccsrc/dataset/api/python_bindings.cc
+++ b/mindspore/ccsrc/dataset/api/python_bindings.cc
@@ -47,6 +47,7 @@
 #include "dataset/engine/datasetops/source/mnist_op.h"
 #include "dataset/engine/datasetops/source/manifest_op.h"
 #include "dataset/engine/datasetops/source/mindrecord_op.h"
+#include "dataset/engine/datasetops/source/random_data_op.h"
 #include "dataset/engine/datasetops/source/sampler/distributed_sampler.h"
 #include "dataset/engine/datasetops/source/sampler/pk_sampler.h"
 #include "dataset/engine/datasetops/source/sampler/random_sampler.h"
@@ -489,6 +490,7 @@ PYBIND11_MODULE(_c_dataengine, m) {
     .value("VOC", OpName::kVoc)
     .value("CIFAR10", OpName::kCifar10)
     .value("CIFAR100", OpName::kCifar100)
+    .value("RANDOMDATA", OpName::kRandomData)
     .value("CELEBA", OpName::kCelebA)
     .value("TEXTFILE", OpName::kTextFile);
 
diff --git a/mindspore/ccsrc/dataset/engine/data_schema.cc b/mindspore/ccsrc/dataset/engine/data_schema.cc
index 4fe5d665c6..c7d2aa44f0 100644
--- a/mindspore/ccsrc/dataset/engine/data_schema.cc
+++ b/mindspore/ccsrc/dataset/engine/data_schema.cc
@@ -466,5 +466,24 @@ Status DataSchema::PreLoadExceptionCheck(const nlohmann::json &js) {
                   "\"columns\" node is required in the schema json file.");
   return Status::OK();
 }
+
+// Loops through all columns in the schema and fills the output map with each column
+// name mapped to its column index number.  Fails on a null output map or an empty column name.
+Status DataSchema::GetColumnNameMap(std::unordered_map<std::string, int32_t> *out_column_name_map) {
+  if (out_column_name_map == nullptr) {
+    return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
+                  "unexpected null output column name map.");
+  }
+  // The index is stored as int32_t, so compare against the size as a signed value (no sign-compare mismatch).
+  for (int32_t i = 0; i < static_cast<int32_t>(col_descs_.size()); ++i) {
+    if (col_descs_[i].name().empty()) {
+      return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
+                    "Constructing column name map from schema, but found empty column name.");
+    }
+    (*out_column_name_map)[col_descs_[i].name()] = i;
+  }
+
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/engine/data_schema.h b/mindspore/ccsrc/dataset/engine/data_schema.h
index 4b2be76f07..9debd2d466 100644
--- a/mindspore/ccsrc/dataset/engine/data_schema.h
+++ b/mindspore/ccsrc/dataset/engine/data_schema.h
@@ -20,6 +20,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include <nlohmann/json.hpp>
 #include "dataset/core/constants.h"
@@ -180,6 +181,12 @@ class DataSchema {
 
   static const char DEFAULT_DATA_SCHEMA_FILENAME[];
 
+  // Loops through all columns in the schema and returns a map with the column
+  // name to column index number.
+  // @param out_column_name_map - The output map of columns names to column index
+  // @return Status - The error code return
+  Status GetColumnNameMap(std::unordered_map<std::string, int32_t> *out_column_name_map);
+
  private:
   // Internal helper function. Parses the json schema file in any order and produces a schema that
   // does not follow any particular order (json standard does not enforce any ordering protocol).
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
index 8801205f6c..b29c11226d 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
@@ -17,6 +17,7 @@ add_library(engine-datasetops-source OBJECT
     ${FEATURE_SRCS}
     manifest_op.cc
     cifar_op.cc
+    random_data_op.cc
     celeba_op.cc
     text_file_op.cc
     )
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc
new file mode 100644
index 0000000000..0f4632d3b9
--- /dev/null
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc
@@ -0,0 +1,414 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dataset/engine/datasetops/source/random_data_op.h"
+#include <iomanip>
+#include <random>
+#include "dataset/engine/execution_tree.h"
+#include "dataset/core/config_manager.h"
+#include "dataset/util/random.h"
+#include "dataset/util/wait_post.h"
+
+namespace mindspore {
+namespace dataset {
+// Builder constructor.  Creates the builder object and loads the config-driven defaults.
+RandomDataOp::Builder::Builder()
+    : builder_data_schema_(nullptr),  // no schema yet; Build() has the op generate one if none is set
+      builder_num_workers_(0),
+      builder_op_connector_size_(0),
+      builder_rows_per_buffer_(0),
+      builder_total_rows_(0) {  // 0 is treated by the RandomDataOp constructor as "pick a random row count"
+  // Some arguments to the RandomDataOp have a default argument that is taken from the config.
+  // The user may override these defaults by using the builder set methods.
+  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
+  builder_rows_per_buffer_ = cfg->rows_per_buffer();
+  builder_num_workers_ = cfg->num_parallel_workers();
+  builder_op_connector_size_ = cfg->op_connector_size();
+}
+
+// The build method that produces the instantiated RandomDataOp as a shared pointer in *out_op
+Status RandomDataOp::Builder::Build(std::shared_ptr<RandomDataOp> *out_op) {
+  RETURN_IF_NOT_OK(SanityCheck());
+
+  *out_op = std::make_shared<RandomDataOp>(builder_num_workers_, builder_op_connector_size_, builder_rows_per_buffer_,
+                                           builder_total_rows_, std::move(builder_data_schema_));
+
+  // If the user did not provide a schema, then we will ask the op to generate a pseudo-random
+  // schema.  (The builder's schema was moved into the op above, so the op's copy is the one to test.)
+  // See details of the GenerateSchema function to learn what type of schema it will create.
+  if ((*out_op)->data_schema_ == nullptr) {
+    RETURN_IF_NOT_OK((*out_op)->GenerateSchema());
+  }
+
+  // Extract the column name mapping from the schema and save it in the op.
+  // This will be needed when constructing buffers.
+  RETURN_IF_NOT_OK((*out_op)->data_schema_->GetColumnNameMap(&((*out_op)->column_name_map_)));
+
+  return Status::OK();
+}
+
+// Check if the required parameters are set by the builder.
+Status RandomDataOp::Builder::SanityCheck() const {
+  // There are actually no required arguments for the random data op at all.
+  // Some arguments are preset with global values from config, and the rest are created randomly
+  // if the user does not give them.  This function is kept for consistency with other operators.
+  return Status::OK();
+}
+
+// Constructor for RandomDataOp.  Takes ownership of the (possibly null) schema and seeds the generator.
+RandomDataOp::RandomDataOp(int32_t num_workers, int32_t op_connector_size, int64_t rows_per_buffer, int64_t total_rows,
+                           std::unique_ptr<DataSchema> data_schema)
+    : ParallelOp(num_workers, op_connector_size),
+      buffer_id_(0),
+      rows_per_buffer_(rows_per_buffer),
+      total_rows_(total_rows),
+      epoch_buffers_sent_(0),
+      guys_in_(0),
+      guys_out_(num_workers_),  // reads the num_workers_ member (presumably set by ParallelOp), not the argument
+      eoe_worker_id_(0),
+      data_schema_(std::move(data_schema)) {
+  rand_gen_.seed(GetSeed());  // seed the random generator
+  // If total rows was not given (0), then randomly pick a number in [1, kMaxTotalRows]
+  if (total_rows_ == 0) {
+    total_rows_ = GenRandomInt(1, kMaxTotalRows);
+  }
+  // Everyone is already out from the sync area, so the first EpochSync must not block on all_out_.
+  all_out_.Set();
+}
+
+// A print method typically used for debugging.  show_all selects the detailed form over the 1-line summary.
+void RandomDataOp::Print(std::ostream &out, bool show_all) const {
+  // Always show the id and name as first line regardless if this summary or detailed print
+  out << "(" << std::setw(2) << operator_id_ << ") <RandomDataOp>:";
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << " [total rows: " << total_rows_ << "]\n";
+  } else {
+    // Call the super class for displaying any common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nTotal_rows: " << total_rows_
+        << "\nRows per buffer: " << rows_per_buffer_
+        << "\nSchema:\n" << *data_schema_ << "\n\n";  // NOTE(review): assumes data_schema_ is non-null here
+  }
+}
+
+// Helper function to produce a default/random schema if one didn't exist.  Errors if a schema is already set.
+Status RandomDataOp::GenerateSchema() {
+  if (data_schema_ != nullptr) {
+    return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Generating a schema but one already exists!");
+  }
+
+  // To randomly create a schema, we need to choose:
+  // a) how many columns
+  // b) the type of each column
+  // c) the shape of each column (number of dimensions i.e. rank)
+  // d) the shape of each column (dimension values)
+  data_schema_ = std::make_unique<DataSchema>();
+  std::unique_ptr<TensorShape> newShape;
+  std::unique_ptr<ColDescriptor> newCol;
+
+  // Loop over the number of chosen columns (between 1 and kMaxNumColumns)
+  int32_t numColumns = GenRandomInt(1, kMaxNumColumns);
+  for (int32_t i = 0; i < numColumns; i++) {
+    // For each column:
+    // - choose a datatype
+    // - generate a shape that randomly chooses the number of dimensions and the dimension values.
+    DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(0, kMaxDataType));
+    int32_t rank = GenRandomInt(1, kMaxRank);
+    std::vector<dsize_t> dims;
+    for (int32_t d = 0; d < rank; d++) {
+      // 0 is not a valid dimension value.  however, we can support "*" or unknown, so map the random
+      // 0 value to the unknown attribute if 0 is chosen
+      dsize_t dim_value = static_cast<dsize_t>(GenRandomInt(0, kMaxDimValue));
+      if (dim_value == 0) dim_value = TensorShape::kDimUnknown;
+      dims.push_back(dim_value);
+    }
+    newShape = std::make_unique<TensorShape>(dims);
+
+    // Create the column descriptor.  Columns are named c0, c1, ... in creation order.
+    std::string colName = "c" + std::to_string(i);
+    newCol = std::make_unique<ColDescriptor>(colName, DataType(newType), TensorImpl::kFlexible, rank,
+                                                   newShape.get());
+
+    data_schema_->AddColumn(*newCol);  // NOTE(review): any status returned by AddColumn is not checked - confirm
+  }
+
+  return Status::OK();
+}
+
+// Class functor operator () override.
+// All DatasetOps operate by launching a thread (see ExecutionTree). This class functor will
+// provide the master loop that drives the logic for performing the work.
+Status RandomDataOp::operator()() {
+  // First, compute how many buffers we'll need to satisfy the total row count (rounded up).
+  // The only reason we do this is for the purpose of throttling worker count if needed.
+  int64_t buffers_needed = total_rows_ / rows_per_buffer_;
+  if (total_rows_ % rows_per_buffer_ != 0) {
+    buffers_needed++;
+  }
+
+  // If the amount of workers we have exceeds the number of buffers to produce, then we'll have
+  // idle workers doing nothing.  In that case, let's throttle the worker count.
+  if (num_workers_ > buffers_needed) {
+    MS_LOG(INFO) << "RandomDataOp throttling worker count from " << num_workers_ << " to " << buffers_needed;
+    num_workers_ = static_cast<int32_t>(buffers_needed);  // safe: buffers_needed is below the old worker count
+    num_producers_ = num_workers_;
+    guys_out_ = num_workers_;
+    // The output connector was already created with a different worker count.  We have to drop and recreate
+    // that connector.
+    DatasetOp::CreateConnector(num_producers_, num_workers_);
+  }
+
+  // Assign the number of rows to each worker in a round robin fashion.
+  worker_max_rows_.reserve(num_workers_);
+  worker_rows_packed_.reserve(num_workers_);
+  // init the counts to zero to start.
+  for (int32_t w = 0; w < num_workers_; w++) {
+    worker_max_rows_.push_back(0);
+    worker_rows_packed_.push_back(0);
+  }
+  // then assign round robin row counts, starting with worker 0
+  int32_t currentWorker = 0;
+  for (int64_t r = 0; r < total_rows_; r++) {
+    worker_max_rows_[currentWorker]++;
+    currentWorker = (currentWorker + 1) % num_workers_;
+  }
+
+  // Next, compute the total buffer count (per-worker buffers, each rounded up).  Needed during reset logic
+  for (int32_t w = 0; w < num_workers_; w++) {
+    int64_t worker_buffers = 0;
+    worker_buffers = worker_max_rows_[w] / rows_per_buffer_;
+    if (worker_max_rows_[w] % rows_per_buffer_ != 0) worker_buffers++;
+    epoch_buffers_sent_ += worker_buffers;
+  }
+
+  // For the connector to work, we need to target the correct worker channel for the eoe.
+  // This will initialize it for the first one.  reset() handles for the rest of the epochs.
+  eoe_worker_id_ = epoch_buffers_sent_ % num_workers_;
+  epoch_buffers_sent_++;  // Add the eoe buffer to the count for subsequent epochs
+
+  // RandomDataOp doesn't need the master thread to stay around.  Kick off the workers and then master exits.
+  RETURN_IF_NOT_OK(
+    tree_->LaunchWorkers(num_workers_, std::bind(&RandomDataOp::WorkerEntry, this, std::placeholders::_1)));
+
+  // required task group setup after launching workers
+  TaskManager::FindMe()->Post();
+  RETURN_IF_NOT_OK(epoch_sync_wait_post_.Register(tree_->AllTasks()));
+
+  return Status::OK();
+}
+
+// Performs a synchronization between workers at the end of an epoch.  Sets *quitting when no epoch follows.
+Status RandomDataOp::EpochSync(int32_t worker_id, bool *quitting) {
+  MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " syncing at end of epoch";
+
+  // Sync on the guys_in counter
+  // We have to wait until the last guy is out of the previous sync before counting ourselves in.
+  all_out_.Wait();
+  // If we are not in a repeat loop, or that was the last repeat already, then setup our exit
+  // condition from the master loop.
+  if (!BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat)) {
+    *quitting = true;
+  }
+
+  auto prev = guys_in_.fetch_add(1);
+  bool last_guy_in = (prev + 1) == num_workers_;
+  // If we are the last worker to hit this sync point, we have some extra tasks
+  if (last_guy_in) {
+    MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " is the last one to sync. eoe sent as worker "
+                 << eoe_worker_id_;
+    // Prepare for sync
+    all_out_.Clear();
+    // Always flow eoe at the end (sent on the eoe_worker_id_ channel, not necessarily our own worker_id)
+    std::unique_ptr<DataBuffer> eoe_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE);
+    RETURN_IF_NOT_OK(out_connector_->Add(eoe_worker_id_, std::move(eoe_buffer)));
+    // If we're done then also flow the eof
+    if (*quitting) {
+      // The eof needs to be sent from the next sender in the round robin, so +1
+      int32_t eof_worker_id = (eoe_worker_id_ + 1) % num_workers_;
+      MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " has no more epochs.  sending eof as worker "
+                   << eof_worker_id;
+      std::unique_ptr<DataBuffer> eof_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF);
+      RETURN_IF_NOT_OK(out_connector_->Add(eof_worker_id, std::move(eof_buffer)));
+    }
+  }
+
+  // Wait for the reset to wake us up if we're not quitting
+  if (!(*quitting)) {
+    MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " entering sync wait.";
+    RETURN_IF_NOT_OK(epoch_sync_wait_post_.Wait());
+    prev = guys_out_.fetch_add(1);
+    bool last_guy_out = (prev + 1) == num_workers_;
+    // Last guy out will clear the wait post, zero the in-counter and re-open the sync area
+    if (last_guy_out) {
+      MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " last guy out clearing wait post.";
+      epoch_sync_wait_post_.Clear();
+      guys_in_ = 0;
+      all_out_.Set();
+    }
+  }
+
+  MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " epoch sync complete.";
+  return Status::OK();
+}
+
+// The entry point code for when workers are launched.  Produces this worker's rows, then syncs per epoch.
+Status RandomDataOp::WorkerEntry(int32_t worker_id) {
+  MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " entry";
+
+  // handshake with the master first to tell it we're alive
+  TaskManager::FindMe()->Post();
+
+  bool quitting = false;
+  std::unique_ptr<TensorQTable> new_tensor_table = nullptr;
+
+  // Loop until the quitting variable gets set to true (EpochSync sets it when no further epoch follows)
+  do {
+    // If we have not yet reached the row count for this worker then produce another record
+    if (worker_rows_packed_[worker_id] < worker_max_rows_[worker_id]) {
+      TensorRow new_row;
+
+      // Start a new tensor table if needed (the previous one was moved out by PackAndSend)
+      if (new_tensor_table == nullptr) {
+        new_tensor_table = std::make_unique<TensorQTable>();
+      }
+
+      // Create the data for the row
+      RETURN_IF_NOT_OK(CreateRandomRow(worker_id, &new_row));
+
+      // Add the row to our table
+      new_tensor_table->push_back(std::move(new_row));
+      worker_rows_packed_[worker_id]++;
+
+      // If the tensor table is at capacity then it's time to send it to output
+      if (new_tensor_table->size() == rows_per_buffer_) {
+        RETURN_IF_NOT_OK(PackAndSend(worker_id, std::move(new_tensor_table)));
+      }
+    } else {
+      // We've reached the total row count for this worker, so it's time for epoch sync.
+      // There is likely some records built but not sent yet, so take care of those first
+      // (this buffer will be smaller than rows_per_buffer)
+      if (new_tensor_table != nullptr && new_tensor_table->size() > 0) {
+        RETURN_IF_NOT_OK(PackAndSend(worker_id, std::move(new_tensor_table)));
+      }
+
+      // Now, let's enter the epoch sync
+      RETURN_IF_NOT_OK(EpochSync(worker_id, &quitting));
+    }
+  } while (!quitting);
+
+  MS_LOG(INFO) << "RandomDataOp worker " << worker_id << " is now quitting.";
+
+  return Status::OK();
+}
+
+// A helper function to stuff the tensor table into a buffer and send it to output connector as worker_id
+Status RandomDataOp::PackAndSend(int32_t worker_id, std::unique_ptr<TensorQTable> in_table) {
+  auto new_buffer = std::make_unique<DataBuffer>(GetNextBufferId(), DataBuffer::kDeBFlagNone);
+  new_buffer->set_tensor_table(std::move(in_table));  // the buffer takes over ownership of the table
+  new_buffer->set_column_name_map(column_name_map_);
+  RETURN_IF_NOT_OK(out_connector_->Add(worker_id, std::move(new_buffer)));
+  return Status::OK();
+}
+
+// A helper function to create random data for the row: one tensor per schema column, appended to new_row
+Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {
+  if (new_row == nullptr) {
+    return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Missing tensor row output");
+  }
+
+  // Create a tensor for each column, then add the tensor to the row
+  for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
+    const ColDescriptor current_col = data_schema_->column(i);
+    std::vector<dsize_t> current_shape = current_col.shape().AsVector();
+    std::unique_ptr<TensorShape> new_shape = nullptr;
+    std::unique_ptr<unsigned char[]> buf = nullptr;
+    std::shared_ptr<Tensor> new_tensor = nullptr;
+
+    // We need to resolve the shape to fill in any unknown dimensions with random
+    // values, then use that as our shape for this tensor.
+    for (size_t j = 0; j < current_shape.size(); ++j) {
+      if (current_shape[j] == TensorShape::kDimUnknown) {
+        current_shape[j] = static_cast<dsize_t>(GenRandomInt(1, kMaxDimValue));
+      }
+    }
+
+    new_shape = std::make_unique<TensorShape>(current_shape);
+    int64_t size_in_bytes = new_shape->NumOfElements() * current_col.type().SizeInBytes();
+
+    // Generate a random byte of data (it may look odd as doubles, floats or bools; the op does not care).
+    // Character types are not valid for uniform_int_distribution, so draw a wider int in [0, 255] and narrow.
+    std::uniform_int_distribution<uint32_t> uniDist(0, 255);
+    uint8_t random_byte = static_cast<uint8_t>(uniDist(rand_gen_));
+
+    // Now, create a chunk of memory for the entire tensor and copy this byte in repeatedly.
+    buf = std::make_unique<unsigned char[]>(size_in_bytes);
+    int ret_code = memset_s(buf.get(), size_in_bytes, random_byte, size_in_bytes);
+    if (ret_code != 0) {
+      return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor.");
+    }
+
+    RETURN_IF_NOT_OK(
+      Tensor::CreateTensor(&new_tensor, current_col.tensorImpl(), *new_shape, current_col.type(), buf.get()));
+
+    // Add this tensor to the row.  NOTE(review): buf dies with this iteration; assumes CreateTensor copied it
+    (*new_row).push_back(std::move(new_tensor));
+  }
+  return Status::OK();
+}
+
+// Overrides base class reset method.  When an operator does a reset, it cleans up any state
+// info from its previous execution and then initializes itself so that it can be executed
+// again.
+Status RandomDataOp::Reset() {
+  MS_LOG(INFO) << "RandomDataOp resetting.";
+
+  // Ensure all guys are in the waitpost
+  if (guys_in_ != num_workers_) {
+    return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
+                  "Issuing a reset, but some workers are missing from epochSync!");
+  }
+
+  // reset the row counters for all workers
+  for (int32_t w = 0; w < num_workers_; w++) {
+    worker_rows_packed_[w] = 0;
+    worker_max_rows_[w] = 0;
+  }
+  buffer_id_ = 0;
+
+  // Re-assign round robin row counts, starting from the worker after the one that gave
+  // the eoe last time
+  int32_t currentWorker = (eoe_worker_id_ + 1) % num_workers_;
+  for (int64_t r = 0; r < total_rows_; r++) {
+    worker_max_rows_[currentWorker]++;
+    currentWorker = (currentWorker + 1) % num_workers_;
+  }
+
+  // Compute which worker should get the eoe for the next epoch (advance by this epoch's buffer count)
+  eoe_worker_id_ = ((epoch_buffers_sent_ % num_workers_) + eoe_worker_id_) % num_workers_;
+
+  // Wake up the workers to get them going again in a new epoch
+  guys_out_ = 0;
+  epoch_sync_wait_post_.Set();
+
+  return Status::OK();
+}
+}  // namespace dataset
+}  // namespace mindspore
+
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h
new file mode 100644
index 0000000000..84e4c42702
--- /dev/null
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h
@@ -0,0 +1,271 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_
+#define DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_
+
+#include <atomic>
+#include <map>
+#include <memory>
+#include <mutex>
+#include <random>
+#include <string>
+#include <vector>
+#include <unordered_map>
+#include <utility>
+#include "dataset/util/status.h"
+#include "dataset/core/tensor.h"
+#include "dataset/core/data_type.h"
+#include "dataset/engine/data_schema.h"
+#include "dataset/engine/datasetops/parallel_op.h"
+#include "dataset/util/wait_post.h"
+
+namespace mindspore {
+namespace dataset {
+// The RandomDataOp is a leaf node storage operator that generates random data based
+// on the schema specifications.  Typically, it's used for testing and demonstrating
+// various dataset operator pipelines.  It is not "real" data to train with.
+// The data that is randomly created is just random and repeated bytes, there is no
+// "meaning" behind what these bytes are.
+class RandomDataOp : public ParallelOp {
+ public:
+  // Some constants to provide limits to random generation.
+  static constexpr int32_t kMaxNumColumns = 4;
+  static constexpr int32_t kMaxRank = 4;
+  static constexpr int32_t kMaxDimValue = 2048;
+  static constexpr int32_t kMaxDataType = (DataType::DE_UNKNOWN - 1);
+  static constexpr int32_t kMaxTotalRows = 1024;
+
+  // A nested builder class to aid in the construction of a RandomDataOp
+  class Builder {
+   public:
+    /**
+     * Builder constructor.  Creates the builder object.
+     * @note No default args.
+     * @return This is a constructor.
+     */
+    Builder();
+
+    /**
+     * Default destructor
+     */
+    ~Builder() = default;
+
+    /**
+     * The build method that produces the instantiated RandomDataOp as a shared pointer
+     * @param out_op - The output RandomDataOp that was constructed
+     * @return Status - The error code return
+     */
+    Status Build(std::shared_ptr<RandomDataOp> *out_op);
+
+    /**
+     * Builder set method
+     * @param data_schema - A user-provided schema (moved into the builder)
+     * @return Builder - The modified builder by reference
+     */
+    Builder &SetDataSchema(std::unique_ptr<DataSchema> data_schema) {
+      builder_data_schema_ = std::move(data_schema);
+      return *this;
+    }
+
+    /**
+     * Builder set method
+     * @param num_workers - The number of workers
+     * @return Builder - The modified builder by reference
+     */
+    Builder &SetNumWorkers(int32_t num_workers) {
+      builder_num_workers_ = num_workers;
+      return *this;
+    }
+
+    /**
+     * Builder set method
+     * @param op_connector_size - The size of the output connector
+     * @return Builder - The modified builder by reference
+     */
+    Builder &SetOpConnectorSize(int32_t op_connector_size) {
+      builder_op_connector_size_ = op_connector_size;
+      return *this;
+    }
+
+    /**
+     * Builder set method
+     * @param rows_per_buffer - The number of rows in each DataBuffer
+     * @return Builder - The modified builder by reference
+     */
+    Builder &SetRowsPerBuffer(int64_t rows_per_buffer) {
+      builder_rows_per_buffer_ = rows_per_buffer;
+      return *this;
+    }
+
+    /**
+     * Builder set method
+     * @param total_rows - The total number of rows in the dataset
+     * @return Builder - The modified builder by reference
+     */
+    Builder &SetTotalRows(int64_t total_rows) {
+      builder_total_rows_ = total_rows;
+      return *this;
+    }
+
+   private:
+    /**
+     * Check if the required parameters are set by the builder.
+     * @return Status - The error code return
+     */
+    Status SanityCheck() const;
+
+    std::unique_ptr<DataSchema> builder_data_schema_;
+    int32_t builder_num_workers_;
+    int32_t builder_op_connector_size_;
+    int64_t builder_rows_per_buffer_;
+    int64_t builder_total_rows_;
+  };  // class Builder
+
+  /**
+   * Constructor for RandomDataOp
+   * @note Declared in the public section; the Builder is the intended way to construct.
+   * @param num_workers - The number of workers
+   * @param op_connector_size - The size of the output connector
+   * @param rows_per_buffer - The number of rows in each DataBuffer
+   * @param total_rows - The total number of rows in the dataset
+   * @param data_schema - A user-provided schema (ownership is transferred via std::unique_ptr)
+   * @note This is a constructor, so there is no return value.
+   */
+  RandomDataOp(int32_t num_workers, int32_t op_connector_size, int64_t rows_per_buffer, int64_t total_rows,
+               std::unique_ptr<DataSchema> data_schema);
+
+  /**
+   * Destructor
+   */
+  ~RandomDataOp() = default;
+
+  /**
+   * A print method typically used for debugging
+   * @param out - The output stream to write output to
+   * @param show_all - A bool to control if you want to show all info or just a summary
+   */
+  void Print(std::ostream &out, bool show_all) const override;
+
+  /**
+   * << Stream output operator overload
+   * @note This allows you to write the debug print info using stream operators (summary form only)
+   * @param out - reference to the output stream being overloaded
+   * @param op - reference to the RandomDataOp to display
+   * @return - the output stream must be returned
+   */
+  friend std::ostream &operator<<(std::ostream &out, const RandomDataOp &op) {
+    op.Print(out, false);
+    return out;
+  }
+
+  /**
+   * Class functor operator () override.
+   * All DatasetOps operate by launching a thread (see ExecutionTree). This class functor will
+   * provide the master loop that drives the logic for performing the work.
+   * @return Status - The error code return
+   */
+  Status operator()() override;
+
+  /**
+   * Overrides base class reset method.  When an operator does a reset, it cleans up any state
+   * info from its previous execution and then initializes itself so that it can be executed
+   * again.
+   * @return Status - The error code return
+   */
+  Status Reset() override;
+
+  /**
+   * Quick getter for total rows.
+   */
+  int64_t GetTotalRows() const { return total_rows_; }
+
+ private:
+  /**
+   * The entry point code for when workers are launched
+   * @param worker_id - The worker id
+   * @return Status - The error code return
+   */
+  Status WorkerEntry(int32_t worker_id) override;
+
+  /**
+   * Helper function to produce a default/random schema if one didn't exist
+   * @return Status - The error code return
+   */
+  Status GenerateSchema();
+
+  /**
+   * Performs a synchronization between workers at the end of an epoch
+   * @param worker_id - The worker id; quitting is presumably a bool out-param (TODO confirm semantics)
+   * @return Status - The error code return
+   */
+  Status EpochSync(int32_t worker_id, bool *quitting);
+
+  /**
+   * A helper function to stuff the tensor table into a buffer and send it to output connector
+   * @param worker_id - The worker id
+   * @param in_table - The tensor table to pack and send
+   * @return Status - The error code return
+   */
+  Status PackAndSend(int32_t worker_id, std::unique_ptr<TensorQTable> in_table);
+
+  /**
+   * A helper function to create random data for the row
+   * @param worker_id - The worker id
+   * @param new_row - The output row to produce
+   * @return Status - The error code return
+   */
+  Status CreateRandomRow(int32_t worker_id, TensorRow *new_row);
+
+  /**
+   * A quick inline for producing a random number between (and including) min/max
+   * @param min - minimum number that can be generated
+   * @param max - maximum number that can be generated
+   * @return - The generated random number (NOTE(review): rand_gen_ is used without a lock here)
+   */
+  inline int32_t GenRandomInt(int32_t min, int32_t max) {
+    std::uniform_int_distribution<int32_t> uniDist(min, max);
+    return uniDist(rand_gen_);
+  }
+
+  /**
+   * A quick inline for producing the next buffer id in sequence, threadsafe
+   * @return - The next buffer id (buffer_id_ is incremented first, under buffer_id_mutex_).
+   */
+  inline int32_t GetNextBufferId() {
+    std::unique_lock<std::mutex> lock(buffer_id_mutex_);
+    return ++buffer_id_;
+  }
+
+  int32_t buffer_id_;  // last id handed out by GetNextBufferId(); set back to 0 in Reset()
+  int64_t rows_per_buffer_;
+  int64_t total_rows_;
+  int64_t epoch_buffers_sent_;
+  std::atomic<int32_t> guys_in_;  // Reset() requires this to equal num_workers_
+  std::atomic<int32_t> guys_out_;  // zeroed by Reset()
+  int32_t eoe_worker_id_;  // worker that gets the eoe; recomputed in Reset()
+  std::unique_ptr<DataSchema> data_schema_;
+  std::vector<int64_t> worker_max_rows_;  // per-worker row quota, assigned round robin in Reset()
+  std::vector<int64_t> worker_rows_packed_;  // per-worker counter, cleared in Reset()
+  std::unordered_map<std::string, int32_t> column_name_map_;
+  std::mt19937 rand_gen_;
+  WaitPost epoch_sync_wait_post_;  // Set() by Reset() to wake the workers for a new epoch
+  WaitPost all_out_;
+  std::mutex buffer_id_mutex_;  // taken in GetNextBufferId() around buffer_id_
+};  // class RandomDataOp
+}  // namespace dataset
+}  // namespace mindspore
+
+#endif  // DATASET_ENGINE_DATASETOPS_SOURCE_RANDOM_DATA_OP_
diff --git a/mindspore/dataset/__init__.py b/mindspore/dataset/__init__.py
index 1b0397ae26..54068eb762 100644
--- a/mindspore/dataset/__init__.py
+++ b/mindspore/dataset/__init__.py
@@ -21,7 +21,7 @@ can also create samplers with this module to sample data.
 from .core.configuration import config
 from .engine.datasets import StorageDataset, TFRecordDataset, ImageFolderDatasetV2, MnistDataset, MindDataset, \
     GeneratorDataset, ManifestDataset, Cifar10Dataset, Cifar100Dataset, VOCDataset, CelebADataset, TextFileDataset, \
-    Schema, Shuffle, zip
+    Schema, Shuffle, zip, RandomDataset
 from .engine.samplers import DistributedSampler, PKSampler, RandomSampler, SequentialSampler, SubsetRandomSampler, \
     WeightedRandomSampler, Sampler
 from .engine.serializer_deserializer import serialize, deserialize, show
diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 3d8b5b2ed0..898e0c8601 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -3146,6 +3146,57 @@ class Cifar100Dataset(SourceDataset):
         return get_num_rows(num_rows, self.num_shards)
 
 
+class RandomDataset(SourceDataset):
+    """
+    A source dataset that generates random data.
+
+    Args:
+        num_samples (int, optional): number of samples to generate (default=None).
+        schema (str or Schema, optional): Path to the json schema file or schema object (default=None).
+            If the schema is not provided, the random data op produces a default/random schema.
+        columns_list (list[str], optional): List of columns to be read (default=None, read all columns)
+        num_parallel_workers (int, optional): number of workers to read the data
+            (default=None, number set in the config).
+    """
+
+    def __init__(self, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None):
+        super().__init__(num_parallel_workers)
+        schema_obj = None
+        if (schema is not None) and (not isinstance(schema, Schema)):
+            schema_obj = Schema(schema)  # read the schema file and convert to schema object to validate it
+        self.schema = schema
+        self.columns_list = columns_list
+        self.num_samples = num_samples
+        if schema_obj is not None and num_samples is None:
+            self.num_samples = schema_obj.num_rows  # fall back to the row count stored in the schema file
+
+    def get_args(self):
+        args = super().get_args()
+        if self.schema is not None:
+            if isinstance(self.schema, Schema):
+                self.schema.datasetType = 'Random'
+                if self.num_samples is not None:
+                    self.schema.num_rows = self.num_samples
+                args["schema_json_string"] = self.schema.to_json()
+            else:
+                args["schema_file_path"] = self.schema  # not a Schema object, so it is passed on as a path
+        args["schema"] = self.schema
+        if self.columns_list is not None:
+            args["columns_list"] = self.columns_list
+        if self.num_samples is not None:
+            args["num_samples"] = self.num_samples
+        return args
+
+    def get_dataset_size(self):
+        """
+        Get the number of rows (samples) generated in an epoch.
+
+        Return:
+            Number, number of samples (None when num_samples was never set).
+        """
+        return self.num_samples
+
+
 class Schema:
     """
     Class to represent a schema of dataset.
diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py
index 2cf95aa086..e32c188d00 100644
--- a/mindspore/dataset/engine/iterators.py
+++ b/mindspore/dataset/engine/iterators.py
@@ -192,6 +192,8 @@ class Iterator:
             op_type = OpName.CIFAR100
         elif isinstance(dataset, de.CelebADataset):
             op_type = OpName.CELEBA
+        elif isinstance(dataset, de.RandomDataset):
+            op_type = OpName.RANDOMDATA
         elif isinstance(dataset, de.TextFileDataset):
             op_type = OpName.TEXTFILE
         else:
diff --git a/tests/ut/cpp/dataset/random_data_op_test.cc b/tests/ut/cpp/dataset/random_data_op_test.cc
new file mode 100644
index 0000000000..f8a7440c03
--- /dev/null
+++ b/tests/ut/cpp/dataset/random_data_op_test.cc
@@ -0,0 +1,457 @@
+/**
+ * Copyright 2019 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dataset/core/client.h"
+#include "common/common.h"
+#include "gtest/gtest.h"
+#include <memory>
+#include <vector>
+#include <iostream>
+#include "dataset/core/tensor_shape.h"
+#include "dataset/engine/datasetops/source/random_data_op.h"
+#include "dataset/engine/data_schema.h"
+
+using namespace mindspore::dataset;
+using mindspore::MsLogLevel::INFO;
+using mindspore::ExceptionType::NoExceptionType;
+using mindspore::LogStream;
+
+class MindDataTestRandomDataOp : public UT::DatasetOpTesting {
+  // Fixture for the RandomDataOp tests; it adds nothing on top of UT::DatasetOpTesting.
+};
+
+// Test info:
+// - Simple test with a user-provided schema generated purely from DataSchema C API
+// - has an iteration loop
+//
+// Tree:  single node tree with RandomDataOp
+//
+//    RandomDataOp
+//
+TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
+  Status rc;
+  int32_t rank = 0;  // not used
+  MS_LOG(INFO) << "UT test RandomDataOpBasic1";
+
+  // Start with an empty execution tree
+  auto myTree = std::make_shared<ExecutionTree>();
+
+  // Create a schema using the C api's
+  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
+
+  // RandomDataOp can randomly fill in unknown dimension lengths of a shape.
+  // Most other ops cannot do that as they are limited by the physical data itself. We're
+  // more flexible with random data since it is just making stuff up on the fly.
+  TensorShape c1Shape({TensorShape::kDimUnknown, TensorShape::kDimUnknown, 3});
+  ColDescriptor c1("image",
+                   DataType(DataType::DE_INT8),
+                   TensorImpl::kFlexible,
+                   rank,  // not used
+                   &c1Shape);
+
+  // Column 2 will just be a scalar label number
+  TensorShape c2Shape({});  // empty shape is a 1-value scalar Tensor
+  ColDescriptor c2("label",
+                   DataType(DataType::DE_UINT32),
+                   TensorImpl::kFlexible,
+                   rank,
+                   &c2Shape);
+
+  testSchema->AddColumn(c1);
+  testSchema->AddColumn(c2);
+
+  std::shared_ptr<RandomDataOp> myRandomDataOp;
+  RandomDataOp::Builder builder;
+
+  rc = builder.SetRowsPerBuffer(2)
+    .SetNumWorkers(1)
+    .SetDataSchema(std::move(testSchema))
+    .SetTotalRows(25)
+    .Build(&myRandomDataOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: myRandomDataOp is dereferenced below
+
+  rc = myTree->AssociateNode(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myTree->AssignRoot(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  std::ostringstream ss;
+  ss << *myRandomDataOp;
+  MS_LOG(INFO) << "RandomDataOp print: " << ss.str();  // stream style logging, no printf specifiers
+
+  MS_LOG(INFO) << "Launching tree and begin iteration";
+  rc = myTree->Prepare();
+  EXPECT_TRUE(rc.IsOk());
+  rc = myTree->Launch();
+  EXPECT_TRUE(rc.IsOk());
+
+  // Start the loop of reading tensors from our pipeline
+  DatasetIterator dI(myTree);
+  TensorRow tensorList;
+  rc = dI.FetchNextTensorRow(&tensorList);
+  EXPECT_TRUE(rc.IsOk());
+  int rowCount = 0;
+  while (!tensorList.empty()) {
+    // Don't display these rows...too big to show
+    MS_LOG(INFO) << "Row fetched #: " << rowCount;
+
+    rc = dI.FetchNextTensorRow(&tensorList);
+    EXPECT_TRUE(rc.IsOk());
+    rowCount++;
+  }
+  ASSERT_EQ(rowCount, 25);
+}
+
+// Test info:
+// - Simple test with a randomly generated schema
+// - no iteration loop on this one, just create the op
+//
+// Tree:  single node tree with RandomDataOp
+//
+//    RandomDataOp
+//
+TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
+  Status rc;
+  MS_LOG(INFO) << "UT test RandomDataOpBasic2";
+
+  // Start with an empty execution tree
+  auto myTree = std::make_shared<ExecutionTree>();
+
+  std::shared_ptr<RandomDataOp> myRandomDataOp;
+  RandomDataOp::Builder builder;
+
+  rc = builder.SetRowsPerBuffer(2)
+    .SetNumWorkers(1)
+    .Build(&myRandomDataOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: myRandomDataOp is dereferenced below
+
+  rc = myTree->AssociateNode(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myTree->AssignRoot(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  std::ostringstream ss;
+  ss << *myRandomDataOp;
+  MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
+}
+
+// Test info:
+// - json file test with iteration
+//
+// Tree:  single node tree with RandomDataOp
+//
+//    RandomDataOp
+//
+TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
+  Status rc;
+  MS_LOG(INFO) << "UT test RandomDataOpBasic3";
+
+  // Start with an empty execution tree
+  auto myTree = std::make_shared<ExecutionTree>();
+
+  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
+  rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema.json", {});
+  EXPECT_TRUE(rc.IsOk());
+
+  std::shared_ptr<RandomDataOp> myRandomDataOp;
+  RandomDataOp::Builder builder;
+
+  rc = builder.SetRowsPerBuffer(2)
+    .SetNumWorkers(1)
+    .SetDataSchema(std::move(testSchema))
+    .SetTotalRows(10)
+    .Build(&myRandomDataOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: myRandomDataOp is dereferenced below
+
+  rc = myTree->AssociateNode(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myTree->AssignRoot(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  std::ostringstream ss;
+  ss << *myRandomDataOp;
+  MS_LOG(INFO) << "RandomDataOp print: " << ss.str();
+
+  MS_LOG(INFO) << "Launching tree and begin iteration";
+  rc = myTree->Prepare();
+  EXPECT_TRUE(rc.IsOk());
+  rc = myTree->Launch();
+  EXPECT_TRUE(rc.IsOk());
+
+  // Start the loop of reading tensors from our pipeline
+  DatasetIterator dI(myTree);
+  TensorRow tensorList;
+  rc = dI.FetchNextTensorRow(&tensorList);
+  EXPECT_TRUE(rc.IsOk());
+  int rowCount = 0;
+  while (!tensorList.empty()) {
+    // Don't display these rows...too big to show
+    MS_LOG(INFO) << "Row fetched #: " << rowCount;
+
+    rc = dI.FetchNextTensorRow(&tensorList);
+    EXPECT_TRUE(rc.IsOk());
+    rowCount++;
+  }
+  ASSERT_EQ(rowCount, 10);
+}
+
+// Test info:
+// - json schema input it's a fairly simple one
+// - has an iteration loop
+//
+// Tree:  RepeatOp over RandomDataOp
+//
+//     RepeatOp
+//        |
+//    RandomDataOp
+//
+TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
+  Status rc;
+  MS_LOG(INFO) << "UT test RandomDataOpBasic4";
+
+  // Start with an empty execution tree
+  auto myTree = std::make_shared<ExecutionTree>();
+
+  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
+  rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
+  EXPECT_TRUE(rc.IsOk());
+
+  std::shared_ptr<RandomDataOp> myRandomDataOp;
+  RandomDataOp::Builder builder;
+
+  rc = builder.SetRowsPerBuffer(2)
+    .SetNumWorkers(1)
+    .SetDataSchema(std::move(testSchema))
+    .SetTotalRows(10)
+    .Build(&myRandomDataOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: the op pointer is used below
+
+  rc = myTree->AssociateNode(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  uint32_t numRepeats = 2;
+  std::shared_ptr<RepeatOp> myRepeatOp;
+  rc = RepeatOp::Builder(numRepeats)
+    .Build(&myRepeatOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: myRepeatOp is dereferenced below
+  rc = myTree->AssociateNode(myRepeatOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myRepeatOp->AddChild(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myTree->AssignRoot(myRepeatOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  MS_LOG(INFO) << "Launching tree and begin iteration";
+  rc = myTree->Prepare();
+  EXPECT_TRUE(rc.IsOk());
+  rc = myTree->Launch();
+  EXPECT_TRUE(rc.IsOk());
+
+  // Start the loop of reading tensors from our pipeline
+  DatasetIterator dI(myTree);
+  TensorRow tensorList;
+  rc = dI.FetchNextTensorRow(&tensorList);
+  EXPECT_TRUE(rc.IsOk());
+  int rowCount = 0;
+  while (!tensorList.empty()) {
+    MS_LOG(INFO) << "Row display for row #: " << rowCount;
+
+    // Display the tensor by calling the printer on it
+    for (size_t i = 0; i < tensorList.size(); i++) {
+      std::ostringstream ss;
+      ss << *tensorList[i] << std::endl;
+      MS_LOG(INFO) << "Tensor print: " << ss.str();  // stream style logging, no printf specifiers
+    }
+
+    rc = dI.FetchNextTensorRow(&tensorList);
+    EXPECT_TRUE(rc.IsOk());
+    rowCount++;
+  }
+  ASSERT_EQ(rowCount, 20);
+}
+
+// Test info:
+// - json schema input it's a fairly simple one
+// - has an iteration loop
+// - same as MindDataTestRandomDataOpBasic4 except that this one will have parallel workers
+//
+// Tree:  RepeatOp over RandomDataOp
+//
+//     RepeatOp
+//        |
+//    RandomDataOp
+//
+TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
+  Status rc;
+  MS_LOG(INFO) << "UT test RandomDataOpBasic5";
+
+  // Start with an empty execution tree
+  auto myTree = std::make_shared<ExecutionTree>();
+
+  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
+  rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
+  EXPECT_TRUE(rc.IsOk());
+
+  std::shared_ptr<RandomDataOp> myRandomDataOp;
+  RandomDataOp::Builder builder;
+
+  rc = builder.SetRowsPerBuffer(2)
+    .SetNumWorkers(4)
+    .SetDataSchema(std::move(testSchema))
+    .SetTotalRows(10)
+    .Build(&myRandomDataOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: the op pointer is used below
+
+  rc = myTree->AssociateNode(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  uint32_t numRepeats = 3;
+  std::shared_ptr<RepeatOp> myRepeatOp;
+  rc = RepeatOp::Builder(numRepeats)
+    .Build(&myRepeatOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: myRepeatOp is dereferenced below
+  rc = myTree->AssociateNode(myRepeatOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myRepeatOp->AddChild(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myTree->AssignRoot(myRepeatOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  MS_LOG(INFO) << "Launching tree and begin iteration";
+  rc = myTree->Prepare();
+  EXPECT_TRUE(rc.IsOk());
+  rc = myTree->Launch();
+  EXPECT_TRUE(rc.IsOk());
+
+  // Start the loop of reading tensors from our pipeline
+  DatasetIterator dI(myTree);
+  TensorRow tensorList;
+  rc = dI.FetchNextTensorRow(&tensorList);
+  EXPECT_TRUE(rc.IsOk());
+  int rowCount = 0;
+  while (!tensorList.empty()) {
+    MS_LOG(INFO) << "Row display for row #: " << rowCount;
+
+    // Display the tensor by calling the printer on it
+    for (size_t i = 0; i < tensorList.size(); i++) {
+      std::ostringstream ss;
+      ss << *tensorList[i] << std::endl;
+      MS_LOG(INFO) << "Tensor print: " << ss.str();  // must be <<; a comma would discard ss.str()
+    }
+
+    rc = dI.FetchNextTensorRow(&tensorList);
+    EXPECT_TRUE(rc.IsOk());
+    rowCount++;
+  }
+  ASSERT_EQ(rowCount, 30);
+}
+
+// Test info:
+// - repeat shuffle random
+//
+// Tree:  RepeatOp over ShuffleOp over RandomDataOp
+//
+//     RepeatOp
+//        |
+//     ShuffleOp
+//        |
+//    RandomDataOp
+//
+TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
+  Status rc;
+  MS_LOG(INFO) << "UT test RandomDataOpTree1";
+
+  // Start with an empty execution tree
+  auto myTree = std::make_shared<ExecutionTree>();
+
+  std::unique_ptr<DataSchema> testSchema = std::make_unique<DataSchema>();
+  rc = testSchema->LoadSchemaFile(datasets_root_path_ + "/testRandomData/datasetSchema2.json", {});
+  EXPECT_TRUE(rc.IsOk());
+
+  std::shared_ptr<RandomDataOp> myRandomDataOp;
+  RandomDataOp::Builder builder;
+
+  rc = builder.SetRowsPerBuffer(2)
+    .SetNumWorkers(4)
+    .SetDataSchema(std::move(testSchema))
+    .SetTotalRows(10)
+    .Build(&myRandomDataOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: the op pointer is used below
+
+  rc = myTree->AssociateNode(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  std::shared_ptr<ShuffleOp> myShuffleOp;
+  rc = ShuffleOp::Builder()
+      .SetRowsPerBuffer(2)
+      .SetShuffleSize(4)
+      .Build(&myShuffleOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: myShuffleOp is dereferenced below
+  rc = myTree->AssociateNode(myShuffleOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  uint32_t numRepeats = 3;
+  std::shared_ptr<RepeatOp> myRepeatOp;
+  rc = RepeatOp::Builder(numRepeats)
+    .Build(&myRepeatOp);
+  ASSERT_TRUE(rc.IsOk());  // fatal on failure: myRepeatOp is dereferenced below
+  rc = myTree->AssociateNode(myRepeatOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myRepeatOp->AddChild(myShuffleOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myShuffleOp->AddChild(myRandomDataOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  rc = myTree->AssignRoot(myRepeatOp);
+  EXPECT_TRUE(rc.IsOk());
+
+  MS_LOG(INFO) << "Launching tree and begin iteration";
+  rc = myTree->Prepare();
+  EXPECT_TRUE(rc.IsOk());
+  rc = myTree->Launch();
+  EXPECT_TRUE(rc.IsOk());
+
+  // Start the loop of reading tensors from our pipeline
+  DatasetIterator dI(myTree);
+  TensorRow tensorList;
+  rc = dI.FetchNextTensorRow(&tensorList);
+  EXPECT_TRUE(rc.IsOk());
+  int rowCount = 0;
+  while (!tensorList.empty()) {
+    MS_LOG(INFO) << "Row display for row #: " << rowCount;
+
+    // Display the tensor by calling the printer on it
+    for (size_t i = 0; i < tensorList.size(); i++) {
+      std::ostringstream ss;
+      ss << *tensorList[i] << std::endl;
+      MS_LOG(INFO) << "Tensor print: " << ss.str();
+    }
+
+    rc = dI.FetchNextTensorRow(&tensorList);
+    EXPECT_TRUE(rc.IsOk());
+    rowCount++;
+  }
+  ASSERT_EQ(rowCount, 30);
+}
diff --git a/tests/ut/data/dataset/testRandomData/datasetSchema.json b/tests/ut/data/dataset/testRandomData/datasetSchema.json
new file mode 100644
index 0000000000..cea11c52b3
--- /dev/null
+++ b/tests/ut/data/dataset/testRandomData/datasetSchema.json
@@ -0,0 +1,14 @@
+{
+  "columns": {
+    "image": {
+      "type": "uint8",
+      "rank": 3,  
+      "shape": [1920,1080,3]
+    },
+    "label": {
+      "type": "int32",
+      "rank": 1,
+      "shape": [1]
+    }
+  }
+}
diff --git a/tests/ut/data/dataset/testRandomData/datasetSchema2.json b/tests/ut/data/dataset/testRandomData/datasetSchema2.json
new file mode 100644
index 0000000000..541ad7c071
--- /dev/null
+++ b/tests/ut/data/dataset/testRandomData/datasetSchema2.json
@@ -0,0 +1,14 @@
+{
+  "columns": {
+    "image": {
+      "type": "uint8",
+      "rank": 2,  
+      "shape": [28,28]
+    },
+    "label": {
+      "type": "uint8",
+      "rank": 1,
+      "shape": [1]
+    }
+  }
+}
diff --git a/tests/ut/python/dataset/test_random_dataset.py b/tests/ut/python/dataset/test_random_dataset.py
new file mode 100644
index 0000000000..a4003ad311
--- /dev/null
+++ b/tests/ut/python/dataset/test_random_dataset.py
@@ -0,0 +1,70 @@
+# Copyright 2019 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import mindspore.common.dtype as mstype
+import mindspore.dataset as ds
+from pathlib import Path
+
+def test_randomdataset_basic1():
+    """Basic test with a parallel (4-worker) random data op: 50 samples repeated 4 times."""
+    print("Test randomdataset basic")
+
+    random_schema = ds.Schema()
+    random_schema.add_column('image', de_type=mstype.uint8, shape=[2])
+    random_schema.add_column('label', de_type=mstype.uint8, shape=[1])
+
+    # build the pipeline: random source followed by a repeat
+    dataset = ds.RandomDataset(schema=random_schema, num_samples=50, num_parallel_workers=4)
+    dataset = dataset.repeat(4)
+
+    row_count = 0
+    for row in dataset.create_dict_iterator():  # each row is a dictionary
+        # in this example, each dictionary has keys "image" and "label"
+        print("{} image: {}".format(row_count, row["image"]))
+        print("{} label: {}".format(row_count, row["label"]))
+        row_count += 1
+
+    print("Number of data in ds1: ", row_count)
+    assert row_count == 200
+
+def test_randomdataset_basic2():
+    """Single-worker RandomDataset with image-sized rows: 10 samples repeated 4 times gives 40 rows."""
+    print("Test randomdataset basic 2")
+
+    # 640 * 480 * 3 uint8 values = 921600 bytes, i.e. a bit less than 1 MB per image
+    image_shape = [640, 480, 3]
+    random_schema = ds.Schema()
+    random_schema.add_column('image', de_type=mstype.uint8, shape=image_shape)
+    random_schema.add_column('label', de_type=mstype.uint8, shape=[1])
+
+    # 10 random samples per epoch, replayed 4 times by the repeat
+    dataset = ds.RandomDataset(schema=random_schema, num_samples=10, num_parallel_workers=1)
+    dataset = dataset.repeat(4)
+
+    # count every row that comes out of the pipeline
+    row_count = 0
+    for row in dataset.create_dict_iterator():  # each row is a dictionary
+        # each dictionary has keys "image" and "label"; only the label is printed
+        print("printing the label: {}".format(row["label"]))
+        row_count += 1
+
+    print("Number of data in ds1: ", row_count)
+    assert row_count == 40
+
+
+if __name__ == '__main__':  # lets the file be run directly as a script
+    test_randomdataset_basic1()
+    test_randomdataset_basic2()
+    print('test_randomdataset_basic Ended.\n')
+

From 0b51a847ca29a80c3d8d22ef636f15ce5aab4b7d Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Wed, 29 Apr 2020 22:27:27 +0800
Subject: [PATCH 210/242] modify weight init

---
 example/resnet101_imagenet2012/train.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/example/resnet101_imagenet2012/train.py b/example/resnet101_imagenet2012/train.py
index 3d0a23f93a..365a859395 100755
--- a/example/resnet101_imagenet2012/train.py
+++ b/example/resnet101_imagenet2012/train.py
@@ -14,7 +14,6 @@
 # ============================================================================
 """train_imagenet."""
 import os
-import math
 import argparse
 import random
 import numpy as np
@@ -64,7 +63,6 @@ if __name__ == '__main__':
     epoch_size = config.epoch_size
     net = resnet101(class_num=config.class_num)
     # weight init
-    default_recurisive_init(net)
     for _, cell in net.cells_and_names():
         if isinstance(cell, nn.Conv2d):
             cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),

From b1ea933345088775e9ef9ac786ea15806fe7e906 Mon Sep 17 00:00:00 2001
From: Jesse Lee <jesse.lee@huawei.com>
Date: Wed, 29 Apr 2020 10:54:14 -0400
Subject: [PATCH 211/242] Fix CI

---
 mindspore/ccsrc/dataset/engine/data_schema.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mindspore/ccsrc/dataset/engine/data_schema.cc b/mindspore/ccsrc/dataset/engine/data_schema.cc
index c7d2aa44f0..db2fed41bd 100644
--- a/mindspore/ccsrc/dataset/engine/data_schema.cc
+++ b/mindspore/ccsrc/dataset/engine/data_schema.cc
@@ -471,8 +471,7 @@ Status DataSchema::PreLoadExceptionCheck(const nlohmann::json &js) {
 // name to column index number.
 Status DataSchema::GetColumnNameMap(std::unordered_map<std::string, int32_t> *out_column_name_map) {
   if (out_column_name_map == nullptr) {
-    return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
-                  "unexpected null output column name map.");
+    return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "unexpected null output column name map.");
   }
 
   for (int32_t i = 0; i < col_descs_.size(); ++i) {

From 1db48ba835a07db96c1dc8dfc1efe87dbc858e76 Mon Sep 17 00:00:00 2001
From: cathwong <cathy.wong@huawei.com>
Date: Thu, 30 Apr 2020 00:15:36 +0800
Subject: [PATCH 212/242] update mindspore/dataset/engine/datasets.py. Fix
 typo.

---
 mindspore/dataset/engine/datasets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 5504cc3362..be9a81411f 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -455,7 +455,7 @@ class Dataset:
             The order of using repeat and batch reflects the number of batches. Recommend that
             repeat operation should be used after batch operation.
             If dataset_sink_mode is False, here repeat operation is invalid.
-            If dataset_sink_mode is True, repeat count should be euqal to the epoch of training. Otherwise,
+            If dataset_sink_mode is True, repeat count should be equal to the epoch of training. Otherwise,
             errors could occur since the amount of data is not the amount training requires.
 
         Args:

From bd5f74c51e36c8fb2a3417dfecc5c56451e405a6 Mon Sep 17 00:00:00 2001
From: cathwong <cathy.wong@huawei.com>
Date: Thu, 30 Apr 2020 00:19:19 +0800
Subject: [PATCH 213/242] update
 mindspore/dataset/transforms/vision/py_transforms.py.

---
 mindspore/dataset/transforms/vision/py_transforms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/dataset/transforms/vision/py_transforms.py b/mindspore/dataset/transforms/vision/py_transforms.py
index ff38c5c00d..e96efe192b 100644
--- a/mindspore/dataset/transforms/vision/py_transforms.py
+++ b/mindspore/dataset/transforms/vision/py_transforms.py
@@ -1355,7 +1355,7 @@ class RandomSharpness:
 
     Examples:
         >>> py_transforms.ComposeOp([py_transforms.Decode(),
-        >>>                          py_transforms.RandomColor(0.5,1.5),
+        >>>                          py_transforms.RandomSharpness(0.5,1.5),
         >>>                          py_transforms.ToTensor()])
 
     """

From 5236d0c3c085aa57406a2df191f0245691240775 Mon Sep 17 00:00:00 2001
From: Jesse Lee <jesse.lee@huawei.com>
Date: Wed, 29 Apr 2020 12:44:04 -0400
Subject: [PATCH 214/242] Replace print with logger.info

---
 .../ut/python/dataset/test_random_dataset.py  | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tests/ut/python/dataset/test_random_dataset.py b/tests/ut/python/dataset/test_random_dataset.py
index a4003ad311..16c43ea971 100644
--- a/tests/ut/python/dataset/test_random_dataset.py
+++ b/tests/ut/python/dataset/test_random_dataset.py
@@ -14,11 +14,12 @@
 # ==============================================================================
 import mindspore.common.dtype as mstype
 import mindspore.dataset as ds
+from mindspore import log as logger
 from pathlib import Path
 
 # just a basic test with parallel random data op
 def test_randomdataset_basic1():
-    print("Test randomdataset basic")
+    logger.info("Test randomdataset basic")
 
     schema = ds.Schema()
     schema.add_column('image', de_type=mstype.uint8, shape=[2])
@@ -31,16 +32,16 @@ def test_randomdataset_basic1():
     num_iter = 0
     for data in ds1.create_dict_iterator():  # each data is a dictionary
         # in this example, each dictionary has keys "image" and "label"
-        print("{} image: {}".format(num_iter, data["image"]))
-        print("{} label: {}".format(num_iter, data["label"]))
+        logger.info("{} image: {}".format(num_iter, data["image"]))
+        logger.info("{} label: {}".format(num_iter, data["label"]))
         num_iter += 1
 
-    print("Number of data in ds1: ", num_iter)
+    logger.info("Number of data in ds1: {}".format(num_iter))
     assert(num_iter == 200)
 
 # Another simple test
 def test_randomdataset_basic2():
-    print("Test randomdataset basic 2")
+    logger.info("Test randomdataset basic 2")
 
     schema = ds.Schema()
     schema.add_column('image', de_type=mstype.uint8, shape=[640,480,3]) # 921600 bytes (a bit less than 1 MB per image)
@@ -55,16 +56,16 @@ def test_randomdataset_basic2():
     num_iter = 0
     for data in ds1.create_dict_iterator():  # each data is a dictionary
         # in this example, each dictionary has keys "image" and "label"
-        #print(data["image"])
-        print("printing the label: {}".format(data["label"]))
+        #logger.info(data["image"])
+        logger.info("printing the label: {}".format(data["label"]))
         num_iter += 1
 
-    print("Number of data in ds1: ", num_iter)
+    logger.info("Number of data in ds1: {}".format(num_iter))
     assert(num_iter == 40)
 
 
 if __name__ == '__main__':
     test_randomdataset_basic1()
     test_randomdataset_basic2()
-    print('test_randomdataset_basic Ended.\n')
+    logger.info('test_randomdataset_basic Ended.\n')
 

From c5a8ffe4f4bcfb499bd25463af8cdf76d9159dd1 Mon Sep 17 00:00:00 2001
From: Junhan Hu <junhan.hu@huawei.com>
Date: Wed, 29 Apr 2020 11:52:58 -0400
Subject: [PATCH 215/242] Add schema support for GeneratorDataset

---
 mindspore/dataset/engine/datasets.py      | 21 +++++++++----
 mindspore/dataset/engine/validators.py    | 11 +++++--
 tests/ut/python/dataset/test_generator.py | 36 +++++++++++++++++++++++
 3 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 5504cc3362..b56e2ce4ae 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -2504,11 +2504,12 @@ class GeneratorDataset(SourceDataset):
             Iterable source is required to return a tuple of numpy array as a row of the dataset on iter(source).next().
             Random accessible source is required to return a tuple of numpy array as a row of the dataset on
             source[idx].
-        column_names (list[str]): List of column names of the dataset.
+        column_names (list[str], optional): List of column names of the dataset (default=None). Users are required to
+            provide either column_names or schema.
         column_types (list[mindspore.dtype], optional): List of column data types of the dataset (default=None).
             If provided, sanity check will be performed on generator output.
-        schema (Schema/String, optional): Path to the json schema file or schema object (default=None).
-            If the schema is not provided, the meta data from column_names and column_types is considered the schema.
+        schema (Schema/String, optional): Path to the json schema file or schema object (default=None). Users are
+            required to provide either column_names or schema. If both are provided, schema will be used.
         num_samples (int, optional): The number of samples to be included in the dataset
             (default=None, all images).
         num_parallel_workers (int, optional): Number of subprocesses used to fetch the dataset in parallel (default=1).
@@ -2555,8 +2556,8 @@ class GeneratorDataset(SourceDataset):
     """
 
     @check_generatordataset
-    def __init__(self, source, column_names, column_types=None, schema=None, num_samples=None, num_parallel_workers=1,
-                 shuffle=None, sampler=None, num_shards=None, shard_id=None):
+    def __init__(self, source, column_names=None, column_types=None, schema=None, num_samples=None,
+                 num_parallel_workers=1, shuffle=None, sampler=None, num_shards=None, shard_id=None):
         super().__init__(num_parallel_workers)
         self.sampler = _select_sampler(num_samples, sampler, shuffle, num_shards, shard_id)
         if self.sampler is not None and hasattr(source, "__getitem__"):
@@ -2598,6 +2599,16 @@ class GeneratorDataset(SourceDataset):
         else:
             self.column_types = column_types
 
+        if schema is not None:
+            self.schema = schema
+            if not isinstance(schema, Schema):
+                self.schema = Schema(schema)
+            self.column_names = []
+            self.column_types = []
+            for col in self.schema.columns:
+                self.column_names.append(col["name"])
+                self.column_types.append(DataType(col["type"]))
+
     def get_args(self):
         args = super().get_args()
         args["source"] = self.source
diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py
index 4f1b394634..dbe8e47d03 100644
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -555,10 +555,15 @@ def check_generatordataset(method):
             except TypeError:
                 raise TypeError("source should be callable, iterable or random accessible")
 
-        # check column_names; required argument
+        # check column_names or schema; required argument
         column_names = param_dict.get('column_names')
-        if column_names is None:
-            raise ValueError("column_names is not provided.")
+        schema = param_dict.get('schema')
+        if column_names is None and schema is None:
+            raise ValueError("Neither column_names nor schema is provided.")
+
+        # a schema is accepted either as a Schema object or as a string (path to a schema file)
+        if schema is not None and not isinstance(schema, (datasets.Schema, str)):
+            raise ValueError("schema should be a path to schema file or a schema object.")
 
         # check optional argument
         nreq_param_int = ["num_samples", "num_parallel_workers", "num_shards", "shard_id"]
diff --git a/tests/ut/python/dataset/test_generator.py b/tests/ut/python/dataset/test_generator.py
index 4daf952eba..529788fcaa 100644
--- a/tests/ut/python/dataset/test_generator.py
+++ b/tests/ut/python/dataset/test_generator.py
@@ -580,6 +580,41 @@ def test_num_samples_underflow():
         count = count + 1
     assert count == 64
 
+
+def type_tester_with_type_check_2c_schema(t, c):
+    """Batch a 2 column generator described by a Schema and check column "data0" against expected values."""
+    logger.info("Test with Type {}".format(t.__name__))
+
+    # the schema alone names and types the two columns; no column_names are passed
+    col_schema = ds.Schema()
+    col_schema.add_column("data0", c[0])
+    col_schema.add_column("data1", c[1])
+    dataset = ds.GeneratorDataset((lambda: generator_with_type_2c(t)), schema=col_schema)
+    dataset = dataset.batch(4)
+
+    # the k-th batch is expected to be the (4, 1) array [[4k], [4k + 1], [4k + 2], [4k + 3]] of dtype t
+    start = 0
+    for row in dataset.create_dict_iterator():  # each row is a dictionary
+        expected = np.array([[start + offset] for offset in range(4)], dtype=t)
+        assert np.array_equal(row["data0"], expected)
+        start += 4
+
+
+def test_schema():
+    """
+    Run the schema-based 2 column Generator type check once for each listed numeric type
+    """
+    logger.info("Test 2 column Generator on all data types with type check")
+
+    # numpy dtypes and their mindspore counterparts, paired by position
+    numpy_dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64,
+                    np.float32, np.float64]
+    ms_dtypes = [mstype.int8, mstype.int16, mstype.int32, mstype.int64, mstype.uint8, mstype.uint16,
+                 mstype.uint32, mstype.uint64, mstype.float32, mstype.float64]
+    for np_type, ms_type in zip(numpy_dtypes, ms_dtypes):
+        type_tester_with_type_check_2c_schema(np_type, [ms_type, ms_type])
+
+
 def manual_test_keyborad_interrupt():
     """
     Test keyborad_interrupt
@@ -626,5 +661,6 @@ if __name__ == "__main__":
     test_sequential_sampler()
     test_distributed_sampler()
     test_random_sampler()
+    test_schema()
 
 

From d15bd04bfedff4674ae943a838fab2f01ec227f8 Mon Sep 17 00:00:00 2001
From: Adel Shafiei <adel.shafiei@huawei.com>
Date: Wed, 29 Apr 2020 15:42:36 -0400
Subject: [PATCH 216/242] added input validation to reject python op in C++
 uniform augmentation operations list

---
 .../dataset/kernels/image/uniform_aug_op.cc   | 12 ++--
 .../dataset/kernels/image/uniform_aug_op.h    |  2 +-
 .../dataset/transforms/vision/c_transforms.py | 13 +++-
 .../dataset/transforms/vision/validators.py   |  4 +-
 .../ut/python/dataset/test_uniform_augment.py | 63 ++++++++++++++++++-
 5 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc
index 1214345c37..cbc5aaa2e5 100644
--- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc
+++ b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.cc
@@ -25,18 +25,14 @@ UniformAugOp::UniformAugOp(py::list op_list, int32_t num_ops) : num_ops_(num_ops
   std::shared_ptr<TensorOp> tensor_op;
   // iterate over the op list, cast them to TensorOp and add them to tensor_op_list_
   for (auto op : op_list) {
-    if (py::isinstance<py::function>(op)) {
-      // python op
-      tensor_op = std::make_shared<PyFuncOp>(op.cast<py::function>());
-    } else if (py::isinstance<TensorOp>(op)) {
-      // C++ op
-      tensor_op = op.cast<std::shared_ptr<TensorOp>>();
-    }
+    // only C++ op is accepted
+    tensor_op = op.cast<std::shared_ptr<TensorOp>>();
     tensor_op_list_.insert(tensor_op_list_.begin(), tensor_op);
   }
 
   rnd_.seed(GetSeed());
 }
+
 // compute method to apply uniformly random selected augmentations from a list
 Status UniformAugOp::Compute(const std::vector<std::shared_ptr<Tensor>> &input,
                              std::vector<std::shared_ptr<Tensor>> *output) {
@@ -57,7 +53,7 @@ Status UniformAugOp::Compute(const std::vector<std::shared_ptr<Tensor>> &input,
       continue;
     }
 
-    // apply python/C++ op
+    // apply C++ ops (note: python OPs are not accepted)
     if (count == 1) {
       (**tensor_op).Compute(input, output);
     } else if (count % 2 == 0) {
diff --git a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h
index 336bc8c859..a70edc2777 100644
--- a/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h
+++ b/mindspore/ccsrc/dataset/kernels/image/uniform_aug_op.h
@@ -36,7 +36,7 @@ class UniformAugOp : public TensorOp {
   static const int kDefNumOps;
 
   // Constructor for UniformAugOp
-  // @param list op_list: list of candidate python operations
+  // @param list op_list: list of candidate C++ operations
   // @param list num_ops: number of augemtation operations to applied
   UniformAugOp(py::list op_list, int32_t num_ops);
 
diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py
index 1b495ffe92..1806d22446 100644
--- a/mindspore/dataset/transforms/vision/c_transforms.py
+++ b/mindspore/dataset/transforms/vision/c_transforms.py
@@ -455,8 +455,19 @@ class UniformAugment(cde.UniformAugOp):
     Tensor operation to perform randomly selected augmentation
 
     Args:
-        operations: list of python operations.
+        operations: list of C++ operations (python OPs are not accepted).
         NumOps (int): number of OPs to be selected and applied.
+
+    Examples:
+        >>> transforms_list = [c_transforms.RandomHorizontalFlip(),
+        >>>                    c_transforms.RandomVerticalFlip(),
+        >>>                    c_transforms.RandomColorAdjust(),
+        >>>                    c_transforms.RandomRotation(degrees=45)]
+        >>> uni_aug = c_transforms.UniformAugment(operations=transforms_list, num_ops=2)
+        >>> transforms_all = [c_transforms.Decode(), c_transforms.Resize(size=[224, 224]),
+        >>>                   uni_aug, F.ToTensor()]
+        >>> ds_ua = ds.map(input_columns="image",
+        >>>                operations=transforms_all, num_parallel_workers=1)
     """
 
     @check_uniform_augmentation
diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py
index 2c299b077b..96d0a3bfdc 100644
--- a/mindspore/dataset/transforms/vision/validators.py
+++ b/mindspore/dataset/transforms/vision/validators.py
@@ -837,8 +837,8 @@ def check_uniform_augmentation(method):
         if not isinstance(operations, list):
             raise ValueError("operations is not a python list")
         for op in operations:
-            if not callable(op) and not isinstance(op, TensorOp):
-                raise ValueError("non-callable op in operations list")
+            if not isinstance(op, TensorOp):
+                raise ValueError("operations list only accepts C++ operations.")
 
         kwargs["num_ops"] = num_ops
         kwargs["operations"] = operations
diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py
index ea99056116..98c22fb3cb 100644
--- a/tests/ut/python/dataset/test_uniform_augment.py
+++ b/tests/ut/python/dataset/test_uniform_augment.py
@@ -163,7 +163,68 @@ def test_cpp_uniform_augment(plot=False, num_ops=2):
         mse[i] = np.mean((images_ua[i] - images_original[i]) ** 2)
     logger.info("MSE= {}".format(str(np.mean(mse))))
 
+def test_cpp_uniform_augment_exception_pyops(num_ops=2):
+    """
+    Test that UniformAugment raises when a non C++ op (F.Invert) is in the operations list
+    """
+    logger.info("Test CPP UniformAugment invalid OP exception")
+
+    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
+                     C.RandomHorizontalFlip(),
+                     C.RandomVerticalFlip(),
+                     C.RandomColorAdjust(),
+                     C.RandomRotation(degrees=45),
+                     F.Invert()]
+    error_msg = ""  # stays empty when nothing is raised, so the final assert fails instead of passing silently
+    try:
+        C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
+    except Exception as e:
+        logger.info("Got an exception in DE: {}".format(str(e)))
+        error_msg = str(e)
+    assert "operations" in error_msg
+
+def test_cpp_uniform_augment_exception_large_numops(num_ops=6):
+    """
+    Test that UniformAugment raises on an invalid large num_ops (default 6 with only 5 operations)
+    """
+    logger.info("Test CPP UniformAugment invalid large num_ops exception")
+
+    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
+                     C.RandomHorizontalFlip(),
+                     C.RandomVerticalFlip(),
+                     C.RandomColorAdjust(),
+                     C.RandomRotation(degrees=45)]
+    error_msg = ""  # stays empty when nothing is raised, so the final assert fails instead of passing silently
+    try:
+        C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
+    except Exception as e:
+        logger.info("Got an exception in DE: {}".format(str(e)))
+        error_msg = str(e)
+    assert "num_ops" in error_msg
+
+def test_cpp_uniform_augment_exception_nonpositive_numops(num_ops=0):
+    """
+    Test that UniformAugment raises on an invalid non-positive num_ops (default 0)
+    """
+    logger.info("Test CPP UniformAugment invalid non-positive num_ops exception")
+
+    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
+                     C.RandomHorizontalFlip(),
+                     C.RandomVerticalFlip(),
+                     C.RandomColorAdjust(),
+                     C.RandomRotation(degrees=45)]
+    error_msg = ""  # stays empty when nothing is raised, so the final assert fails instead of passing silently
+    try:
+        C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
+    except Exception as e:
+        logger.info("Got an exception in DE: {}".format(str(e)))
+        error_msg = str(e)
+    assert "num_ops" in error_msg
+
 if __name__ == "__main__":
     test_uniform_augment(num_ops=1)
     test_cpp_uniform_augment(num_ops=1)
-    
+    test_cpp_uniform_augment_exception_pyops(num_ops=1)
+    test_cpp_uniform_augment_exception_large_numops(num_ops=6)
+    test_cpp_uniform_augment_exception_nonpositive_numops(num_ops=0)
+

From 1a61073e314f41679c4e7a5d51c419d934c4663d Mon Sep 17 00:00:00 2001
From: meixiaowei <meixiaowei1@huawei.com>
Date: Thu, 30 Apr 2020 09:00:19 +0800
Subject: [PATCH 217/242] add relative and full path for the eval

---
 example/resnet101_imagenet2012/README.md    |  1 -
 example/resnet101_imagenet2012/run_infer.sh | 22 ++++++++++++++++-----
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/example/resnet101_imagenet2012/README.md b/example/resnet101_imagenet2012/README.md
index 852326c9d6..8514b8759d 100644
--- a/example/resnet101_imagenet2012/README.md
+++ b/example/resnet101_imagenet2012/README.md
@@ -25,7 +25,6 @@ This is an example of training ResNet101 with ImageNet dataset in MindSpore.
 ```shell
 .
 ├── crossentropy.py                 # CrossEntropy loss function
-├── var_init.py                     # weight initial
 ├── config.py                       # parameter configuration
 ├── dataset.py                      # data preprocessing
 ├── eval.py                         # eval net
diff --git a/example/resnet101_imagenet2012/run_infer.sh b/example/resnet101_imagenet2012/run_infer.sh
index 5df659275e..b82427e15f 100755
--- a/example/resnet101_imagenet2012/run_infer.sh
+++ b/example/resnet101_imagenet2012/run_infer.sh
@@ -20,15 +20,27 @@ then
 exit 1
 fi
 
-if [ ! -d $1 ]
+get_real_path(){
+  # Print $1 unchanged when it already starts with "/"; otherwise resolve it against $PWD.
+  case "$1" in
+    /*) echo "$1" ;;
+    *) realpath -m "$PWD/$1" ;;
+  esac
+}
+PATH1=$(get_real_path "$1")
+PATH2=$(get_real_path "$2")
+echo "$PATH1"
+echo "$PATH2"
+
+if [ ! -d $PATH1 ]
 then 
-    echo "error: DATASET_PATH=$1 is not a directory"
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
 exit 1
 fi 
 
-if [ ! -f $2 ]
+if [ ! -f $PATH2 ]
 then 
-    echo "error: CHECKPOINT_PATH=$2 is not a file"
+    echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
 exit 1
 fi 
 
@@ -48,5 +60,5 @@ cp *.sh ./infer
 cd ./infer || exit
 env > env.log
 echo "start infering for device $DEVICE_ID"
-python eval.py --do_eval=True --dataset_path=$1 --checkpoint_path=$2 &> log &
+python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log &
 cd ..

From ebe5c2a610987747e0c9926d2d82497ee78273ab Mon Sep 17 00:00:00 2001
From: leonwanghui <wanghui71leon@gmail.com>
Date: Thu, 30 Apr 2020 09:25:37 +0800
Subject: [PATCH 218/242] Fix release package link in dockerfile

---
 docker/mindspore-cpu/0.2.0-alpha/Dockerfile | 2 +-
 docker/mindspore-gpu/0.2.0-alpha/Dockerfile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/mindspore-cpu/0.2.0-alpha/Dockerfile b/docker/mindspore-cpu/0.2.0-alpha/Dockerfile
index 9524cee745..dc69d21326 100644
--- a/docker/mindspore-cpu/0.2.0-alpha/Dockerfile
+++ b/docker/mindspore-cpu/0.2.0-alpha/Dockerfile
@@ -64,4 +64,4 @@ RUN mkdir -pv /root/.pip \
     && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
 
 # Install MindSpore cpu whl package
-RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
diff --git a/docker/mindspore-gpu/0.2.0-alpha/Dockerfile b/docker/mindspore-gpu/0.2.0-alpha/Dockerfile
index 9b59f845f7..a6eaf8382a 100644
--- a/docker/mindspore-gpu/0.2.0-alpha/Dockerfile
+++ b/docker/mindspore-gpu/0.2.0-alpha/Dockerfile
@@ -80,4 +80,4 @@ RUN cd /tmp \
     && rm -f /tmp/openmpi-3.1.5.tar.gz
 
 # Install MindSpore cuda-10.1 whl package
-RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/gpu/cuda-10.1/mindspore_gpu-0.2.0-cp37-cp37m-linux_x86_64.whl

From cb70daa3559330a699ed5ced41483e24e6df6856 Mon Sep 17 00:00:00 2001
From: caifubi <caifubi1@huawei.com>
Date: Thu, 30 Apr 2020 09:33:33 +0800
Subject: [PATCH 219/242] format code

---
 .../dataset/engine/datasetops/source/random_data_op.cc   | 9 +++------
 mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h    | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc
index 0f4632d3b9..306f74ad6f 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc
@@ -102,9 +102,8 @@ void RandomDataOp::Print(std::ostream &out, bool show_all) const {
     // Call the super class for displaying any common detailed info
     ParallelOp::Print(out, show_all);
     // Then show any custom derived-internal stuff
-    out << "\nTotal_rows: " << total_rows_
-        << "\nRows per buffer: " << rows_per_buffer_
-        << "\nSchema:\n" << *data_schema_ << "\n\n";
+    out << "\nTotal_rows: " << total_rows_ << "\nRows per buffer: " << rows_per_buffer_ << "\nSchema:\n"
+        << *data_schema_ << "\n\n";
   }
 }
 
@@ -143,8 +142,7 @@ Status RandomDataOp::GenerateSchema() {
 
     // Create the column descriptor
     std::string colName = "c" + std::to_string(i);
-    newCol = std::make_unique<ColDescriptor>(colName, DataType(newType), TensorImpl::kFlexible, rank,
-                                                   newShape.get());
+    newCol = std::make_unique<ColDescriptor>(colName, DataType(newType), TensorImpl::kFlexible, rank, newShape.get());
 
     data_schema_->AddColumn(*newCol);
   }
@@ -411,4 +409,3 @@ Status RandomDataOp::Reset() {
 }
 }  // namespace dataset
 }  // namespace mindspore
-
diff --git a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
index 920e28cb87..b3097c7031 100644
--- a/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
+++ b/mindspore/ccsrc/device/ascend/ascend_kernel_runtime.h
@@ -57,7 +57,7 @@ class AscendKernelRuntime : public KernelRuntime {
   void ReleaseDeviceRes() override;
   bool GraphWithEmptyTaskList(const session::KernelGraph *graph) const;
   bool CheckGraphIdValid(GraphId graph_id) const;
-  static void DebugTaskIdName(GraphId graph_id) ;
+  static void DebugTaskIdName(GraphId graph_id);
 
   rtContext_t rt_context_{nullptr};
   bool initialized_{false};

From c56fe3aa2dc8d231e8816e8d5ccfc21b13c3d771 Mon Sep 17 00:00:00 2001
From: ms_yan <6576637+ms_yan@user.noreply.gitee.com>
Date: Wed, 29 Apr 2020 13:41:36 +0800
Subject: [PATCH 220/242] modify take op with an operator

---
 .../dataset/engine/datasetops/take_op.cc      | 90 ++++++++-----------
 .../ccsrc/dataset/engine/datasetops/take_op.h | 19 +---
 tests/ut/python/dataset/test_take.py          | 46 ++++++++++
 3 files changed, 85 insertions(+), 70 deletions(-)

diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc
index 872c4c27c5..7e6055027e 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/take_op.cc
@@ -17,6 +17,7 @@
 #include <utility>
 
 #include "common/utils.h"
+#include "dataset/core/config_manager.h"
 #include "dataset/engine/data_buffer.h"
 #include "dataset/engine/datasetops/take_op.h"
 #include "dataset/engine/db_connector.h"
@@ -25,7 +26,10 @@
 namespace mindspore {
 namespace dataset {
 // Builder constructor. Creates the builder object.
-TakeOp::Builder::Builder(int32_t count) : build_max_takes_(count) {}
+TakeOp::Builder::Builder(int32_t count) : build_max_takes_(count) {
+  // Take the connector size from the global ConfigManager; Build() passes it on to the TakeOp constructor.
+  builder_op_connector_size_ = GlobalContext::config_manager()->op_connector_size();
+}
 
 Status TakeOp::Builder::SanityCheck() const {
   if (build_max_takes_ <= 0) {
@@ -38,12 +42,13 @@ Status TakeOp::Builder::SanityCheck() const {
 // The builder "build" method creates the final object.
 Status TakeOp::Builder::Build(std::shared_ptr<TakeOp> *ptr) {
   RETURN_IF_NOT_OK(SanityCheck());
-  *ptr = std::make_shared<TakeOp>(build_max_takes_);
+  *ptr = std::make_shared<TakeOp>(build_max_takes_, builder_op_connector_size_);
   return Status::OK();
 }
 
 // Constructor of the TakeOp.
-TakeOp::TakeOp(int32_t count) : PipelineOp(0), max_takes_(count), take_count_(0) {}
+TakeOp::TakeOp(int32_t count, int32_t op_connector_size)
+    : PipelineOp(op_connector_size), max_takes_(count), take_count_(0) {}
 
 // A print method typically used for debugging
 void TakeOp::Print(std::ostream &out, bool show_all) const {
@@ -62,59 +67,41 @@ void TakeOp::Print(std::ostream &out, bool show_all) const {
   }
 }
 
-// This function will be call muti times to returns the buffer, when meet required max take count or meet
-// EOF buffer then this will stop.
-Status TakeOp::GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) {
-  if (child_.empty()) {
-    RETURN_STATUS_UNEXPECTED("TakeOp can't be the leaf node.");
-  }
-
+// Main entry point for Take
+Status TakeOp::operator()() {
+  TaskManager::FindMe()->Post();
   std::unique_ptr<DataBuffer> buf;
+  RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf));
 
-  bool last_repeat = !BitTest(op_ctrl_flags_, kDeOpRepeated) || BitTest(op_ctrl_flags_, kDeOpLastRepeat);
-  if (take_count_ == max_takes_) {
-    if (state_ == OpState::kDeOpRunning) {
-      MS_LOG(DEBUG) << "Meet max count and push-back eoe buffer.";
-      auto eoe_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE);
-      *p_buffer = std::move(eoe_buffer);
-      state_ = OpState::kDeOpIdle;
-
-      // Reset the count and drain
-      if (!last_repeat) {
-        take_count_ = 0;
-        RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true));
-        while (!buf->eoe() && !buf->eof()) {
-          RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true));
-        }
+  while (buf->eof() == false) {
+    if (take_count_ == max_takes_) {
+      // Do drain Operation
+      while (!buf->eoe() && !buf->eof()) {
+        RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf));
       }
-    } else if (state_ == OpState::kDeOpIdle) {
-      MS_LOG(DEBUG) << "Meet max count and push-back eof buffer.";
-      auto eof_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF);
-      *p_buffer = std::move(eof_buffer);
+    }
+
+    // Loop until non EOE is received
+    if (buf->eoe()) {
       take_count_ = 0;
-    } else {
-      MS_LOG(WARNING) << "Invalid OpState: " << state_;
+      RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(buf)));
+      RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf));
+      continue;
     }
-    return Status::OK();
-  }
-  RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf, worker_id, true));
-  // Loop until non EOE is received
-  if (buf->eoe()) {
-    take_count_ = 0;
-    *p_buffer = std::move(buf);
-    return Status::OK();
-  }
 
-  // Check if the last buf is next eof
-  if (buf->eof()) {
-    *p_buffer = std::move(buf);
-    return Status::OK();
+    // Get buffer and push back when take_count is still small
+    if (take_count_ < max_takes_) {
+      std::unique_ptr<DataBuffer> p_buffer;
+      RETURN_IF_NOT_OK(FillBuffer(&buf, &p_buffer));
+      RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(p_buffer)));
+    }
+    RETURN_IF_NOT_OK(child_[0]->GetNextBuffer(&buf));
   }
 
-  // Get buffer and push back when take_count is still small
-  if (take_count_ < max_takes_) {
-    RETURN_IF_NOT_OK(FillBuffer(&buf, p_buffer));
-  }
+  take_count_ = 0;
+  MS_LOG(DEBUG) << "Meet the end and push-back eof buffer.";
+  auto eof_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF);
+  RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(eof_buffer)));
   return Status::OK();
 }
 
@@ -139,13 +126,6 @@ Status TakeOp::FillBuffer(std::unique_ptr<DataBuffer> *buffer, std::unique_ptr<D
   return Status::OK();
 }
 
-// Class functor operator () override.
-// Most dataset ops operate by launching a thread (see ExecutionTree).
-// However, the TakeOp is defined as a inlined operator, so it is invalid to launch the
-// functor since this op runs inlined inside another operator.  The function is overloaded to
-// ensure that it is not called by mistake (it will generate an error).
-Status TakeOp::operator()() { RETURN_STATUS_UNEXPECTED("Logic error. TakeOp is an inlined operator."); }
-
 Status TakeOp::PrepareNodePostAction() {
   RETURN_IF_NOT_OK(PipelineOp::PrepareNodePostAction());
   tree_->AddToRepeatStack(shared_from_this());
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h b/mindspore/ccsrc/dataset/engine/datasetops/take_op.h
index 02218cf610..f70a1e91a3 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/take_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/take_op.h
@@ -45,6 +45,7 @@ class TakeOp : public PipelineOp {
 
    private:
     int32_t build_max_takes_;
+    int32_t builder_op_connector_size_;
 
     Status SanityCheck() const;
   };
@@ -52,7 +53,7 @@ class TakeOp : public PipelineOp {
   // Constructor of the TakeOp.
   // @note The builder class should be used to call it
   // @param count - The number of takes to do
-  explicit TakeOp(int32_t count);
+  explicit TakeOp(int32_t count, int32_t op_connector_size);
 
   // Destructor
   ~TakeOp() = default;
@@ -72,23 +73,11 @@ class TakeOp : public PipelineOp {
     return out;
   }
 
-  // Class functor operator () override.
-  // Most dataset ops operate by launching a thread (see ExecutionTree).
-  // However, the TakeOp is defined as a inlined operator, so it is invalid to launch the
-  // functor since this op runs inlined inside another operator.  The function is overloaded to
-  // ensure that it is not called by mistake (it will generate an error).
+  // All dataset ops operate by launching a thread (see ExecutionTree). This class functor will
+  // provide the master loop that drives the logic for performing the work
   // @return Status - The error code return
   Status operator()() override;
 
-  // Gets a buffer from the child node. The caller is typically our parent node.
-  // @note This function sets the `retryIfEoe` flag when popping from the child connector. This way,
-  // this function will retry to pop the connector again and will get the non-EOE buffer if any.
-  // @param p_buffer - output pointer to the buffer that it will fetch.
-  // @param worker_id - The worker id
-  // @param retry_if_eoe Set this flag to true to allow calling pop() again after the first pop() returns EOE.
-  // @return Status - The error code return
-  Status GetNextBuffer(std::unique_ptr<DataBuffer> *p_buffer, int32_t worker_id, bool retry_if_eoe) override;
-
   // During tree prepare phase, operators may have specific post-operations to perform depending on
   // their role.
   // @notes Derived versions of this function should always call it's superclass version first
diff --git a/tests/ut/python/dataset/test_take.py b/tests/ut/python/dataset/test_take.py
index ed71f67e26..64efc7a785 100644
--- a/tests/ut/python/dataset/test_take.py
+++ b/tests/ut/python/dataset/test_take.py
@@ -30,6 +30,12 @@ def generator_10():
         yield np.array([i]),
 
 
+def filter_func_ge(data):
+    if data > 3:
+        return False
+    return True
+
+
 def test_take_01():
     """
     Test take: origin there are 3 row, and take 1 row, in this case: will not meet eoe and eof
@@ -297,6 +303,44 @@ def test_take_16():
     assert sum([1 for _ in data1]) == 5
 
 
+def test_take_17():
+    """
+    Test take: take first, then do filter operation
+    """
+    logger.info("test_take_17")
+    data1 = ds.GeneratorDataset(generator_10, ["data"])
+
+    data1 = data1.take(8)
+    data1 = data1.filter(predicate=filter_func_ge, num_parallel_workers=4)
+
+    # Here i refers to index, d refers to data element 
+    for i, d in enumerate(data1):
+        assert i == d[0][0]
+
+    assert sum([1 for _ in data1]) == 4
+
+
+def test_take_18():
+    """
+    Test take: take first, then do filter, skip, batch and repeat operation
+    """
+    logger.info("test_take_18")
+    data1 = ds.GeneratorDataset(generator_10, ["data"])
+
+    data1 = data1.take(8)
+    data1 = data1.filter(predicate=filter_func_ge, num_parallel_workers=4)
+    data1 = data1.skip(2)
+
+    data1 = data1.batch(2)
+    data1 = data1.repeat(2)
+
+    # Here i refers to index, d refers to data element 
+    for i, d in enumerate(data1):
+        assert 2 == d[0][0]
+
+    assert sum([1 for _ in data1]) == 2
+
+
 if __name__ == '__main__':
     test_take_01()
     test_take_02()
@@ -314,4 +358,6 @@ if __name__ == '__main__':
     test_take_14()
     test_take_15()
     test_take_16()
+    test_take_17()
+    test_take_18()
     logger.info('== test take operation finished ==')
\ No newline at end of file

From fdbef206386d8be9ea36c5f7afb54f1d07d4517e Mon Sep 17 00:00:00 2001
From: wanghua <wanghua36@huawei.com>
Date: Thu, 30 Apr 2020 10:28:46 +0800
Subject: [PATCH 221/242] fix loss scale and task_sink_mode to false at the
 same time cause segmentation fault

---
 mindspore/ccsrc/session/ascend_session.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/ccsrc/session/ascend_session.cc b/mindspore/ccsrc/session/ascend_session.cc
index 519a5aa840..ace5e34335 100755
--- a/mindspore/ccsrc/session/ascend_session.cc
+++ b/mindspore/ccsrc/session/ascend_session.cc
@@ -990,7 +990,7 @@ void AscendSession::MergeGraphExecOrder() {
     auto context_ptr = MsContext::GetInstance();
     MS_EXCEPTION_IF_NULL(context_ptr);
     if (!context_ptr->enable_task_sink()) {
-      MS_LOG(INFO) << "Control sink network should run with task-sink mode!";
+      MS_LOG(EXCEPTION) << "Control sink network should run with task-sink mode!";
     }
   }
   // if first graph is common,the final graph has no label,then set the stream of final graph same with the first graph

From 353cbc99ff4912a070b84bdf5b7098fd64eb3ef2 Mon Sep 17 00:00:00 2001
From: Zhang Qinghua <zhangqinghua3@huawei.com>
Date: Thu, 30 Apr 2020 10:38:05 +0800
Subject: [PATCH 222/242] Remove redundant process of keeping roots.

---
 mindspore/ccsrc/ir/func_graph_cloner.h | 2 +-
 mindspore/ccsrc/optimizer/optimizer.h  | 7 -------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/ir/func_graph_cloner.h b/mindspore/ccsrc/ir/func_graph_cloner.h
index 426cf447a3..10b4b0111e 100644
--- a/mindspore/ccsrc/ir/func_graph_cloner.h
+++ b/mindspore/ccsrc/ir/func_graph_cloner.h
@@ -59,7 +59,7 @@ class Cloner {
 
   // Map of replicate nodes and graphs
   std::unordered_map<AnfNodePtr, AnfNodePtr> *cloned_node() { return &repl_node_; }
-  std::unordered_map<FuncGraphPtr, FuncGraphPtr> cloned_func_graph() { return repl_func_graph_; }
+  std::unordered_map<FuncGraphPtr, FuncGraphPtr> &cloned_func_graph() { return repl_func_graph_; }
 
   // Scope of cloned graphs
   void set_scope(const ScopePtr &scope) { scope_ = scope; }
diff --git a/mindspore/ccsrc/optimizer/optimizer.h b/mindspore/ccsrc/optimizer/optimizer.h
index cadbde0842..1a0ddbc65f 100644
--- a/mindspore/ccsrc/optimizer/optimizer.h
+++ b/mindspore/ccsrc/optimizer/optimizer.h
@@ -185,13 +185,6 @@ class Optimizer : public std::enable_shared_from_this<Optimizer> {
         break;
       }
     }
-
-    auto keep_root = [&func_graph, this]() {
-      std::vector<FuncGraphPtr> func_graphs;
-      func_graphs.push_back(func_graph);
-      resource_->manager()->KeepRoots(func_graphs);
-    };
-    use_profile ? WITH(MsProfile::GetProfile()->Step("keep_roots")) keep_root : keep_root();
     return func_graph;
   }
 

From 1779479d4b9aa7a31b91255f10207b1935b8b677 Mon Sep 17 00:00:00 2001
From: leonwanghui <wanghui71leon@gmail.com>
Date: Thu, 30 Apr 2020 10:43:47 +0800
Subject: [PATCH 223/242] Fix release package link in README.md

Signed-off-by: leonwanghui <wanghui71leon@gmail.com>
---
 README.md | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index e0ca8a9417..24108cf3fe 100644
--- a/README.md
+++ b/README.md
@@ -76,13 +76,36 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
 
     ```
-    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
+    pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
     ```
 
 2. Run the following command to verify the install.
 
+    ```python
+    import numpy as np
+    import mindspore.context as context
+    import mindspore.nn as nn
+    from mindspore import Tensor
+    from mindspore.ops import operations as P
+
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+
+    class Mul(nn.Cell):
+        def __init__(self):
+            super(Mul, self).__init__()
+            self.mul = P.Mul()
+
+        def construct(self, x, y):
+            return self.mul(x, y)
+
+    x = Tensor(np.array([1.0, 2.0, 3.0]).astype(np.float32))
+    y = Tensor(np.array([4.0, 5.0, 6.0]).astype(np.float32))
+
+    mul = Mul()
+    print(mul(x, y))
+    ```
     ```
-    python -c 'import mindspore'
+    [ 4. 10. 18.]
     ```
 
 ### From Source
@@ -111,7 +134,7 @@ currently the containerized build options are supported as follows:
     For `CPU` backend, you can directly pull and run the latest stable image using the below command:
     ```
     docker pull mindspore/mindspore-cpu:0.2.0-alpha
-    docker run -it mindspore/mindspore-cpu:0.2.0-alpha python -c 'import mindspore'
+    docker run -it mindspore/mindspore-cpu:0.2.0-alpha /bin/bash
     ```
 
 * GPU
@@ -160,7 +183,7 @@ currently the containerized build options are supported as follows:
     ```
 
 If you want to learn more about the building process of MindSpore docker images,
-please check out `docker` folder for the details.
+please check out the [docker](docker/README.md) folder for the details.
 
 ## Quickstart
 

From 5a6540450ea88d61b11a4de62bf810212d5550a5 Mon Sep 17 00:00:00 2001
From: yao_yf <yaoyifan1@huawei.com>
Date: Wed, 29 Apr 2020 16:06:13 +0800
Subject: [PATCH 224/242] use param name as the key of strategy checkpoint

---
 mindspore/ccsrc/parallel/ops_info/ops_utils.h |  2 ++
 .../ccsrc/parallel/step_auto_parallel.cc      |  3 +--
 mindspore/ccsrc/parallel/step_parallel.cc     | 26 +++++++++----------
 mindspore/ccsrc/parallel/step_parallel.h      |  2 +-
 .../parallel_strategy_checkpoint.h            |  1 -
 .../parallel/test_strategy_checkpoint.py      |  8 ++++++
 6 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/mindspore/ccsrc/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/parallel/ops_info/ops_utils.h
index bdae87858d..e0b62eb233 100644
--- a/mindspore/ccsrc/parallel/ops_info/ops_utils.h
+++ b/mindspore/ccsrc/parallel/ops_info/ops_utils.h
@@ -61,6 +61,8 @@ constexpr char CROSS_BATCH[] = "cross_batch";
 constexpr char STEP_PARALLEL_BEGIN[] = "step_parallel_begin";
 constexpr char STEP_PARALLEL_END[] = "step_parallel_end";
 constexpr char STEP_AUTO_PARALLEL_BEGIN[] = "step_auto_parallel_begin.dot";
+constexpr char REQUIRES_GRAD[] = "requires_grad";
+constexpr char PARAM_NAME[] = "name";
 
 constexpr char RELU_TYPE[] = "relu";
 constexpr char RELU6_TYPE[] = "relu6";
diff --git a/mindspore/ccsrc/parallel/step_auto_parallel.cc b/mindspore/ccsrc/parallel/step_auto_parallel.cc
index f0be47642e..b16108a279 100644
--- a/mindspore/ccsrc/parallel/step_auto_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_auto_parallel.cc
@@ -387,8 +387,7 @@ OperatorInfoPtr CreateTheOperatorInfo(const PrimitivePtr &prim, const CNodePtr &
   operator_info->set_outputs_dtype(cnode->Type());
   operator_info->set_cnode(cnode);
   // key of strategy map
-  std::string instance_name = prim->instance_name();
-  std::string strategy_key_name = cnode->scope()->name() + std::string(CONNSYMBOL) + instance_name;
+  std::string strategy_key_name = NodeParameterName(cnode);
   bool load_strategy_from_ckpt =
     StrategyCheckpoint::GetInstance().LoadCheckPointOn() && stra_map->find(strategy_key_name) != stra_map->end();
   // If no strategy has been configured for this operator, then candidate strategies are generated for
diff --git a/mindspore/ccsrc/parallel/step_parallel.cc b/mindspore/ccsrc/parallel/step_parallel.cc
index 62fb96c297..21a515ff85 100644
--- a/mindspore/ccsrc/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/parallel/step_parallel.cc
@@ -1423,11 +1423,9 @@ void ExtractInformation(const std::vector<AnfNodePtr> &all_nodes) {
       }
       // load strategy checkpoint
       // key of strategy map
-      std::string instance_name = prim->instance_name();
-      std::string strategy_key_name = cnode->scope()->name() + std::string(CONNSYMBOL) + instance_name;
+      std::string strategy_key_name = NodeParameterName(cnode);
       bool load_strategy_from_ckpt =
         StrategyCheckpoint::GetInstance().LoadCheckPointOn() && stra_map.find(strategy_key_name) != stra_map.end();
-
       if (!StrategyFound(attrs) && !load_strategy_from_ckpt) {
         MS_LOG(INFO) << "ExtractInformation: the strategy of node " << node->ToString() << " prim " << prim->name()
                      << " is empty, using batch parallel";
@@ -2038,17 +2036,20 @@ void HandleSymbolicKeyInstance(const FuncGraphPtr &root, const std::vector<AnfNo
   }
 }
 
-bool NodeWithParameter(const CNodePtr &node) {
+std::string NodeParameterName(const CNodePtr &node) {
   std::vector<AnfNodePtr> node_inputs{node->inputs()};
   for (auto input : node_inputs) {
     if (input->isa<Parameter>()) {
       auto input_parameter = input->cast<ParameterPtr>();
       if (input_parameter->has_default()) {
-        return py::cast<bool>(parse::python_adapter::GetPyObjAttr(input_parameter->default_param(), "requires_grad"));
+        if (py::cast<bool>(parse::python_adapter::GetPyObjAttr(input_parameter->default_param(), REQUIRES_GRAD))) {
+          return py::cast<std::string>(
+            parse::python_adapter::GetPyObjAttr(input_parameter->default_param(), PARAM_NAME));
+        }
       }
     }
   }
-  return false;
+  return "";
 }
 
 void CheckpointStrategy(const FuncGraphPtr &func_graph) {
@@ -2060,21 +2061,20 @@ void CheckpointStrategy(const FuncGraphPtr &func_graph) {
   for (auto &node : all_nodes) {
     MS_EXCEPTION_IF_NULL(node);
     auto cnode = node->cast<CNodePtr>();
-    if ((cnode == nullptr) || !IsValueNode<Primitive>(cnode->input(0)) || !NodeWithParameter(cnode)) {
+    if ((cnode == nullptr) || !IsValueNode<Primitive>(cnode->input(0))) {
+      continue;
+    }
+    std::string param_name = NodeParameterName(cnode);
+    if (param_name.empty()) {
       continue;
     }
     PrimitivePtr prim = GetValueNode<PrimitivePtr>(cnode->input(0));
     MS_EXCEPTION_IF_NULL(prim);
     OperatorInfoPtr operator_info = cnode->operator_info();
     if (operator_info) {
-      if (prim->instance_name().empty()) {
-        MS_LOG(EXCEPTION) << "Node with parameter to checkpoint strategy needs instance name";
-      }
-      std::string instance_name = prim->instance_name();
       StrategyPtr strategyPtr = operator_info->strategy();
       MS_EXCEPTION_IF_NULL(node->scope());
-      std::string node_name = node->scope()->name() + std::string(CONNSYMBOL) + instance_name;
-      stra_map[node_name] = strategyPtr;
+      stra_map[param_name] = strategyPtr;
     }
   }
   if (StrategyCheckpoint::GetInstance().Save(stra_map) != SUCCESS) {
diff --git a/mindspore/ccsrc/parallel/step_parallel.h b/mindspore/ccsrc/parallel/step_parallel.h
index c26f65ec65..93c3ed798c 100644
--- a/mindspore/ccsrc/parallel/step_parallel.h
+++ b/mindspore/ccsrc/parallel/step_parallel.h
@@ -135,7 +135,7 @@ void ReshapeInit(const std::vector<AnfNodePtr> &all_nodes);
 void ParallelCommunication(const FuncGraphPtr &root, const std::vector<AnfNodePtr> &all_nodes,
                            const FuncGraphManagerPtr &manager);
 
-bool NodeWithParameter(const CNodePtr &node);
+std::string NodeParameterName(const CNodePtr &node);
 
 void CheckpointStrategy(const FuncGraphPtr &func_graph);
 
diff --git a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
index 0cf6229fa3..a758a9e7bb 100644
--- a/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
+++ b/mindspore/ccsrc/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h
@@ -25,7 +25,6 @@
 
 namespace mindspore {
 namespace parallel {
-
 using StrategyMap = std::unordered_map<std::string, StrategyPtr>;
 class StrategyCheckpoint {
  public:
diff --git a/tests/ut/python/parallel/test_strategy_checkpoint.py b/tests/ut/python/parallel/test_strategy_checkpoint.py
index 89b6dd1dbb..d95b13f435 100644
--- a/tests/ut/python/parallel/test_strategy_checkpoint.py
+++ b/tests/ut/python/parallel/test_strategy_checkpoint.py
@@ -59,6 +59,7 @@ def test_six_matmul_save():
             self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
             self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
             self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5")
+            self.weight6 = Parameter(Tensor(np.ones([32, 128]), dtype=ms.float32), name="weight6")
 
         def construct(self, x1, x6):
             out = self.matmul1(x1, self.weight1)
@@ -66,6 +67,7 @@ def test_six_matmul_save():
             out = self.matmul3(out, self.weight3)
             out = self.matmul4(out, self.weight4)
             out = self.matmul5(out, self.weight5)
+            out = out + self.weight6
             out = self.matmul6(out, x6)
             return out
 
@@ -118,12 +120,14 @@ def test_six_matmul_load():
             self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
             self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
             self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5")
+            self.weight6 = Parameter(Tensor(np.ones([32, 128]), dtype=ms.float32), name="weight6")
 
         def construct(self, x1, x6, x7):
             out = self.matmul1(x1, self.weight1)
             out = self.matmul3(out, self.weight3)
             out = self.matmul4(out, self.weight4)
             out = self.matmul5(out, self.weight5)
+            out = out + self.weight6
             out = self.matmul6(out, x6)
             out = self.matmul7(out, x7)
             return out
@@ -179,6 +183,7 @@ def test_six_matmul_save_auto():
             self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
             self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
             self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5")
+            self.weight6 = Parameter(Tensor(np.ones([32, 128]), dtype=ms.float32), name="weight6")
 
         def construct(self, x1, x6):
             out = self.matmul1(x1, self.weight1)
@@ -186,6 +191,7 @@ def test_six_matmul_save_auto():
             out = self.matmul3(out, self.weight3)
             out = self.matmul4(out, self.weight4)
             out = self.matmul5(out, self.weight5)
+            out = out + self.weight6
             out = self.matmul6(out, x6)
             return out
 
@@ -232,12 +238,14 @@ def test_six_matmul_load_auto():
             self.weight3 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight3")
             self.weight4 = Parameter(Tensor(np.ones([128, 64]), dtype=ms.float32), name="weight4")
             self.weight5 = Parameter(Tensor(np.ones([64, 128]), dtype=ms.float32), name="weight5")
+            self.weight6 = Parameter(Tensor(np.ones([32, 128]), dtype=ms.float32), name="weight6")
 
         def construct(self, x1, x6, x7):
             out = self.matmul1(x1, self.weight1)
             out = self.matmul3(out, self.weight3)
             out = self.matmul4(out, self.weight4)
             out = self.matmul5(out, self.weight5)
+            out = out + self.weight6
             out = self.matmul6(out, x6)
             out = self.matmul7(out, x7)
             return out

From 0c6cf98db08469e5d3f1f590f5631c56216dfa3c Mon Sep 17 00:00:00 2001
From: buxue <yiren19920727@163.com>
Date: Thu, 30 Apr 2020 10:42:31 +0800
Subject: [PATCH 225/242] fix bug of bprop of FloorMod

---
 mindspore/ops/_grad/grad_math_ops.py | 9 ++++-----
 tests/ut/python/ops/test_ops.py      | 5 ++---
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py
index 2f39fe8745..1663c231cc 100755
--- a/mindspore/ops/_grad/grad_math_ops.py
+++ b/mindspore/ops/_grad/grad_math_ops.py
@@ -255,13 +255,10 @@ def get_bprop_floordiv(self):
 @bprop_getters.register(P.FloorMod)
 def get_bprop_floormod(self):
     """Grad definition for `FloorMod` operation."""
-    div_op = P.FloorMod()
-    neg = P.Neg()
-    mul_op = P.Mul()
 
     def bprop(x, y, out, dout):
-        bc_x = div_op(dout, y)
-        bc_y = neg(mul_op(bc_x, out))
+        bc_x = dout
+        bc_y = -dout * (x // y)
         return binop_grad_common(x, y, bc_x, bc_y)
     return bprop
 
@@ -412,6 +409,7 @@ def get_bprop_reducesum(self):
 def get_bprop_cumsum(self):
     """Grad definition for `CumSum` operation."""
     cumsum = P.CumSum(exclusive=self.exclusive, reverse=not self.reverse)
+
     def bprop(x, axis, out, dout):
         return cumsum(dout, axis), zeros_like(axis)
     return bprop
@@ -787,6 +785,7 @@ def get_bprop_atan2(self):
     """Generate bprop for Atan2"""
 
     square = P.Square()
+
     def bprop(x, y, out, dout):
         tmp = dout / (square(x) + square(y))
         dx = tmp * y
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index 68ff816fb3..d60503158f 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -351,9 +351,8 @@ test_case_math_ops = [
         'skip': ['backward']}),
     ('FloorMod', {
         'block': P.FloorMod(),
-        'desc_inputs': [Tensor(np.random.rand(4).astype(np.float16)),
-                        Tensor(np.random.rand(4).astype(np.float16))],
-        'skip': ['backward']}),
+        'desc_inputs': [[3, 4, 5], [2, 3, 4, 5]],
+        'desc_bprop': [[2, 3, 4, 5]]}),
     ('identity', {
         'block': ops.functional.identity,
         'desc_inputs': [[2, 2]],

From e6beba0b253c2845818b3bafe5a12f3ab789d919 Mon Sep 17 00:00:00 2001
From: changzherui <changzherui1@huawei.com>
Date: Thu, 30 Apr 2020 11:06:01 +0800
Subject: [PATCH 226/242] modify timemonitor callback

---
 mindspore/train/callback.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/mindspore/train/callback.py b/mindspore/train/callback.py
index 0b5e05e6a0..50fe5fcb26 100644
--- a/mindspore/train/callback.py
+++ b/mindspore/train/callback.py
@@ -365,7 +365,7 @@ class Callback:
         >>>         print(cb_params.cur_step_num)
         >>>
         >>> print_cb = Print_info()
-        >>> model.train(epoch, dataset, callback=print_cb)
+        >>> model.train(epoch, dataset, callbacks=print_cb)
     """
     def __init__(self):
         pass
@@ -695,10 +695,3 @@ class TimeMonitor(Callback):
         epoch_mseconds = (time.time() - self.epoch_time) * 1000
         per_step_mseconds = epoch_mseconds / self.data_size
         print("epoch time: {0}, per step time: {1}".format(epoch_mseconds, per_step_mseconds), flush=True)
-
-    def step_begin(self, run_context):
-        self.step_time = time.time()
-
-    def step_end(self, run_context):
-        step_mseconds = (time.time() - self.step_time) * 1000
-        print('step time', step_mseconds, flush=True)

From d52277a9a45ed79a4bbd2187708ad50c3d38cb63 Mon Sep 17 00:00:00 2001
From: fary86 <fary.fanrui@huawei.com>
Date: Thu, 30 Apr 2020 11:12:29 +0800
Subject: [PATCH 227/242] Fix checking bug of ApplyCenteredRMSProp

---
 mindspore/ops/operations/nn_ops.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 66656b559e..785fafe13b 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -1658,9 +1658,11 @@ class ApplyCenteredRMSProp(PrimitiveWithInfer):
                 "mean_square": mean_square_dtype, "moment": moment_dtype, "grad": grad_dtype}
         validator.check_tensor_type_same(args, mstype.number_type, self.name)
 
-        args = {"learning_rate": learning_rate_dtype, "rho": rho_dtype, 'momentum': momentum_dtype,
-                "epsilon": epsilon_dtype}
-        validator.check_scalar_or_tensor_type_same(args, [mstype.float16, mstype.float32], self.name)
+        valid_types = [mstype.float16, mstype.float32]
+        args_rho = {"rho": rho_dtype, 'momentum': momentum_dtype, "epsilon": epsilon_dtype}
+        validator.check_type_same(args_rho, valid_types, self.name)
+        args_lr = {"learning_rate": learning_rate_dtype, "rho": rho_dtype}
+        validator.check_scalar_or_tensor_type_same(args_lr, valid_types, self.name, allow_mix=True)
         return var_dtype
 
 

From 1d7fe758a03deed0fa61d9e593e573b64cdf061a Mon Sep 17 00:00:00 2001
From: VectorSL <shiliang10@huawei.com>
Date: Tue, 28 Apr 2020 11:24:03 +0800
Subject: [PATCH 228/242] gpu add test for amp

---
 .../kernel/gpu/arrays/slice_gpu_kernel.h      |  6 ++--
 tests/st/networks/test_gpu_resnet.py          | 28 +++++++++++++++++--
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h
index 96e899da60..091a150fcb 100644
--- a/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/arrays/slice_gpu_kernel.h
@@ -129,10 +129,10 @@ class SliceGpuFwdKernel : public GpuKernel {
     }
     begin_ = GetAttr<std::vector<int>>(kernel_node, "begin");
     for (size_t i = 0; i < input_shape.size(); i++) {
-      if ((begin_[i] > 0 && (begin_[i] >= SizeToInt(input_shape[i]))) ||
+      if ((begin_[i] > 0 && (begin_[i] > SizeToInt(input_shape[i]))) ||
           (begin_[i] < 0 && (std::abs(begin_[i]) > SizeToInt(input_shape[i])))) {
-        MS_LOG(ERROR) << "Error input, out of bounds " << input_shape[i] << " in axis " << i << ".";
-        return false;
+        MS_LOG(INFO) << "Input out of bounds " << input_shape[i] << " in axis " << i << ".";
+        begin_[i] = 0;
       }
     }
     return true;
diff --git a/tests/st/networks/test_gpu_resnet.py b/tests/st/networks/test_gpu_resnet.py
index a5f450d5e3..a045f97501 100644
--- a/tests/st/networks/test_gpu_resnet.py
+++ b/tests/st/networks/test_gpu_resnet.py
@@ -32,9 +32,7 @@ from mindspore.nn.optim import Momentum
 from mindspore.ops import operations as P
 from mindspore.nn import TrainOneStepCell, WithLossCell
 from mindspore.nn import Dense
-from mindspore.common.initializer import initializer
-
-context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+from mindspore import amp
 
 
 def random_normal_init(shape, mean=0.0, stddev=0.01, seed=None):
@@ -326,6 +324,7 @@ def resnet50(num_classes):
 @pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
     net = resnet50(num_classes)
     lr = 0.1
     momentum = 0.9
@@ -341,3 +340,26 @@ def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
         loss = train_network(data, label)
         losses.append(loss)
     assert(losses[-1].asnumpy() < 1)
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU", enable_mem_reuse=False,
+                        enable_dynamic_memory=False)
+    net = resnet50(num_classes)
+    lr = 0.1
+    momentum = 0.9
+    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    train_network = amp.build_train_network(net, optimizer, criterion, level="O2")
+    train_network.set_train()
+    losses = []
+    for i in range(0, epoch):
+        data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
+        label = Tensor(np.ones([batch_size]).astype(np.int32))
+        loss = train_network(data, label)
+        losses.append(loss)
+    assert(losses[-1][0].asnumpy() < 1)
+    assert(losses[-1][1].asnumpy() == False)
+    assert(losses[-1][2].asnumpy() > 1)

From 157710ca0fb5573261f8cb0ee81d733d1a8a3737 Mon Sep 17 00:00:00 2001
From: Wei Luning <weiluning@huawei.com>
Date: Wed, 29 Apr 2020 10:55:06 +0800
Subject: [PATCH 229/242] bugfix* fix bug in output tuple of tuple.* check
 kRWWrite input no-variable* input x of ScatterNdUpdate should be a parameter
 node

---
 example/resnet101_imagenet2012/train.py       |  1 -
 .../ccsrc/operator/composite/do_signature.cc  |  2 ++
 mindspore/common/api.py                       |  9 ++++----
 mindspore/nn/optim/adam.py                    | 17 +++++++-------
 mindspore/nn/optim/lamb.py                    | 22 +++++++++---------
 mindspore/ops/operations/array_ops.py         |  7 ++++++
 tests/st/ops/gpu/test_assign_add_op.py        | 23 +++++++++++--------
 tests/st/ops/gpu/test_assign_op.py            | 15 ++++++------
 tests/ut/python/ops/test_math_ops.py          | 12 ++++++++++
 tests/ut/python/ops/test_momentum.py          |  3 +--
 tests/ut/python/ops/test_nn_ops.py            | 12 +++++-----
 tests/ut/python/ops/test_ops.py               | 20 ++++++----------
 12 files changed, 80 insertions(+), 63 deletions(-)

diff --git a/example/resnet101_imagenet2012/train.py b/example/resnet101_imagenet2012/train.py
index 3d0a23f93a..6a89a212ca 100755
--- a/example/resnet101_imagenet2012/train.py
+++ b/example/resnet101_imagenet2012/train.py
@@ -14,7 +14,6 @@
 # ============================================================================
 """train_imagenet."""
 import os
-import math
 import argparse
 import random
 import numpy as np
diff --git a/mindspore/ccsrc/operator/composite/do_signature.cc b/mindspore/ccsrc/operator/composite/do_signature.cc
index c3fe45a48a..1098ed1520 100644
--- a/mindspore/ccsrc/operator/composite/do_signature.cc
+++ b/mindspore/ccsrc/operator/composite/do_signature.cc
@@ -195,6 +195,8 @@ AnfNodePtr BuildNewCNode(const FuncGraphPtr &func_graph, const std::string &func
         param = func_graph->NewCNode({NewValueNode(prim::kPrimGetRefKey), param});
       }
       // If sig is SignatureEnumRW::kRWRef, not do anything.
+    } else if (sig == SignatureEnumRW::kRWWrite && type->type_id() != kObjectTypeRefKey) {
+      MS_EXCEPTION(TypeError) << "Function " << func_name << "'s input " << i << " should be a Parameter.";
     }
     // add cast op here
     if (assign_source != nullptr && sig != SignatureEnumRW::kRWWrite) {
diff --git a/mindspore/common/api.py b/mindspore/common/api.py
index 455e7a7f4f..3710e40996 100644
--- a/mindspore/common/api.py
+++ b/mindspore/common/api.py
@@ -70,12 +70,11 @@ def _wrap_func(fn):
         def _convert_data(data):
             if isinstance(data, Tensor) and not isinstance(data, MsTensor):
                 return MsTensor(data)
+            if isinstance(data, tuple):
+                return tuple(_convert_data(x) for x in data)
+            if isinstance(data, list):
+                return list(_convert_data(x) for x in data)
             return data
-
-        if isinstance(results, tuple):
-            return tuple(_convert_data(x) for x in results)
-        if isinstance(results, list):
-            return list(_convert_data(x) for x in results)
         return _convert_data(results)
 
     return wrapper
diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 87c46380f6..1a386556d9 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -57,21 +57,22 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, param, m, v, grad
     op_reshape = P.Reshape()
     op_shape = P.Shape()
 
-    param = op_cast(param, mstype.float32)
-    m = op_cast(m, mstype.float32)
-    v = op_cast(v, mstype.float32)
-    gradient = op_cast(gradient, mstype.float32)
+    param_fp32 = op_cast(param, mstype.float32)
+    m_fp32 = op_cast(m, mstype.float32)
+    v_fp32 = op_cast(v, mstype.float32)
+    gradient_fp32 = op_cast(gradient, mstype.float32)
 
-    next_m = op_mul(beta1, m) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient)
+    next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta1, gradient_fp32)
 
-    next_v = op_mul(beta2, v) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32) - beta2, op_square(gradient))
+    next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(F.tuple_to_array((1.0,)), mstype.float32)
+                                            - beta2, op_square(gradient_fp32))
 
     update = next_m / (op_sqrt(next_v) + eps)
     if decay_flag:
-        update = update + op_mul(weight_decay_tensor, param)
+        update = update + op_mul(weight_decay_tensor, param_fp32)
 
     update_with_lr = op_mul(lr, update)
-    next_param = param - op_reshape(update_with_lr, op_shape(param))
+    next_param = param_fp32 - op_reshape(update_with_lr, op_shape(param_fp32))
 
     next_v = F.depend(next_v, F.assign(param, next_param))
     next_v = F.depend(next_v, F.assign(m, next_m))
diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py
index cbeb6fa674..e026b1c560 100755
--- a/mindspore/nn/optim/lamb.py
+++ b/mindspore/nn/optim/lamb.py
@@ -67,23 +67,23 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para
     op_fill = P.Fill()
     op_dtype = P.DType()
 
-    param = op_cast(param, mstype.float32)
-    m = op_cast(m, mstype.float32)
-    v = op_cast(v, mstype.float32)
-    gradient = op_cast(gradient, mstype.float32)
+    param_fp32 = op_cast(param, mstype.float32)
+    m_fp32 = op_cast(m, mstype.float32)
+    v_fp32 = op_cast(v, mstype.float32)
+    gradient_fp32 = op_cast(gradient, mstype.float32)
 
-    next_m = op_mul(beta1, m) + op_mul(op_cast(num_one, mstype.float32) - beta1, gradient)
+    next_m = op_mul(beta1, m_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta1, gradient_fp32)
 
-    next_v = op_mul(beta2, v) + op_mul(op_cast(num_one, mstype.float32) - beta2, op_square(gradient))
+    next_v = op_mul(beta2, v_fp32) + op_mul(op_cast(num_one, mstype.float32) - beta2, op_square(gradient_fp32))
 
     next_mm = next_m / (op_cast(num_one, mstype.float32)
                         - op_pow(beta1, op_cast(global_step + num_one, mstype.float32)))
     next_vv = next_v / (op_cast(num_one, mstype.float32) -
                         op_pow(beta2, op_cast(global_step + num_one, mstype.float32)))
-    w_norm = op_norm(param)
-    g_norm = op_norm(gradient)
+    w_norm = op_norm(param_fp32)
+    g_norm = op_norm(gradient_fp32)
 
-    g_norm_hat = op_norm(op_mul(next_mm, op_rsqrt(next_vv + eps)) + weight_decay_tensor * param)
+    g_norm_hat = op_norm(op_mul(next_mm, op_rsqrt(next_vv + eps)) + weight_decay_tensor * param_fp32)
     zeros = F.zeros_like_tensor(w_norm)
     ones = op_fill(op_dtype(w_norm), op_shape(w_norm), 1.0)
     trust_ratio = op_select(
@@ -95,11 +95,11 @@ def _update_run_op(beta1, beta2, eps, lr, weight_decay_tensor, global_step, para
     update = next_mm / (op_sqrt(next_vv) + eps)
 
     if decay_flag:
-        update = update + op_mul(weight_decay_tensor, param)
+        update = update + op_mul(weight_decay_tensor, param_fp32)
 
     update_with_lr = op_mul(op_mul(trust_ratio, lr), update)
 
-    next_param = param - op_reshape(update_with_lr, op_shape(param))
+    next_param = param_fp32 - op_reshape(update_with_lr, op_shape(param_fp32))
 
     next_v = F.depend(next_v, F.assign(param, next_param))
     next_v = F.depend(next_v, F.assign(m, next_m))
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index b69b083e03..f611bc9617 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -24,6 +24,8 @@ import itertools
 import numbers
 import numpy as np
 
+from ..._c_expression import signature_rw as sig_rw
+from ..._c_expression import signature_kind as sig_kind
 from ..._checkparam import Validator as validator
 from ..._checkparam import Rel
 from ...common import dtype as mstype
@@ -1965,6 +1967,11 @@ class ScatterNdUpdate(PrimitiveWithInfer):
         >>> op = P.ScatterNdUpdate()
         >>> output = op(input_x, indices, update)
     """
+    __mindspore_signature__ = (
+        ('input_x', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD),
+        ('indices', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD),
+        ('value', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD)
+    )
 
     @prim_attr_register
     def __init__(self, use_locking=True):
diff --git a/tests/st/ops/gpu/test_assign_add_op.py b/tests/st/ops/gpu/test_assign_add_op.py
index b021a32f32..4c95177fb6 100644
--- a/tests/st/ops/gpu/test_assign_add_op.py
+++ b/tests/st/ops/gpu/test_assign_add_op.py
@@ -14,19 +14,20 @@
 # ============================================================================
 
 import pytest
-from mindspore import Tensor
+from mindspore import Tensor, Parameter
 from mindspore.ops import operations as P
 import mindspore.nn as nn
 import numpy as np
 import mindspore.context as context
 
 class AssignAdd(nn.Cell):
-    def __init__( self):
+    def __init__(self, value):
         super(AssignAdd, self).__init__()
+        self.var = Parameter(value, name="var")
         self.add = P.AssignAdd()
 
-    def construct(self, x, y):
-        res = self.add(x, y)
+    def construct(self, y):
+        res = self.add(self.var, y)
         return res
 
 @pytest.mark.level0
@@ -58,15 +59,17 @@ def test_assign_add():
     y2 = Tensor(np.arange(1 * 3 * 3 * 3).reshape(1, 3, 3, 3).astype(np.float32))
 
     context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
-    add = AssignAdd()
-    output1 = add(x1, y1)
+    add = AssignAdd(x1)
+    output1 = add(y1)
     assert (output1.asnumpy() == expect1).all()
-    output2 = add(output1, y1)
+    add = AssignAdd(output1)
+    output2 = add(y1)
     assert (output2.asnumpy() == expect2).all()
 
     context.set_context(mode=context.GRAPH_MODE, device_target='GPU')
-    add = AssignAdd()
-    output1 = add(x2, y2)
+    add = AssignAdd(x2)
+    output1 = add(y2)
     assert (output1.asnumpy() == expect1).all()
-    output2 = add(output1, y2)
+    add = AssignAdd(output1)
+    output2 = add(y2)
     assert (output2.asnumpy() == expect2).all()
diff --git a/tests/st/ops/gpu/test_assign_op.py b/tests/st/ops/gpu/test_assign_op.py
index 4cf730d763..f1fb908268 100644
--- a/tests/st/ops/gpu/test_assign_op.py
+++ b/tests/st/ops/gpu/test_assign_op.py
@@ -14,7 +14,7 @@
 # ============================================================================
 
 import pytest
-from mindspore import Tensor
+from mindspore import Tensor, Parameter
 from mindspore.ops import operations as P
 import mindspore.nn as nn
 import numpy as np
@@ -22,12 +22,13 @@ import mindspore.context as context
 
 
 class Net(nn.Cell):
-    def __init__(self):
+    def __init__(self, value):
         super(Net, self).__init__()
+        self.var = Parameter(value, name="var")
         self.assign = P.Assign()
 
-    def construct(self, var, value):
-        return self.assign(var, value)
+    def construct(self, value):
+        return self.assign(self.var, value)
 
 x = np.array([[1.2, 1], [1, 0]]).astype(np.float32)
 value = np.array([[1, 2], [3, 4.0]]).astype(np.float32)
@@ -37,13 +38,13 @@ value = np.array([[1, 2], [3, 4.0]]).astype(np.float32)
 @pytest.mark.env_onecard
 def test_assign():
     context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
-    assign = Net()
     var = Tensor(x)
-    output = assign(var, Tensor(value))
+    assign = Net(var)
+    output = assign(Tensor(value))
 
     error = np.ones(shape=[2, 2]) * 1.0e-6
     diff1 = output.asnumpy() - value
-    diff2 = var.asnumpy() - value
+    diff2 = assign.var.default_input.asnumpy() - value
     assert np.all(diff1 < error)
     assert np.all(-diff1 < error)
     assert np.all(diff2 < error)
diff --git a/tests/ut/python/ops/test_math_ops.py b/tests/ut/python/ops/test_math_ops.py
index a4a645a7ef..7ada847aac 100755
--- a/tests/ut/python/ops/test_math_ops.py
+++ b/tests/ut/python/ops/test_math_ops.py
@@ -341,6 +341,15 @@ class SignNet(nn.Cell):
     def construct(self, x):
         return self.sign(x)
 
+class AssignAdd(nn.Cell):
+    def __init__(self):
+        super().__init__()
+        self.op = P.AssignAdd()
+        self.inputdata = Parameter(initializer(1, [1], ms.float32), name="global_step")
+
+    def construct(self, input_):
+        self.inputdata = input_
+        return self.op(self.inputdata, input_)
 
 test_case_math_ops = [
     ('MatMulGrad', {
@@ -413,6 +422,9 @@ raise_set = [
     ('StridedSlice_4_Error', {
         'block': (lambda x: P.StridedSlice(new_axis_mask="1.1"), {'exception': TypeError}),
         'desc_inputs': [0]}),
+    ('AssignAdd_Error', {
+        'block': (P.AssignAdd(), {'exception': TypeError}),
+        'desc_inputs': [[1]]}),
 ]
 
 
diff --git a/tests/ut/python/ops/test_momentum.py b/tests/ut/python/ops/test_momentum.py
index 3334f1670a..28b9637015 100644
--- a/tests/ut/python/ops/test_momentum.py
+++ b/tests/ut/python/ops/test_momentum.py
@@ -38,8 +38,7 @@ def tensor_run_opt(opt, iters, learning_rate, momentum,
                    gradient, variable, moment):
     """ tensor_run_opt """
     success = True
-    new_weight = opt(gradient, moment, variable,
-                     learning_rate, momentum)
+    new_weight = opt(variable, moment, learning_rate, gradient, momentum)
     success = F.depend(success, F.assign(variable, new_weight))
     return success
 
diff --git a/tests/ut/python/ops/test_nn_ops.py b/tests/ut/python/ops/test_nn_ops.py
index ab6f31095d..5038ee28a0 100644
--- a/tests/ut/python/ops/test_nn_ops.py
+++ b/tests/ut/python/ops/test_nn_ops.py
@@ -446,12 +446,6 @@ test_cases = [
         'desc_inputs': [[128, 32, 32, 64]],
         'desc_bprop': [[128, 32, 32, 64]],
     }),
-    ('ApplyMomentum', {
-        'block': P.ApplyMomentum(),
-        'desc_inputs': [[2], [128, 32, 32, 64], [128, 32, 32, 64], [128, 32, 32, 64], [128, 32, 32, 64]],
-        'desc_bprop': [[128, 32, 32, 64]],
-        'skip': ['backward']
-    }),
     ('ScalarSummary', {
         'block': ScalarSummaryNet(),
         'desc_inputs': [2.2],
@@ -515,6 +509,12 @@ test_cases = [
 ]
 
 test_cases_for_verify_exception = [
+    ('ApplyMomentum_Error', {
+        'block': (P.ApplyMomentum(), {'exception': TypeError}),
+        'desc_inputs': [[2], [128, 32, 32, 64], [128, 32, 32, 64], [128, 32, 32, 64], [128, 32, 32, 64]],
+        'desc_bprop': [[128, 32, 32, 64]],
+        'skip': ['backward']
+    }),
     ('Conv2d_ValueError_1', {
         'block': (lambda _: P.Conv2D(3, 4, mode=-2.0), {'exception': TypeError}),
         'desc_inputs': [0],
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index 68ff816fb3..23bf7da4b4 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -674,12 +674,6 @@ test_case_nn_ops = [
         'desc_inputs': [[128, 64, 32, 32], [128, 64, 32, 32], [64], [64], [64], [64]],
         'desc_bprop': [[128, 64, 32, 32], [64], [64], [64], [64]],
         'skip': ['backward']}),
-    ('ApplyMomentum', {
-        'block': P.ApplyMomentum(),
-        'desc_inputs': [[128, 32, 32, 64], [128, 32, 32, 64],
-                        [32, 32, 64], [32, 32, 64], [32, 32, 64]],
-        'desc_bprop': [[128, 32, 32, 64]],
-        'skip': ['backward']}),
     ('TopK', {
         'block': P.TopK(),
         'desc_const': [5],
@@ -1113,12 +1107,6 @@ test_case_other_ops = [
         'desc_inputs': (Tensor(np.ones((1, 3, 6, 6), np.float32)),
                         Tensor(np.ones((2, 4), np.int32))),
         'desc_bprop': [[2]]}),
-    ('ScatterNdUpdate', {
-        'block': P.ScatterNdUpdate(),
-        'desc_inputs': (Tensor(np.ones((2, 3), np.float32)),
-                        Tensor(np.ones((2, 2), np.int32)),
-                        Tensor(np.ones((2,), np.float32))),
-        'desc_bprop': [[2, 3]]}),
     ('ScatterNd', {
         'block': P.ScatterNd(),
         'desc_const': [(3, 3)],
@@ -1178,7 +1166,7 @@ import mindspore.context as context
 @non_graph_engine
 @mindspore_test(pipeline_for_compile_forward_ge_graph_for_case_by_case_config)
 def test_exec():
-    context.set_context(mode=context.GRAPH_MODE)
+    context.set_context(mode=context.GRAPH_MODE, save_graphs=True)
     return test_exec_case
 
 
@@ -1207,6 +1195,12 @@ raise_set = [
         'block': (NetForFlatten0D(), {'exception': ValueError}),
         'desc_inputs': [Tensor(np.array(0).astype(np.int32))],
         'desc_bprop': [Tensor(np.array(0).astype(np.int32))]}),
+    ('ScatterNdUpdate', {
+        'block': (P.ScatterNdUpdate(), {'exception': TypeError}),
+        'desc_inputs': (Tensor(np.ones((2, 3), np.float32)),
+                        Tensor(np.ones((2, 2), np.int32)),
+                        Tensor(np.ones((2,), np.float32))),
+        'desc_bprop': [[2, 3]]}),
 ]
 
 

From 5e7cef7e3d4bc732ac4533ccb1c5a9260d99de0a Mon Sep 17 00:00:00 2001
From: panfengfeng <panfengfeng@huawei.com>
Date: Thu, 30 Apr 2020 13:12:05 +0800
Subject: [PATCH 230/242] fix generatordataset check shuffle parameter

---
 mindspore/dataset/engine/validators.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py
index dbe8e47d03..4f1bb2c2d7 100644
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -570,6 +570,8 @@ def check_generatordataset(method):
         check_param_type(nreq_param_int, param_dict, int)
         nreq_param_list = ["column_types"]
         check_param_type(nreq_param_list, param_dict, list)
+        nreq_param_bool = ["shuffle"]
+        check_param_type(nreq_param_bool, param_dict, bool)
 
         num_shards = param_dict.get("num_shards")
         shard_id = param_dict.get("shard_id")

From fa21353c5c77e562e266bd355b60051412f0762f Mon Sep 17 00:00:00 2001
From: "Etone.Chan" <etone.chan@huawei.com>
Date: Wed, 29 Apr 2020 16:59:09 +0800
Subject: [PATCH 231/242] get input info of fusionop by visitkernel

---
 .../ascend/buffer_fusion/buffer_fusion.cc     | 21 ++++++-------------
 mindspore/ops/_op_impl/tbe/relu6_grad.py      |  2 +-
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
index 851831383b..a2313a50d0 100644
--- a/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/buffer_fusion/buffer_fusion.cc
@@ -270,17 +270,9 @@ kernel::KernelBuildInfoPtr CreateFusionOpKernelInfo(const std::vector<AnfNodePtr
   std::vector<std::string> inputs_format;
   std::vector<TypeId> inputs_data_type;
   for (const auto &input : inputs_list) {
-    if (input->isa<CNode>() && AnfAlgo::GetCNodeName(input) == prim::kPrimTupleGetItem->name()) {
-      auto tuple_getitem = input->cast<CNodePtr>();
-      MS_EXCEPTION_IF_NULL(tuple_getitem);
-      inputs_format.push_back(AnfAlgo::GetOutputFormat(
-        tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
-      inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(
-        tuple_getitem->input(1), IntToSize(GetValue<int>(GetValueNode(tuple_getitem->input(2))))));
-    } else {
-      inputs_format.push_back(AnfAlgo::GetOutputFormat(input, 0));
-      inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(input, 0));
-    }
+    auto real_input = AnfAlgo::VisitKernel(input, 0);
+    inputs_format.push_back(AnfAlgo::GetOutputFormat(real_input.first, real_input.second));
+    inputs_data_type.push_back(AnfAlgo::GetOutputDeviceDataType(real_input.first, real_input.second));
   }
   // outputs format and data type
   std::vector<std::string> outputs_format;
@@ -375,11 +367,10 @@ void GetFusionScopeComputeNodeList(session::KernelGraph *kernel_graph,
   }
 }
 
-void GetFusionScopeInputNodeList(session::KernelGraph *kernel_graph,
+void GetFusionScopeInputNodeList(const session::KernelGraph &kernel_graph,
                                  std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) {
-  MS_EXCEPTION_IF_NULL(kernel_graph);
   MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
-  auto manager = kernel_graph->manager();
+  auto manager = kernel_graph.manager();
   MS_EXCEPTION_IF_NULL(manager);
 
   for (auto &buffer_fusion_info : *buffer_fusion_infos) {
@@ -643,7 +634,7 @@ void BufferFusion::GetBufferFusionInfo(session::KernelGraph *kernel_graph,
                                        std::unordered_map<int32_t, BufferFusionInfo_t> *buffer_fusion_infos) const {
   MS_EXCEPTION_IF_NULL(buffer_fusion_infos);
   GetFusionScopeComputeNodeList(kernel_graph, buffer_fusion_infos);
-  GetFusionScopeInputNodeList(kernel_graph, buffer_fusion_infos);
+  GetFusionScopeInputNodeList(*kernel_graph, buffer_fusion_infos);
   GetFusionScopeOutputNodeList(kernel_graph, buffer_fusion_infos);
   for (auto &buffer_fusion_info : *buffer_fusion_infos) {
     buffer_fusion_info.second.kernel_build_info =
diff --git a/mindspore/ops/_op_impl/tbe/relu6_grad.py b/mindspore/ops/_op_impl/tbe/relu6_grad.py
index eaf3449fe7..5a9af9b425 100644
--- a/mindspore/ops/_op_impl/tbe/relu6_grad.py
+++ b/mindspore/ops/_op_impl/tbe/relu6_grad.py
@@ -17,7 +17,7 @@
 from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType
 
 relu6_grad_op_info = TBERegOp("ReLU6Grad") \
-    .fusion_type("ELEMWISE") \
+    .fusion_type("OPAQUE") \
     .async_flag(False) \
     .binfile_name("relu6_grad.so") \
     .compute_cost(10) \

From eacfce78cca53a3d1ac8ea4cf0ebf4bd061c18c9 Mon Sep 17 00:00:00 2001
From: guohongzilong <2713219276@qq.com>
Date: Thu, 30 Apr 2020 13:44:30 +0800
Subject: [PATCH 232/242] print op does not support tensors whose data is a
 scalar

---
 mindspore/ops/operations/debug_ops.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mindspore/ops/operations/debug_ops.py b/mindspore/ops/operations/debug_ops.py
index 48ede57be4..6887c778ed 100644
--- a/mindspore/ops/operations/debug_ops.py
+++ b/mindspore/ops/operations/debug_ops.py
@@ -198,7 +198,11 @@ class Print(PrimitiveWithInfer):
     Output tensor or string to stdout.
 
     Note:
-        The print operation cannot support float64 and bool types currently.
+        The print operation does not currently support the following cases:
+
+        1. The data type of the tensor is float64 or bool.
+
+        2. The data of the tensor is a scalar.
 
     Inputs:
         - **input_x** (Union[Tensor, str]) - The graph node to attach to. The input supports

From 76fda356f6f3fa7461e7f03b8b223f5334f00b12 Mon Sep 17 00:00:00 2001
From: panfengfeng <panfengfeng@huawei.com>
Date: Thu, 30 Apr 2020 14:59:31 +0800
Subject: [PATCH 233/242] fix randomaffine error

---
 mindspore/dataset/transforms/vision/validators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/dataset/transforms/vision/validators.py b/mindspore/dataset/transforms/vision/validators.py
index 96d0a3bfdc..aff2d3bc0c 100644
--- a/mindspore/dataset/transforms/vision/validators.py
+++ b/mindspore/dataset/transforms/vision/validators.py
@@ -120,7 +120,7 @@ def check_degrees(degrees):
         degrees = (-degrees, degrees)
     elif isinstance(degrees, (list, tuple)):
         if len(degrees) != 2:
-            raise ValueError("If degrees is a sequence, the length must be 2.")
+            raise TypeError("If degrees is a sequence, the length must be 2.")
     else:
         raise TypeError("Degrees must be a single non-negative number or a sequence")
     return degrees

From 7185961e892b8e21a8008e0f65f0f8d5962421a2 Mon Sep 17 00:00:00 2001
From: YuJianfeng <yujianfeng5@huawei.com>
Date: Wed, 29 Apr 2020 11:14:50 +0800
Subject: [PATCH 234/242] Enable BatchNorm fusion pass

---
 .../ascend/ascend_backend_optimization.cc     |   6 +-
 .../ir_fusion/fused_batch_norm_fusion.cc      | 287 +++++++++++-------
 .../ir_fusion/fused_batch_norm_fusion.h       |  41 ++-
 mindspore/nn/layer/normalization.py           |  43 +--
 .../ir_fusion/fused_batch_norm_fusion_test.cc |  54 ++++
 .../fused_batch_norm_fusion_test.py           |  13 +-
 6 files changed, 292 insertions(+), 152 deletions(-)
 create mode 100644 tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc

diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index 66ea5ee526..4294f48e47 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -19,6 +19,7 @@
 #include "pre_activate/common/optimizer.h"
 #include "pre_activate/ascend/ir_fission/bn_split.h"
 #include "pre_activate/ascend/ir_fission/bn_grad_split.h"
+#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h"
 #include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h"
 #include "pre_activate/ascend/ir_fission/layer_norm_grad_split.h"
 #include "pre_activate/pass/communication_op_fusion.h"
@@ -87,7 +88,6 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) {
   ir_fusion_pm->AddPass(std::make_shared<ReshapeTransposeFusion>());
   ir_fusion_pm->AddPass(std::make_shared<TransposeReshapeFusion>());
   ir_fusion_pm->AddPass(std::make_shared<ClipByValueFusion>());
-  ir_fusion_pm->AddPass(std::make_shared<FusedBatchNormFusion>());
   ir_fusion_pm->AddPass(std::make_shared<TopKSplit>());
   ir_fusion_pm->AddPass(std::make_shared<AdamApplyOneWithDecayRule>());
   ir_fusion_pm->AddPass(std::make_shared<AdamApplyOneFusion>());
@@ -193,8 +193,8 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap
   }
   auto optimizer = std::make_shared<GraphOptimizer>();
   auto ir_fusion_pm = std::make_shared<PassManager>("ir_fusion_pm");
-  ir_fusion_pm->AddPass(std::make_shared<BnSplit>());
-  ir_fusion_pm->AddPass(std::make_shared<BnGradSplit>());
+  ir_fusion_pm->AddPass(std::make_shared<BatchNormGradSplit>());
+  ir_fusion_pm->AddPass(std::make_shared<FusedBatchNormFusion>());
   ir_fusion_pm->AddPass(std::make_shared<AddMemcpyAsync>());
   if (context_ptr->ir_fusion_flag()) {
     AddAscendBackendOptionalIRFusion(ir_fusion_pm.get());
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc
index 12f2684b3b..7641772d7a 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.cc
@@ -23,6 +23,8 @@
 namespace mindspore {
 namespace opt {
 namespace {
+constexpr size_t kReplaceOutputIndex0 = 3;
+constexpr size_t kReplaceOutputIndex1 = 4;
 bool IsC(const BaseRef &n) {
   if (utils::isa<AnfNodePtr>(n)) {
     AnfNodePtr in = utils::cast<AnfNodePtr>(n);
@@ -32,52 +34,6 @@ bool IsC(const BaseRef &n) {
   return false;
 }
 
-AnfNodePtr GetBatchNormNode(const AnfNodePtr &node) {
-  MS_EXCEPTION_IF_NULL(node);
-  auto depend_cnode = node->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(depend_cnode);
-  CheckCNodeInputSize(depend_cnode, kDependInputNum);
-  AnfNodePtr assign_sub = depend_cnode->input(2);
-  MS_EXCEPTION_IF_NULL(assign_sub);
-  auto assign_sub_cnode = assign_sub->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(assign_sub_cnode);
-  CheckCNodeInputSize(assign_sub_cnode, kAssignSubInputNum);
-  AnfNodePtr mul = assign_sub_cnode->input(2);
-  MS_EXCEPTION_IF_NULL(mul);
-  auto mul_cnode = mul->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(mul_cnode);
-  CheckCNodeInputSize(mul_cnode, kMulInputNum);
-  AnfNodePtr sub = mul_cnode->input(1);
-  MS_EXCEPTION_IF_NULL(sub);
-  auto sub_cnode = sub->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(sub_cnode);
-  CheckCNodeInputSize(sub_cnode, kSubInputNum);
-  AnfNodePtr tuple_getitem = sub_cnode->input(2);
-  MS_EXCEPTION_IF_NULL(tuple_getitem);
-  auto tuple_getitem_cnode = tuple_getitem->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(tuple_getitem_cnode);
-  CheckCNodeInputSize(tuple_getitem_cnode, kTupleGetitemInputNum);
-  return tuple_getitem_cnode->input(1);
-}
-
-bool CompareTupleGetitem(const AnfNodePtr &n1, const AnfNodePtr &n2) {
-  MS_EXCEPTION_IF_NULL(n1);
-  MS_EXCEPTION_IF_NULL(n2);
-  auto n1_cnode = n1->cast<CNodePtr>();
-  auto n2_cnode = n2->cast<CNodePtr>();
-  MS_EXCEPTION_IF_NULL(n1_cnode);
-  MS_EXCEPTION_IF_NULL(n2_cnode);
-  auto index_input1 = n1_cnode->input(kInputNodeOutputIndexInTupleGetItem);
-  MS_EXCEPTION_IF_NULL(index_input1);
-  auto value_node1 = index_input1->cast<ValueNodePtr>();
-  MS_EXCEPTION_IF_NULL(value_node1);
-  auto index_input2 = n2_cnode->input(kInputNodeOutputIndexInTupleGetItem);
-  MS_EXCEPTION_IF_NULL(index_input2);
-  auto value_node2 = index_input2->cast<ValueNodePtr>();
-  MS_EXCEPTION_IF_NULL(value_node2);
-  return GetValue<int>(value_node1->value()) < GetValue<int>(value_node2->value());
-}
-
 void GetBNOutput(const FuncGraphPtr &func_graph, const AnfNodePtr &bn, std::vector<AnfNodePtr> *bn_outputs) {
   MS_EXCEPTION_IF_NULL(func_graph);
   MS_EXCEPTION_IF_NULL(bn);
@@ -92,54 +48,35 @@ void GetBNOutput(const FuncGraphPtr &func_graph, const AnfNodePtr &bn, std::vect
     MS_EXCEPTION_IF_NULL(output);
     bn_outputs->push_back(output);
   }
-  sort(bn_outputs->begin(), bn_outputs->end(), CompareTupleGetitem);
 }
 }  // namespace
 
 const BaseRef FusedBatchNormFusion::DefinePattern() const {
-  const auto prim_batch_norm = std::make_shared<Primitive>(kBatchNormOpName);
   std::shared_ptr<Var> Xs = std::make_shared<SeqVar>();
   VarPtr index0 = std::make_shared<CondVar>(IsC);
   VarPtr index1 = std::make_shared<CondVar>(IsC);
   VarPtr index2 = std::make_shared<CondVar>(IsC);
-  VectorRef batch_norm = VectorRef({prim_batch_norm, data_input_var0_, data_input_var1_, data_input_var2_, Xs});
+  VectorRef batch_norm = VectorRef({batch_norm_var_, data_input0_var_, data_input1_var_, data_input2_var_, Xs});
   VectorRef tuple_getitem0 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index0});
   VectorRef tuple_getitem1 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index1});
   VectorRef tuple_getitem2 = VectorRef({prim::kPrimTupleGetItem, batch_norm, index2});
-  VectorRef sub0 = VectorRef({prim::kPrimSub, variable_input_var0_, tuple_getitem1});
-  VectorRef sub1 = VectorRef({prim::kPrimSub, variable_input_var1_, tuple_getitem2});
-  VectorRef mul0 = VectorRef({prim::kPrimMul, sub0, constant_input_var0_});
-  VectorRef mul1 = VectorRef({prim::kPrimMul, sub1, constant_input_var1_});
-  VectorRef assign_sub0 = VectorRef({prim::kPrimAssignSub, variable_input_var0_, mul0});
-  VectorRef assign_sub1 = VectorRef({prim::kPrimAssignSub, variable_input_var1_, mul1});
+  VectorRef sub0 = VectorRef({prim::kPrimSub, variable_input0_var_, tuple_getitem1});
+  VectorRef sub1 = VectorRef({prim::kPrimSub, variable_input1_var_, tuple_getitem2});
+  VectorRef mul0 = VectorRef({prim::kPrimMul, sub0, constant_input0_var_});
+  VectorRef mul1 = VectorRef({prim::kPrimMul, sub1, constant_input1_var_});
+  VectorRef assign_sub0 = VectorRef({prim::kPrimAssignSub, variable_input0_var_, mul0});
+  VectorRef assign_sub1 = VectorRef({prim::kPrimAssignSub, variable_input1_var_, mul1});
   VectorRef depend0 = VectorRef({prim::kPrimDepend, tuple_getitem0, assign_sub0});
   return VectorRef({prim::kPrimDepend, depend0, assign_sub1});
 }
 
-abstract::AbstractTuplePtr FusedBatchNormFusion::CreateAbstractOfFusedBatchNorm(const EquivPtr &equiv,
-                                                                                const AnfNodePtr &bn) const {
-  MS_EXCEPTION_IF_NULL(equiv);
-  MS_EXCEPTION_IF_NULL(bn);
-  auto variable_input0 = utils::cast<AnfNodePtr>((*equiv)[variable_input_var0_]);
-  MS_EXCEPTION_IF_NULL(variable_input0);
-  auto variable_input1 = utils::cast<AnfNodePtr>((*equiv)[variable_input_var1_]);
-  MS_EXCEPTION_IF_NULL(variable_input1);
-  auto bn_abstract_tuple = dyn_cast<abstract::AbstractTuple>(bn->abstract());
-  MS_EXCEPTION_IF_NULL(bn_abstract_tuple);
-  if (bn_abstract_tuple->elements().size() != kBnOutputNum) {
-    MS_LOG(EXCEPTION) << "The abstract size of node bn must be " << kBnOutputNum << ", but it is "
-                      << bn_abstract_tuple->elements().size();
-  }
-  AbstractBasePtrList fused_bn_abstract_list{bn_abstract_tuple->elements()[0], variable_input0->abstract(),
-                                             variable_input1->abstract(), bn_abstract_tuple->elements()[3],
-                                             bn_abstract_tuple->elements()[4]};
-  auto abstract_tuple = std::make_shared<abstract::AbstractTuple>(fused_bn_abstract_list);
-  return abstract_tuple;
-}
-
 ValuePtr FusedBatchNormFusion::GetFactor(const EquivPtr &equiv) const {
   MS_EXCEPTION_IF_NULL(equiv);
-  auto constant_input = utils::cast<AnfNodePtr>((*equiv)[constant_input_var0_]);
+  auto iter_constant_input0 = (*equiv).find(constant_input0_var_);
+  if (iter_constant_input0 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the constant_input0 var after matched.";
+  }
+  auto constant_input = utils::cast<AnfNodePtr>(iter_constant_input0->second);
   MS_EXCEPTION_IF_NULL(constant_input);
   if (!constant_input->isa<ValueNode>()) {
     return nullptr;
@@ -158,53 +95,187 @@ ValuePtr FusedBatchNormFusion::GetFactor(const EquivPtr &equiv) const {
   return MakeValue(tensor_data[0]);
 }
 
-const AnfNodePtr FusedBatchNormFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
-                                               const EquivPtr &equiv) const {
+AnfNodePtr FusedBatchNormFusion::CreateBNTrainingReduce(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                                        const EquivPtr &equiv) const {
   MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(node);
   MS_EXCEPTION_IF_NULL(equiv);
-  // Set inputs
-  auto data_input0 = utils::cast<AnfNodePtr>((*equiv)[data_input_var0_]);
-  MS_EXCEPTION_IF_NULL(data_input0);
-  auto data_input1 = utils::cast<AnfNodePtr>((*equiv)[data_input_var1_]);
+  // Set input to create node
+  auto iter_data_input0 = (*equiv).find(data_input0_var_);
+  if (iter_data_input0 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input0 var after matched.";
+  }
+  std::vector<AnfNodePtr> bn_training_reduce_inputs = {
+    NewValueNode(std::make_shared<Primitive>(kBNTrainingReduceOpName)),
+    utils::cast<AnfNodePtr>(iter_data_input0->second)};
+  auto bn_training_reduce = func_graph->NewCNode(bn_training_reduce_inputs);
+  MS_EXCEPTION_IF_NULL(bn_training_reduce);
+  bn_training_reduce->set_scope(node->scope());
+  // Set abstract
+  auto iter_data_input1 = (*equiv).find(data_input1_var_);
+  if (iter_data_input1 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input1 var after matched.";
+  }
+  auto data_input1 = utils::cast<AnfNodePtr>(iter_data_input1->second);
   MS_EXCEPTION_IF_NULL(data_input1);
-  auto data_input2 = utils::cast<AnfNodePtr>((*equiv)[data_input_var2_]);
+  auto iter_data_input2 = (*equiv).find(data_input2_var_);
+  if (iter_data_input2 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input2 var after matched.";
+  }
+  auto data_input2 = utils::cast<AnfNodePtr>(iter_data_input2->second);
   MS_EXCEPTION_IF_NULL(data_input2);
-  auto variable_input0 = utils::cast<AnfNodePtr>((*equiv)[variable_input_var0_]);
+  AbstractBasePtrList abstract_list{data_input1->abstract(), data_input2->abstract()};
+  auto abstract_tuple = std::make_shared<abstract::AbstractTuple>(abstract_list);
+  bn_training_reduce->set_abstract(abstract_tuple);
+  return bn_training_reduce;
+}
+
+void FusedBatchNormFusion::GetBNTrainingUpdateInputs(const EquivPtr &equiv,
+                                                     const std::vector<AnfNodePtr> &bn_training_reduce_outputs,
+                                                     std::vector<AnfNodePtr> *bn_training_update_inputs) const {
+  MS_EXCEPTION_IF_NULL(equiv);
+  MS_EXCEPTION_IF_NULL(bn_training_update_inputs);
+  auto iter_data_input0 = (*equiv).find(data_input0_var_);
+  if (iter_data_input0 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input0 var after matched.";
+  }
+  auto iter_data_input1 = (*equiv).find(data_input1_var_);
+  if (iter_data_input1 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input1 var after matched.";
+  }
+  auto iter_data_input2 = (*equiv).find(data_input2_var_);
+  if (iter_data_input2 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the data_input2 var after matched.";
+  }
+  auto iter_variable_input0 = (*equiv).find(variable_input0_var_);
+  if (iter_variable_input0 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the variable_input0 var after matched.";
+  }
+  auto iter_variable_input1 = (*equiv).find(variable_input1_var_);
+  if (iter_variable_input1 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the variable_input1 var after matched.";
+  }
+  if (bn_training_reduce_outputs.size() != kBNTrainingReduceOutputNum) {
+    MS_LOG(EXCEPTION) << "The output size of node bn_training_reduce must be " << kBNTrainingReduceOutputNum
+                      << ", but it is " << bn_training_reduce_outputs.size();
+  }
+  *bn_training_update_inputs = {
+    NewValueNode(std::make_shared<Primitive>(kBNTrainingUpdateOpName)),
+    utils::cast<AnfNodePtr>(iter_data_input0->second),
+    bn_training_reduce_outputs[0],
+    bn_training_reduce_outputs[1],
+    utils::cast<AnfNodePtr>(iter_data_input1->second),
+    utils::cast<AnfNodePtr>(iter_data_input2->second),
+    utils::cast<AnfNodePtr>(iter_variable_input0->second),
+    utils::cast<AnfNodePtr>(iter_variable_input1->second),
+  };
+}
+
+void FusedBatchNormFusion::GetBNTrainingUpdateAbstractList(const EquivPtr &equiv, const AnfNodePtr &bn,
+                                                           std::vector<AbstractBasePtr> *abstract_list) const {
+  MS_EXCEPTION_IF_NULL(equiv);
+  MS_EXCEPTION_IF_NULL(bn);
+  MS_EXCEPTION_IF_NULL(abstract_list);
+  auto bn_abstract_tuple = dyn_cast<abstract::AbstractTuple>(bn->abstract());
+  MS_EXCEPTION_IF_NULL(bn_abstract_tuple);
+  if (bn_abstract_tuple->elements().size() < kBnOutputNum) {
+    MS_LOG(EXCEPTION) << "The abstract size of node bn must not be less than " << kBnOutputNum << ", but it is "
+                      << bn_abstract_tuple->elements().size();
+  }
+  auto iter_variable_input0 = (*equiv).find(variable_input0_var_);
+  if (iter_variable_input0 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the variable_input0 var after matched.";
+  }
+  auto variable_input0 = utils::cast<AnfNodePtr>(iter_variable_input0->second);
   MS_EXCEPTION_IF_NULL(variable_input0);
-  auto variable_input1 = utils::cast<AnfNodePtr>((*equiv)[variable_input_var1_]);
+  auto iter_variable_input1 = (*equiv).find(variable_input1_var_);
+  if (iter_variable_input1 == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the variable_input1 var after matched.";
+  }
+  auto variable_input1 = utils::cast<AnfNodePtr>(iter_variable_input1->second);
   MS_EXCEPTION_IF_NULL(variable_input1);
-  std::vector<AnfNodePtr> fused_bn_inputs = {
-    NewValueNode(prim::kPrimFusedBatchNorm), data_input0, data_input1, data_input2, variable_input0, variable_input1};
-  auto fused_bn = func_graph->NewCNode(fused_bn_inputs);
-  fused_bn->set_scope(node->scope());
-  MS_EXCEPTION_IF_NULL(fused_bn);
+  *abstract_list = {bn_abstract_tuple->elements()[0], variable_input0->abstract(), variable_input1->abstract(),
+                    bn_abstract_tuple->elements()[1], bn_abstract_tuple->elements()[2]};
+}
+
+AnfNodePtr FusedBatchNormFusion::CreateBNTrainingUpdate(
+  const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &equiv,
+  const std::vector<AnfNodePtr> &bn_training_reduce_outputs) const {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(node);
+  MS_EXCEPTION_IF_NULL(equiv);
+  // Set input
+  std::vector<AnfNodePtr> bn_training_update_inputs;
+  GetBNTrainingUpdateInputs(equiv, bn_training_reduce_outputs, &bn_training_update_inputs);
+  auto bn_training_update = func_graph->NewCNode(bn_training_update_inputs);
+  MS_EXCEPTION_IF_NULL(bn_training_update);
   // Set abstract
-  AnfNodePtr bn = GetBatchNormNode(node);
-  fused_bn->set_abstract(CreateAbstractOfFusedBatchNorm(equiv, bn));
-  // Set attr
-  AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn, fused_bn);
+  auto iter_batch_norm = (*equiv).find(batch_norm_var_);
+  if (iter_batch_norm == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the batch_norm var after matched.";
+  }
+  AnfNodePtr bn = utils::cast<AnfNodePtr>(iter_batch_norm->second);
+  MS_EXCEPTION_IF_NULL(bn);
+  AbstractBasePtrList abstract_list;
+  GetBNTrainingUpdateAbstractList(equiv, bn, &abstract_list);
+  auto abstract_tuple = std::make_shared<abstract::AbstractTuple>(abstract_list);
+  bn_training_update->set_abstract(abstract_tuple);
+  AnfAlgo::CopyNodeAttr(kAttrEpsilon, bn, bn_training_update);
   ValuePtr factor = GetFactor(equiv);
   if (factor == nullptr) {
     return nullptr;
   }
-  AnfAlgo::SetNodeAttr(kAttrMomentum, factor, fused_bn);
-  // Replace old nodes with outputs of fused_bn
-  std::vector<AnfNodePtr> fused_bn_outputs;
-  CreateMultipleOutputsOfAnfNode(func_graph, fused_bn, kBnOutputNum, &fused_bn_outputs);
-  if (fused_bn_outputs.size() != kBnOutputNum) {
-    MS_LOG(EXCEPTION) << "The output size of node bn must be " << kBnOutputNum << ", but it is "
-                      << fused_bn_outputs.size();
+  AnfAlgo::SetNodeAttr(kAttrFactor, factor, bn_training_update);
+  AnfAlgo::SetNodeAttr(kAttrIsRef, MakeValue(true), bn_training_update);
+  bn_training_update->set_scope(node->scope());
+  return bn_training_update;
+}
+
+const AnfNodePtr FusedBatchNormFusion::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                               const EquivPtr &equiv) const {
+  MS_EXCEPTION_IF_NULL(func_graph);
+  MS_EXCEPTION_IF_NULL(equiv);
+  MS_EXCEPTION_IF_NULL(node);
+  AnfNodePtr bn_training_reduce = CreateBNTrainingReduce(func_graph, node, equiv);
+  std::vector<AnfNodePtr> bn_training_reduce_outputs;
+  CreateMultipleOutputsOfAnfNode(func_graph, bn_training_reduce, kBNTrainingReduceOutputNum,
+                                 &bn_training_reduce_outputs);
+  AnfNodePtr bn_training_update = CreateBNTrainingUpdate(func_graph, node, equiv, bn_training_reduce_outputs);
+  if (bn_training_update == nullptr) {
+    MS_LOG(DEBUG) << "Create BNTrainingUpdate failed for bn node " << node->DebugString();
+    return nullptr;
+  }
+  std::vector<AnfNodePtr> bn_training_update_outputs;
+  CreateMultipleOutputsOfAnfNode(func_graph, bn_training_update, kBNTrainingUpdateOutputNum,
+                                 &bn_training_update_outputs);
+  if (bn_training_update_outputs.size() < kBNTrainingUpdateOutputNum) {
+    MS_LOG(EXCEPTION) << "The output size of node bn must be " << kBNTrainingUpdateOutputNum << ", but it is "
+                      << bn_training_update_outputs.size();
+  }
+  // Replace old bn outputs with new outputs
+  auto iter_batch_norm = (*equiv).find(batch_norm_var_);
+  if (iter_batch_norm == (*equiv).end()) {
+    MS_LOG(EXCEPTION) << "The equiv map is expected to contains the batch_norm var after matched.";
   }
+  AnfNodePtr bn = utils::cast<AnfNodePtr>(iter_batch_norm->second);
   std::vector<AnfNodePtr> bn_outputs;
   GetBNOutput(func_graph, bn, &bn_outputs);
-  if (bn_outputs.size() != kBnOutputNum) {
-    MS_LOG(EXCEPTION) << "The output size of node bn must be " << kBnOutputNum << ", but it is " << bn_outputs.size();
-  }
   auto manager = func_graph->manager();
   MS_EXCEPTION_IF_NULL(manager);
-  (void)manager->Replace(bn_outputs[3], fused_bn_outputs[3]);
-  (void)manager->Replace(bn_outputs[4], fused_bn_outputs[4]);
-  return fused_bn_outputs[0];
+  for (const auto &output : bn_outputs) {
+    MS_EXCEPTION_IF_NULL(output);
+    auto tuple_getitem_cnode = output->cast<CNodePtr>();
+    MS_EXCEPTION_IF_NULL(tuple_getitem_cnode);
+    AnfNodePtr index_node = tuple_getitem_cnode->input(kInputNodeOutputIndexInTupleGetItem);
+    MS_EXCEPTION_IF_NULL(index_node);
+    auto value_node = index_node->cast<ValueNodePtr>();
+    MS_EXCEPTION_IF_NULL(value_node);
+    int index = GetValue<int>(value_node->value());
+    if (index == kReplaceOutputIndex0 || index == kReplaceOutputIndex1) {
+      (void)manager->Replace(output, bn_training_update_outputs[index]);
+    }
+  }
+  return bn_training_update_outputs[0];
 }
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h
index db25e4f9f5..e6bf1dda55 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h
+++ b/mindspore/ccsrc/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h
@@ -19,6 +19,7 @@
 #include <vector>
 #include <memory>
 #include "pre_activate/common/optimizer.h"
+#include "utils/utils.h"
 
 namespace mindspore {
 namespace opt {
@@ -26,29 +27,37 @@ class FusedBatchNormFusion : public PatternProcessPass {
  public:
   explicit FusedBatchNormFusion(bool multigraph = true)
       : PatternProcessPass("fused_batch_norm_fusion", multigraph),
-        data_input_var0_(std::make_shared<Var>()),
-        data_input_var1_(std::make_shared<Var>()),
-        data_input_var2_(std::make_shared<Var>()),
-        variable_input_var0_(std::make_shared<Var>()),
-        variable_input_var1_(std::make_shared<Var>()),
-        constant_input_var0_(std::make_shared<Var>()),
-        constant_input_var1_(std::make_shared<Var>()) {}
+        data_input0_var_(std::make_shared<Var>()),
+        data_input1_var_(std::make_shared<Var>()),
+        data_input2_var_(std::make_shared<Var>()),
+        variable_input0_var_(std::make_shared<Var>()),
+        variable_input1_var_(std::make_shared<Var>()),
+        constant_input0_var_(std::make_shared<Var>()),
+        constant_input1_var_(std::make_shared<Var>()),
+        batch_norm_var_(std::make_shared<Var>(std::make_shared<Primitive>(prim::kPrimBatchNorm->name()))) {}
   ~FusedBatchNormFusion() override = default;
   const BaseRef DefinePattern() const override;
   const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
 
  private:
-  abstract::AbstractTuplePtr CreateAbstractOfFusedBatchNorm(const EquivPtr &equiv, const AnfNodePtr &bn) const;
-
+  AnfNodePtr CreateBNTrainingReduce(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
+                                    const EquivPtr &equiv) const;
+  void GetBNTrainingUpdateInputs(const EquivPtr &equiv, const std::vector<AnfNodePtr> &bn_training_reduce_outputs,
+                                 std::vector<AnfNodePtr> *bn_training_update_inputs) const;
+  void GetBNTrainingUpdateAbstractList(const EquivPtr &equiv, const AnfNodePtr &bn,
+                                       std::vector<AbstractBasePtr> *abstract_list) const;
+  AnfNodePtr CreateBNTrainingUpdate(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const EquivPtr &equiv,
+                                    const std::vector<AnfNodePtr> &bn_training_reduce_outputs) const;
   ValuePtr GetFactor(const EquivPtr &equiv) const;
 
-  VarPtr data_input_var0_;
-  VarPtr data_input_var1_;
-  VarPtr data_input_var2_;
-  VarPtr variable_input_var0_;
-  VarPtr variable_input_var1_;
-  VarPtr constant_input_var0_;
-  VarPtr constant_input_var1_;
+  VarPtr data_input0_var_;
+  VarPtr data_input1_var_;
+  VarPtr data_input2_var_;
+  VarPtr variable_input0_var_;
+  VarPtr variable_input1_var_;
+  VarPtr constant_input0_var_;
+  VarPtr constant_input1_var_;
+  VarPtr batch_norm_var_;
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 7a102b0bbe..fd9279cf04 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -62,6 +62,7 @@ class _BatchNorm(Cell):
         self.beta = Parameter(initializer(
             beta_init, num_features), name="beta", requires_grad=affine)
         self.group = check_int_positive(device_num_each_group)
+        self.is_global = False
         if self.group != 1:
             self.rank_id = get_rank()
             self.rank_size = get_group_size()
@@ -80,15 +81,18 @@ class _BatchNorm(Cell):
         self.cast = P.Cast()
         self.dtype = P.DType()
         self.reshape = P.Reshape()
+        self.is_ascend = context.get_context("device_target") == "Ascend"
 
         if context.get_context("enable_ge"):
             self.is_ge_backend = True
             self.momentum = Tensor(1.0 - momentum, mstype.float32)
-            self.bn_train = P.BatchNorm(is_training=True,
-                                        epsilon=self.eps)
         else:
             self.is_ge_backend = False
             self.momentum = 1.0 - momentum
+        if self.is_ge_backend or self.is_ascend:
+            self.bn_train = P.BatchNorm(is_training=True,
+                                        epsilon=self.eps)
+        else:
             self.bn_train = P.FusedBatchNorm(mode=1,
                                              epsilon=self.eps,
                                              momentum=self.momentum)
@@ -140,24 +144,23 @@ class _BatchNorm(Cell):
 
     def construct(self, x):
         if self.training and self.use_batch_statistics:
-            if self.is_ge_backend:
-                if self.is_global:
-                    axes, re_shape = _shape_infer(F.shape(x), self.num_features)
-                    y = self._global_sync(x, axes, re_shape)
-                else:
-                    y, batch_mean, batch_var, _, _ = \
-                        self.bn_train(x,
-                                      self.gamma,
-                                      self.beta,
-                                      None,
-                                      None)
-
-                    mean_sub = self.sub_mean(self.moving_mean, batch_mean)
-                    temp_mean = self.mul_mean(mean_sub, self.momentum)
-                    mean_sub2 = self.sub_var(self.moving_variance, batch_var)
-                    temp_variance = self.mul_var(mean_sub2, self.momentum)
-                    y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean))
-                    y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance))
+            if self.is_ge_backend and self.is_global:
+                axes, re_shape = _shape_infer(F.shape(x), self.num_features)
+                y = self._global_sync(x, axes, re_shape)
+            elif self.is_ge_backend or self.is_ascend:
+                y, batch_mean, batch_var, _, _ = \
+                    self.bn_train(x,
+                                  self.gamma,
+                                  self.beta,
+                                  None,
+                                  None)
+
+                mean_sub = self.sub_mean(self.moving_mean, batch_mean)
+                temp_mean = self.mul_mean(mean_sub, self.momentum)
+                mean_sub2 = self.sub_var(self.moving_variance, batch_var)
+                temp_variance = self.mul_var(mean_sub2, self.momentum)
+                y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean))
+                y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance))
             else:
                 y = self.bn_train(x,
                                   self.gamma,
diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc
new file mode 100644
index 0000000000..3d13f4a336
--- /dev/null
+++ b/tests/ut/cpp/pre_activate/ascend/ir_fusion/fused_batch_norm_fusion_test.cc
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "pre_activate/ascend/ir_fusion/fused_batch_norm_fusion.h"
+#include "common/backend_common_test.h"
+#include "common/py_func_graph_fetcher.h"
+
+namespace mindspore {
+namespace opt {
+class TestHWFusedBatchNormFusion : public BackendCommon {
+ public:
+  TestHWFusedBatchNormFusion() : get_py_fun_("gtest_input.pre_activate.fused_batch_norm_fusion_test", true) {}
+  ~TestHWFusedBatchNormFusion() override = default;
+
+  UT::PyFuncGraphFetcher get_py_fun_;
+};
+
+TEST_F(TestHWFusedBatchNormFusion, test_fused_batch_norm_fusion) {
+  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_fused_batch_norm_fusion", "before");
+  EXPECT_NE(g, nullptr);
+  std::vector<int> shp_x{32, 64, 112, 112};
+  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_x);
+  std::vector<int> shp_y{64};
+  auto y_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_y);
+  AbstractBasePtrList args_spec_list{x_abstract};
+  for (size_t i = 0; i < 6; ++i) {
+    args_spec_list.push_back(y_abstract);
+  }
+  auto kg = GetKernelGraph(g, args_spec_list);
+
+  auto optimizer = std::make_shared<opt::GraphOptimizer>();
+  auto pm = std::make_shared<opt::PassManager>();
+  pm->AddPass(std::make_shared<opt::FusedBatchNormFusion>());
+  optimizer->AddPassManager(pm);
+  FuncGraphPtr new_graph = optimizer->Optimize(kg);
+
+  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_fused_batch_norm_fusion", "after");
+  EXPECT_TRUE(CheckEqualGraph(g_after, new_graph));
+}
+}  // namespace opt
+}  // namespace mindspore
\ No newline at end of file
diff --git a/tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py b/tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py
index 8f4b8b476f..ca93d40443 100644
--- a/tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py
+++ b/tests/ut/cpp/python_input/gtest_input/pre_activate/fused_batch_norm_fusion_test.py
@@ -24,7 +24,8 @@ make_tuple = Primitive('make_tuple')
 tuple_getitem = Primitive('tuple_getitem')
 depend = Primitive('depend')
 BatchNorm = P.BatchNorm()
-FusedBatchNorm = P.FusedBatchNorm()
+BNTrainingReduce = Primitive('BNTrainingReduce')
+BNTrainingUpdate = Primitive('BNTrainingUpdate')
 constant0 = Tensor(0.1, mstype.float32)
 constant1 = Tensor(0.1, mstype.float32)
 
@@ -40,7 +41,7 @@ class FnDict:
         return self.fnDict[name]
 
 
-def useless_test_fused_batch_norm_fusion(tag):
+def test_fused_batch_norm_fusion(tag):
     fns = FnDict()
 
     @fns
@@ -60,9 +61,11 @@ def useless_test_fused_batch_norm_fusion(tag):
 
     @fns
     def after(input0, input1, input2, input3, input4, var0, var1):
-        fused_batch_norm = FusedBatchNorm(input0, input1, input2, var0, var1)
-        outputs = make_tuple(tuple_getitem(fused_batch_norm, 0), tuple_getitem(fused_batch_norm, 3),
-                             tuple_getitem(fused_batch_norm, 4))
+        bn_training_reduce = BNTrainingReduce(input0)
+        bn_training_update = BNTrainingUpdate(input0, tuple_getitem(bn_training_reduce, 0),
+                                              tuple_getitem(bn_training_reduce, 1), input1, input2, var0, var1)
+        outputs = make_tuple(tuple_getitem(bn_training_update, 0), tuple_getitem(bn_training_update, 3),
+                             tuple_getitem(bn_training_update, 4))
         output = tuple_getitem(outputs, 0)
         return make_tuple(output)
 

From 4694c979a596480ef50a800d4392c6d6c82e2965 Mon Sep 17 00:00:00 2001
From: zhaozhenlong <zhaozhenlong1@huawei.com>
Date: Wed, 29 Apr 2020 20:58:06 +0800
Subject: [PATCH 235/242] delete pack description about num and add raise error

SparseApplyAdagrad example format

PReLU and grad state 1-d not supported

atan2 bprop dx dy revert to original shapes
---
 mindspore/ops/_grad/grad_math_ops.py  |  6 +++---
 mindspore/ops/operations/_grad_ops.py |  5 +++++
 mindspore/ops/operations/array_ops.py | 17 +++++++++-------
 mindspore/ops/operations/nn_ops.py    | 24 ++++++++++++-----------
 tests/ut/python/ops/test_ops.py       | 28 ++++++++++++++++++++-------
 5 files changed, 52 insertions(+), 28 deletions(-)

diff --git a/mindspore/ops/_grad/grad_math_ops.py b/mindspore/ops/_grad/grad_math_ops.py
index b4857ed22c..6e253b56e9 100755
--- a/mindspore/ops/_grad/grad_math_ops.py
+++ b/mindspore/ops/_grad/grad_math_ops.py
@@ -788,7 +788,7 @@ def get_bprop_atan2(self):
 
     def bprop(x, y, out, dout):
         tmp = dout / (square(x) + square(y))
-        dx = tmp * y
-        dy = tmp * (-x)
-        return (dx, dy)
+        bc_dx = tmp * y
+        bc_dy = tmp * (-x)
+        return binop_grad_common(x, y, bc_dx, bc_dy)
     return bprop
diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py
index 747caa7a96..4efdc5dedb 100644
--- a/mindspore/ops/operations/_grad_ops.py
+++ b/mindspore/ops/operations/_grad_ops.py
@@ -669,6 +669,9 @@ class PReLUGrad(PrimitiveWithInfer):
     r"""
     Gradients of PReLU operation.
 
+    Note:
+        1-dimensional input_x is not supported.
+
     Inputs:
         - **y_backprop** (Tensor) - Representing the backprop of the next layer.
         - **input_x** (Tensor) - Should be the input `input_x` of forward operator PRelu.
@@ -683,6 +686,8 @@ class PReLUGrad(PrimitiveWithInfer):
         pass
 
     def infer_shape(self, y_backprop_shape, A_shape, w_shape):
+        if len(A_shape) == 1:
+            raise ValueError(f'For \'{self.name}\' input_x rank 1 is not supported.')
         return y_backprop_shape, w_shape
 
     def infer_dtype(self, y_backprop_dtype, A_dtype, w_dtype):
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index f611bc9617..aca87cab66 100644
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -1308,8 +1308,8 @@ class Concat(PrimitiveWithInfer):
 def _get_pack_shape(x_shape, x_type, axis, prim_name):
     """for pack output shape"""
     validator.check_value_type("shape", x_shape, [tuple, list], prim_name)
-    validator.check_integer("len of input_x shape", len(x_shape), 0, Rel.GT, prim_name)
-    validator.check_subclass("shape0", x_type[0], mstype.tensor, prim_name)
+    validator.check_integer("len of input_x", len(x_shape), 1, Rel.GT, prim_name)
+    validator.check_subclass("input_x[0]", x_type[0], mstype.tensor, prim_name)
     validator.check_integer("len of input_x0 shape", len(x_shape[0]), 0, Rel.GT, prim_name)
     rank_base = len(x_shape[0])
     N = len(x_shape)
@@ -1320,7 +1320,7 @@ def _get_pack_shape(x_shape, x_type, axis, prim_name):
     for i in range(1, N):
         v = x_shape[i]
         validator.check('len of x_shape[%d]' % i, len(v), 'len of rank_base', rank_base, Rel.EQ, prim_name)
-        validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0], Rel.EQ, prim_name)
+        validator.check('x_type[%d]' % i, x_type[i], 'base', x_type[0], Rel.EQ, prim_name, TypeError)
         for j in range(rank_base):
             if v[j] != x_shape[0][j]:
                 raise ValueError(f"For \'{prim_name}\' element {i} shape in input can not pack with first element")
@@ -1346,6 +1346,12 @@ class Pack(PrimitiveWithInfer):
     Outputs:
         Tensor. A packed Tensor with the same type as `input_x`.
 
+    Raises:
+        TypeError: If the data types of elements in input_x are not the same.
+        ValueError: If length of input_x is not greater than 1;
+                    or if axis is out of the range [-(R+1), R+1);
+                    or if the shapes of elements in input_x are not the same.
+
     Examples:
         >>> data1 = Tensor(np.array([0, 1]).astype(np.float32))
         >>> data2 = Tensor(np.array([2, 3]).astype(np.float32))
@@ -1386,8 +1392,6 @@ class Unpack(PrimitiveWithInfer):
     Args:
         axis (int): Dimension along which to pack. Default: 0.
                     Negative values wrap around. The range is [-R, R).
-        num (int): The number of tensors to be unpacked to. Default : "None".
-                   If `num` is not specified, it is inferred from the shape of `input_x`.
 
     Inputs:
         - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`.
@@ -1397,8 +1401,7 @@ class Unpack(PrimitiveWithInfer):
         A tuple of Tensors, the shape of each objects is same.
 
     Raises:
-        ValueError: If axis is out of the range [-len(input_x.shape()), len(input_x.shape())),
-                    or if len(input_x.shape[axis]) not equal to num.
+        ValueError: If axis is out of the range [-len(input_x.shape()), len(input_x.shape())).
 
     Examples:
         >>> unpack = P.Unpack()
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 5e84e43904..e3ce37e7d1 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2087,6 +2087,9 @@ class PReLU(PrimitiveWithInfer):
 
     where :math:`x_i` is an element of an channel of the input.
 
+    Note:
+        1-dimensional input_x is not supported.
+
     Inputs:
         - **input_x** (Tensor) - Float tensor, representing the output of the preview layer.
         - **weight** (Tensor) -  Float Tensor, w > 0, there is only two shapes are legitimate,
@@ -2106,14 +2109,13 @@ class PReLU(PrimitiveWithInfer):
         input_x_dim = len(input_x_shape)
         weight_dim = len(weight_shape)
 
+        if input_x_dim == 1:
+            raise ValueError(f'For \'{self.name}\' input_x rank 1 is not supported.')
+
         if weight_dim != 1:
             raise ValueError(f'For \'{self.name}\' weight_dim must be 1, while weight_dim is {weight_dim}.')
 
-        if input_x_dim == 1 and weight_shape[0] != 1:
-            raise ValueError(f'For \'{self.name}\' when input_x_dim is 1, weight_shape[0] must be 1, '
-                             f'while weight_shape[0] is {weight_shape[0]}.')
-
-        if input_x_dim != 1 and weight_shape[0] != input_x_shape[1] and weight_shape[0] != 1:
+        if weight_shape[0] != input_x_shape[1] and weight_shape[0] != 1:
             raise ValueError(f'For \'{self.name}\' channel of input_x and weight must be matched,'
                              f' while channel of input_x is {input_x_shape[1]},'
                              f' weight_shape[0] is {weight_shape[0]}.')
@@ -2556,12 +2558,12 @@ class SparseApplyAdagrad(PrimitiveWithInfer):
         Tensor, has the same shape and type as `var`.
 
     Examples:
-        var = Tensor(np.random.random((3, 3)), mindspore.float32)
-        accum = Tensor(np.random.random((3, 3)), mindspore.float32)
-        grad = Tensor(np.random.random((3, 3)), mindspore.float32)
-        indices = Tensor(np.ones((3,), np.int32))
-        sparse_apply_ada_grad = P.SparseApplyAdagrad(0.5)
-        sparse_apply_ada_grad(var, accum, grad, indices)
+        >>> var = Tensor(np.random.random((3, 3)), mindspore.float32)
+        >>> accum = Tensor(np.random.random((3, 3)), mindspore.float32)
+        >>> grad = Tensor(np.random.random((3, 3)), mindspore.float32)
+        >>> indices = Tensor(np.ones((3,), np.int32))
+        >>> sparse_apply_ada_grad = P.SparseApplyAdagrad(0.5)
+        >>> sparse_apply_ada_grad(var, accum, grad, indices)
     """
 
     @prim_attr_register
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index 8b949393f0..1d18083517 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -1033,11 +1033,6 @@ test_case_array_ops = [
         'desc_bprop':[[3, 2, 3, 3]],
     }),
     ('Pack_2', {
-        'block': NetForPackInput(P.Pack()),
-        'desc_inputs':[[2, 2]],
-        'desc_bprop':[[1, 2, 2]],
-    }),
-    ('Pack_3', {
         'block': NetForPackInput(P.Pack()),
         'desc_inputs':[[128, 128], [128, 128]],
         'desc_bprop':[[2, 128, 128]],
@@ -1052,16 +1047,26 @@ test_case_array_ops = [
         'desc_inputs':[Tensor(np.array([[1, 1, 1]], np.float32))],
         'desc_bprop':[[1], [1], [1]],
     }),
-    ('Diag', {
+    ('Diag_1', {
         'block': P.Diag(),
         'desc_inputs': [[4]],
         'desc_bprop': [[4, 4]],
     }),
-    ('DiagPart', {
+    ('Diag_2', {
+        'block': P.Diag(),
+        'desc_inputs': [[4, 4]],
+        'desc_bprop': [[4, 4, 4, 4]],
+    }),
+    ('DiagPart_1', {
         'block': P.DiagPart(),
         'desc_inputs': [[4, 4]],
         'desc_bprop': [[4]],
     }),
+    ('DiagPart_2', {
+        'block': P.DiagPart(),
+        'desc_inputs': [[4, 4, 4, 4]],
+        'desc_bprop': [[4, 4]],
+    }),
     ('SpaceToBatch_1', {
         'block': P.SpaceToBatch(2, [[0, 0], [0, 0]]),
         'desc_inputs': [[1, 3, 2, 2]],
@@ -1200,6 +1205,15 @@ raise_set = [
                         Tensor(np.ones((2, 2), np.int32)),
                         Tensor(np.ones((2,), np.float32))),
         'desc_bprop': [[2, 3]]}),
+    ('Pack', {
+        'block': (NetForPackInput(P.Pack()), {'exception': ValueError}),
+        'desc_inputs':[[2, 2]],
+        'desc_bprop':[[1, 2, 2]]}),
+    ('PReLU', {
+        'block': (P.PReLU(), {'exception': ValueError}),
+        'desc_inputs':[[2], [1]],
+        'desc_bprop':[[1]]}),
+
 ]
 
 

From caac6bce5c3445a5f444cd9a18521f4024972171 Mon Sep 17 00:00:00 2001
From: ch-l <ch.l@huawei.com>
Date: Wed, 29 Apr 2020 10:16:32 +0200
Subject: [PATCH 236/242] adjustments w.r.t. distributed execution

---
 .../auto_parallel/rec_core/rec_cost.cc        | 24 +++++-
 .../auto_parallel/rec_core/rec_cost.h         |  2 +-
 .../rec_core/rec_generate_strategy.cc         | 16 ++--
 .../auto_parallel/rec_core/rec_parse_graph.cc | 26 +-----
 .../auto_parallel/rec_core/rec_parse_graph.h  |  3 -
 .../auto_parallel/rec_core/rec_partition.cc   | 81 +------------------
 .../auto_parallel/rec_core/rec_partition.h    |  6 --
 7 files changed, 32 insertions(+), 126 deletions(-)

diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc
index 3fea107a73..e5ba59425c 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.cc
@@ -296,10 +296,10 @@ double CostConvolution::GetMinCostIn(const Graph::NodeType &node) {
                       static_cast<int>(op.arguments[1].tensor_shape.shape_n * op.arguments[1].tensor_str.str_n) *
                       static_cast<int>(op.arguments[1].tensor_shape.shape_w * op.arguments[1].tensor_str.str_w) *
                       static_cast<int>(op.arguments[1].tensor_shape.shape_c * op.arguments[1].tensor_str.str_c);
-  int tensor_out = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_shape.shape_w) *
-                   static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_shape.shape_c) *
-                   static_cast<int>(node.tensor_parm.tensor_str.str_h * node.tensor_parm.tensor_str.str_w) *
-                   static_cast<int>(node.tensor_parm.tensor_str.str_n * node.tensor_parm.tensor_str.str_c);
+  int tensor_out = static_cast<int>(node.tensor_parm.tensor_shape.shape_h * node.tensor_parm.tensor_str.str_h) *
+                   static_cast<int>(node.tensor_parm.tensor_shape.shape_n * node.tensor_parm.tensor_str.str_n) *
+                   static_cast<int>(node.tensor_parm.tensor_shape.shape_w * node.tensor_parm.tensor_str.str_w) *
+                   static_cast<int>(node.tensor_parm.tensor_shape.shape_c * node.tensor_parm.tensor_str.str_c);
 
   std::vector<double> cost_in;
   cost_in.push_back(StrDimB(tensor_filter));
@@ -628,6 +628,22 @@ StrategyRec CostCommon::ChoseStr(const std::vector<double> &cost_op, StrategyRec
   return str;
 }
 
+// Get weight for BN
+double CostBatchNorm::GetMinCostIn(const OperatorRec &op) {
+  int tensor = static_cast<int>(op.arguments[0].tensor_shape.shape_h * op.arguments[0].tensor_str.str_h) *
+               static_cast<int>(op.arguments[0].tensor_shape.shape_n * op.arguments[0].tensor_str.str_n) *
+               static_cast<int>(op.arguments[0].tensor_shape.shape_w * op.arguments[0].tensor_str.str_w) *
+               static_cast<int>(op.arguments[0].tensor_shape.shape_c * op.arguments[0].tensor_str.str_c);
+
+  std::vector<double> cost_in;
+  cost_in.push_back(StrDimB(tensor) * 1.2);
+  cost_in.push_back(DOUBLE_MAX);
+  cost_in.push_back(StrDimH(tensor) * 1.2);
+  cost_in.push_back(StrDimW(tensor) * 1.2);
+
+  return *min_element(cost_in.begin(), cost_in.end());
+}
+
 // Get optimal strategy for BN
 StrategyRec CostBatchNorm::GetOptimalStr(const Graph::NodeType &node,
                                          const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h
index 85e5e5ea94..315c081d67 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_cost.h
@@ -213,7 +213,7 @@ class CostBatchNorm {
                             const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
                             const Graph &graph);
 
-  double GetMinCostIn() const { return 0.0; }
+  double GetMinCostIn(const OperatorRec &op);
 
  private:
   double StrDimB(int32_t Tensor) {
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
index e942c8005f..42b3bfc72e 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
@@ -132,8 +132,9 @@ std::vector<int32_t> MakeOriginalStrategy(const std::vector<std::shared_ptr<Oper
   if (iter_ops >= ops.size()) {
     MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range.";
   }
-  if (iter_op_inputs >= ops[iter_ops]->strategy()->GetInputDim().size())
+  if (iter_op_inputs >= ops[iter_ops]->strategy()->GetInputDim().size()) {
     MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range.";
+  }
   size_t input_size = ops[iter_ops]->strategy()->GetInputDim()[iter_op_inputs].size();
   for (size_t dim = 0; dim < input_size; dim++) {
     s.push_back(1);
@@ -161,8 +162,9 @@ std::vector<int32_t> MakeDataParallelStrategy(const std::vector<std::shared_ptr<
     MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range.";
   }
   StrategyPtr origin_strategy = ops[iter_ops]->strategy();
-  if (iter_op_inputs >= origin_strategy->GetInputDim().size())
+  if (iter_op_inputs >= origin_strategy->GetInputDim().size()) {
     MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range.";
+  }
   size_t input_size = origin_strategy->GetInputDim()[iter_op_inputs].size();
   for (size_t dim = 0; dim < input_size; dim++) {
     if (dim == 0 && input_size == 4) {
@@ -198,9 +200,9 @@ std::vector<int32_t> PrepareStrategy(const std::shared_ptr<Graph> &graph,
     return MakeOriginalStrategy(ops, iter_ops, iter_op_inputs);
   } else if (type == RELU) {
     return MakeRecSearchStrategy(graph, iter_ops, iter_op_inputs);
-  } else if (type == BATCH_NORM || (type == FUSE_BATCH_NORM)) {
+  } else if ((type == BATCH_NORM) || (type == FUSE_BATCH_NORM)) {
     return PrepareBN(graph, iter_ops, iter_op_inputs);
-  } else if (type == SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) {
+  } else if (type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) {
     return PrepareSparse(iter_op_inputs);
   } else {
     return MakeDataParallelStrategy(ops, iter_ops, iter_op_inputs);
@@ -224,12 +226,6 @@ void MaskSpecialOps(std::shared_ptr<Graph> graph) {
       node.apply.arguments[1].tensor_str.str_c = 1;
       node.apply.arguments[1].tensor_str.str_h = 1;
       node.apply.arguments[1].tensor_str.str_w = 1;
-    } else if (node.apply.op_type == kRecBiasAdd || node.apply.op_type == kRecMatMul) {
-      // For MatMul and BiasAdd
-      node.apply.arguments[0].tensor_str.str_h = 1;
-      node.apply.arguments[0].tensor_str.str_w = 1;
-      node.apply.arguments[1].tensor_str.str_h = 1;
-      node.apply.arguments[1].tensor_str.str_w = 1;
     }
   }
 }
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc
index b9b1b7b914..ada22fef9a 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.cc
@@ -58,7 +58,8 @@ Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops,
       ops[iter_ops]->outputs_tensor_info()[0].shape()[0], ops[iter_ops]->outputs_tensor_info()[0].shape()[1],
       ops[iter_ops]->outputs_tensor_info()[0].shape()[2], ops[iter_ops]->outputs_tensor_info()[0].shape()[3]);
   } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 2) {
-    NewOp.tensor_parm = Fill2DTensor(ops, iter_ops, NewOp);
+    NewOp.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0],
+                                   ops[iter_ops]->outputs_tensor_info()[0].shape()[1]);
   } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 1) {
     NewOp.tensor_parm = MakeTensor(1, 1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0]);
   } else if (ops[iter_ops]->outputs_tensor_info()[0].shape().size() == 0) {
@@ -71,29 +72,6 @@ Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops,
   return NewOp;
 }
 
-TensorParam Fill2DTensor(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
-                         Graph::NodeType NewTensor) {
-  if (NewTensor.apply.op_type == OperatorType::kRecMatMul) {
-    auto attrs = ops[iter_ops]->attrs();
-    bool transpose_a = attrs[TRANSPOSE_A]->cast<BoolImmPtr>()->value();
-    bool transpose_b = attrs[TRANSPOSE_B]->cast<BoolImmPtr>()->value();
-    if (transpose_a) {
-      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[1],
-                                         ops[iter_ops]->outputs_tensor_info()[0].shape()[0]);
-    } else if (transpose_b) {
-      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[1],
-                                         ops[iter_ops]->outputs_tensor_info()[0].shape()[0]);
-    } else {
-      NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0],
-                                         ops[iter_ops]->outputs_tensor_info()[0].shape()[1]);
-    }
-  } else {
-    NewTensor.tensor_parm = MakeTensor(1, 1, ops[iter_ops]->outputs_tensor_info()[0].shape()[0],
-                                       ops[iter_ops]->outputs_tensor_info()[0].shape()[1]);
-  }
-  return NewTensor.tensor_parm;
-}
-
 OperatorRec CompleteOperatorInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
                                    Graph::NodeType NewTensor) {
   for (size_t iter_input_tensors = 0; iter_input_tensors < ops[iter_ops]->inputs_tensor_info().size();
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
index 17a8174dde..2b1d0c55ed 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_parse_graph.h
@@ -53,9 +53,6 @@ const TensorParam MakeTensor(int n, int c, int h, int w);
 
 Graph::NodeType MakeNewOperator(std::vector<std::shared_ptr<OperatorInfo>> ops, size_t iter_ops);
 
-TensorParam Fill2DTensor(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
-                         Graph::NodeType NewTensor);
-
 OperatorRec CompleteOperatorInputs(const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_ops,
                                    Graph::NodeType NewTensor);
 
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
index 5fcaefcb47..3527c18079 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.cc
@@ -73,7 +73,7 @@ double GetWeights(const Graph::NodeType &node) {
     // For BatchNorm
     auto cost_ptr = std::make_shared<CostBatchNorm>();
 
-    return cost_ptr->GetMinCostIn();
+    return cost_ptr->GetMinCostIn(op);
   } else if (op.op_type == OperatorType::kRecOneHot || op.op_type == OperatorType::kRecLog ||
              op.op_type == OperatorType::kRecExp || op.op_type == OperatorType::kRecAdd ||
              op.op_type == OperatorType::kRecSub || op.op_type == OperatorType::kRecMul ||
@@ -108,8 +108,8 @@ std::vector<size_t> SortByWeight(const std::shared_ptr<Graph> graph) {
     }
   }
 
-  // Do sorting.
-  sort(weight_to_node_index.begin(), weight_to_node_index.end());
+  // Ordering ops aka nodes of the graph
+  std::sort(weight_to_node_index.begin(), weight_to_node_index.end());
 
   // Store the result in node_index_by_weights.
   uint64_t size = weight_to_node_index.size();
@@ -231,7 +231,6 @@ Status PartitionForAllDevices(const size_t num_device, const double device_memor
     }
   }
 
-  InferUndecideStrategy(graph);
   if (DevicesMemoryControl(device_memory, graph) != SUCCESS) {
     return FAILED;
   } else {
@@ -257,80 +256,6 @@ Graph::NodeType ApplyStrToTensor(Graph::NodeType Node) {
   return Node;
 }
 
-// Check Strategy for the same tensor between op.
-void InferUndecideStrategy(std::shared_ptr<Graph> graph) {
-  MS_EXCEPTION_IF_NULL(graph);
-
-  uint64_t iter_nodes = graph->nodes.size();
-
-  // For all the nodes in the graph
-  for (uint64_t i_node = 0; i_node < iter_nodes; i_node++) {
-    // If this target node is an operator, find it's adjecent op's strategy;
-    if (graph->nodes[i_node].info == 0) {
-      // Try to apply last op's strategy.
-      ApplyLastStrategy(i_node, graph);
-      // Try to apply next op's strategy.
-      ApplyNextStrategy(i_node, graph);
-    }
-  }
-}
-
-void ApplyLastStrategy(const uint64_t node_index, std::shared_ptr<Graph> graph) {
-  Graph::NodeType &target_node = graph->nodes[node_index];
-
-  // Number of node-in
-  size_t num_node_in = target_node.node_in.size();
-
-  // Find forward op and copy strategy if meets the limits.
-  for (size_t index = 0; index < num_node_in; index++) {
-    if (graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_n <=
-          target_node.apply.arguments[0].tensor_str.str_n &&
-        graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_c <=
-          target_node.apply.arguments[0].tensor_str.str_c &&
-        graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_h <=
-          target_node.apply.arguments[0].tensor_str.str_h &&
-        graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_w <=
-          target_node.apply.arguments[0].tensor_str.str_w) {
-      target_node.apply.arguments[0].tensor_str.str_n =
-        graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_n;
-      target_node.apply.arguments[0].tensor_str.str_c =
-        graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_c;
-      target_node.apply.arguments[0].tensor_str.str_h =
-        graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_h;
-      target_node.apply.arguments[0].tensor_str.str_w =
-        graph->nodes[target_node.node_in[index]].tensor_parm.tensor_str.str_w;
-    }
-  }
-}
-
-void ApplyNextStrategy(const uint64_t node_index, std::shared_ptr<Graph> graph) {
-  Graph::NodeType &target_node = graph->nodes[node_index];
-
-  // Number of node-out
-  size_t num_node_out = target_node.node_out.size();
-
-  // Find backward op and copy strategy if meets the limits.
-  for (size_t index = 0; index < num_node_out; index++) {
-    if (graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_n <=
-          target_node.tensor_parm.tensor_str.str_n &&
-        graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_c <=
-          target_node.tensor_parm.tensor_str.str_c &&
-        graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_h <=
-          target_node.tensor_parm.tensor_str.str_h &&
-        graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_w <=
-          target_node.tensor_parm.tensor_str.str_w) {
-      target_node.tensor_parm.tensor_str.str_n =
-        graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_n;
-      target_node.tensor_parm.tensor_str.str_c =
-        graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_c;
-      target_node.tensor_parm.tensor_str.str_h =
-        graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_h;
-      target_node.tensor_parm.tensor_str.str_w =
-        graph->nodes[target_node.node_out[index]].apply.arguments[0].tensor_str.str_w;
-    }
-  }
-}
-
 Status DevicesMemoryControl(const double device_memory, std::shared_ptr<Graph> graph) {
   MS_EXCEPTION_IF_NULL(graph);
 
diff --git a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h
index e22b11542a..fc504b3cb2 100644
--- a/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h
+++ b/mindspore/ccsrc/parallel/auto_parallel/rec_core/rec_partition.h
@@ -44,12 +44,6 @@ Status PartitionForAllDevices(const size_t num_device, const double device_memor
 
 Graph::NodeType ApplyStrToTensor(Graph::NodeType Node);
 
-void InferUndecideStrategy(std::shared_ptr<Graph> graph);
-
-void ApplyLastStrategy(const uint64_t node_index, std::shared_ptr<Graph> graph);
-
-void ApplyNextStrategy(const uint64_t node_index, std::shared_ptr<Graph> graph);
-
 Status DevicesMemoryControl(const double device_memory, std::shared_ptr<Graph> graph);
 
 size_t GetDataTypeSize(const TensorType &type);

From 99c9b48a4d249fb0191d7e2a10f274686253f4d5 Mon Sep 17 00:00:00 2001
From: changzherui <changzherui1@huawei.com>
Date: Mon, 4 May 2020 19:39:17 +0800
Subject: [PATCH 237/242] delete TestHWBatchNormGradSplit

---
 .../ir_fission/batch_norm_grad_split_test.cc  | 59 -------------------
 1 file changed, 59 deletions(-)
 delete mode 100644 tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_split_test.cc

diff --git a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_split_test.cc b/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_split_test.cc
deleted file mode 100644
index 68c327ade1..0000000000
--- a/tests/ut/cpp/pre_activate/ascend/ir_fission/batch_norm_grad_split_test.cc
+++ /dev/null
@@ -1,59 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "common/backend_common_test.h"
-#include "common/py_func_graph_fetcher.h"
-#include "operator/ops.h"
-#include "ir/meta_tensor.h"
-#include "debug/anf_ir_dump.h"
-#include "utils/utils.h"
-#include "pre_activate/common/optimizer.h"
-#include "pre_activate/ascend/ir_fission/batch_norm_grad_split.h"
-#include "session/anf_runtime_algorithm.h"
-
-namespace mindspore {
-namespace opt {
-class TestHWBatchNormGradSplit : public BackendCommon {
- public:
-  TestHWBatchNormGradSplit() : get_py_fun_("gtest_input.pre_activate.batch_norm_grad_split", true) {}
-
- public:
-  UT::PyFuncGraphFetcher get_py_fun_;
-};
-
-TEST_F(TestHWBatchNormGradSplit, test_split) {
-  get_py_fun_.SetDoResolve(true);
-  FuncGraphPtr g = get_py_fun_.CallAndParseRet("test_batch_norm_grad_split", "before");
-  EXPECT_NE(g, nullptr);
-  std::vector<int> shp_x{1, 64, 112, 112};
-  std::vector<int> shp_b{64};
-  auto x_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_x);
-  auto b_abstract = std::make_shared<abstract::AbstractTensor>(kFloat32, shp_b);
-  AbstractBasePtrList args_spec_list{x_abstract, x_abstract, b_abstract, b_abstract, b_abstract, b_abstract};
-  auto kernel_graph = GetKernelGraph(g, args_spec_list);
-  EXPECT_NE(kernel_graph, nullptr);
-
-  auto optimizer = std::make_shared<opt::GraphOptimizer>();
-  auto pm = std::make_shared<opt::PassManager>();
-  auto pass = std::make_shared<opt::BatchNormGradSplit>();
-  pm->AddPass(pass);
-  optimizer->AddPassManager(pm);
-  auto new_graph = optimizer->Optimize(kernel_graph);
-
-  FuncGraphPtr g_after = get_py_fun_.CallAndParseRet("test_batch_norm_grad_split", "after");
-  EXPECT_TRUE(CheckEqualGraph(g_after, new_graph));
-}
-}  // namespace opt
-}  // namespace mindspore

From 354d8b0354ac0ea922dfbbbcf17c0c64b4bb6c0e Mon Sep 17 00:00:00 2001
From: changzherui <changzherui1@huawei.com>
Date: Tue, 5 May 2020 16:02:02 +0800
Subject: [PATCH 238/242] mod softmax

---
 mindspore/ccsrc/kernel/tbe/tbe_adapter.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
index fec4729b23..959d21a1b8 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
@@ -30,6 +30,9 @@ namespace mindspore {
 namespace kernel {
 namespace tbe {
 static std::map<string, string> tbe_func_adapter_map = {
+  {"softmax", "softmax_v2"},
+  {"log_softmax", "log_softmax_v2"},
+  {"applymomentum", "applymomentum_d"},
   {"re_lu6", "relu6"},
   {"re_lu6_grad", "relu6_grad"},
   {"re_lu", "relu"},

From 84c029d1caf21ec2c7a0473b3a7ae49b98254633 Mon Sep 17 00:00:00 2001
From: changzherui <changzherui1@huawei.com>
Date: Tue, 5 May 2020 16:36:01 +0800
Subject: [PATCH 239/242] add apply_momentum

---
 mindspore/ccsrc/kernel/tbe/tbe_adapter.cc | 2 +-
 mindspore/ops/_op_impl/tbe/batchnorm.py   | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
index 959d21a1b8..8ce5504b8e 100644
--- a/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
+++ b/mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
@@ -32,7 +32,7 @@ namespace tbe {
 static std::map<string, string> tbe_func_adapter_map = {
   {"softmax", "softmax_v2"},
   {"log_softmax", "log_softmax_v2"},
-  {"applymomentum", "applymomentum_d"},
+  {"apply_momentum", "apply_momentum_d"},
   {"re_lu6", "relu6"},
   {"re_lu6_grad", "relu6_grad"},
   {"re_lu", "relu"},
diff --git a/mindspore/ops/_op_impl/tbe/batchnorm.py b/mindspore/ops/_op_impl/tbe/batchnorm.py
index 6dd79245a3..ddb24ac3e7 100644
--- a/mindspore/ops/_op_impl/tbe/batchnorm.py
+++ b/mindspore/ops/_op_impl/tbe/batchnorm.py
@@ -36,19 +36,18 @@ batch_norm_op_info = TBERegOp("BatchNorm") \
     .output(2, "batch_variance", False, "required", "all") \
     .output(3, "reserve_space_1", False, "optional", "all") \
     .output(4, "reserve_space_2", False, "optional", "all") \
-    .output(5, "reserve_space_3", False, "optional", "all") \
     .dtype_format(DataType.F16_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
                   DataType.F32_Default, DataType.F16_Default, DataType.F32_Default, DataType.F32_Default,
-                  DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \
+                  DataType.F32_Default, DataType.F32_Default) \
     .dtype_format(DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD,
                   DataType.F32_5HD, DataType.F16_5HD, DataType.F32_5HD, DataType.F32_5HD,
-                  DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \
+                  DataType.F32_5HD, DataType.F32_5HD) \
     .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
                   DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
-                  DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \
+                  DataType.F32_Default, DataType.F32_Default) \
     .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD,
                   DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD,
-                  DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \
+                  DataType.F32_5HD, DataType.F32_5HD) \
     .get_op_info()
 
 

From e01692addaa2753dbf6b8565995b429c04463e8f Mon Sep 17 00:00:00 2001
From: changzherui <changzherui1@huawei.com>
Date: Tue, 5 May 2020 17:08:26 +0800
Subject: [PATCH 240/242] modify apply_momentum output var

---
 mindspore/ops/_op_impl/tbe/apply_momentum.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mindspore/ops/_op_impl/tbe/apply_momentum.py b/mindspore/ops/_op_impl/tbe/apply_momentum.py
index 42ce9d0e41..920a48e48d 100644
--- a/mindspore/ops/_op_impl/tbe/apply_momentum.py
+++ b/mindspore/ops/_op_impl/tbe/apply_momentum.py
@@ -29,7 +29,7 @@ apply_momentum_op_info = TBERegOp("ApplyMomentum") \
     .input(2, "lr", False, "required", "all") \
     .input(3, "grad", False, "required", "all") \
     .input(4, "momentum", False, "required", "all") \
-    .output(0, "var", False, "required", "all") \
+    .output(0, "out", False, "required", "all") \
     .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
                   DataType.F16_Default, DataType.F16_Default) \
     .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD,

From 48735c25caa8bd0f29267ebb7dcfac3d32ce9266 Mon Sep 17 00:00:00 2001
From: changzherui <changzherui1@huawei.com>
Date: Tue, 5 May 2020 17:39:10 +0800
Subject: [PATCH 241/242] modify apply momentum output 2

---
 mindspore/ccsrc/device/kernel_runtime.cc     |  1 +
 mindspore/ops/_op_impl/tbe/apply_momentum.py | 19 ++++++++++---------
 mindspore/ops/operations/nn_ops.py           |  5 +++++
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc
index d3fccc11fd..e77d348630 100644
--- a/mindspore/ccsrc/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/device/kernel_runtime.cc
@@ -201,6 +201,7 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
   if (AnfAlgo::GetCNodeName(kernel) == "ApplyMomentum") {
     auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0);
     AnfAlgo::SetOutputAddr(device_address, 0, kernel.get());
+    AnfAlgo::SetOutputAddr(device_address, 1, kernel.get());
     return;
   }
 
diff --git a/mindspore/ops/_op_impl/tbe/apply_momentum.py b/mindspore/ops/_op_impl/tbe/apply_momentum.py
index 920a48e48d..deb8f0d387 100644
--- a/mindspore/ops/_op_impl/tbe/apply_momentum.py
+++ b/mindspore/ops/_op_impl/tbe/apply_momentum.py
@@ -29,23 +29,24 @@ apply_momentum_op_info = TBERegOp("ApplyMomentum") \
     .input(2, "lr", False, "required", "all") \
     .input(3, "grad", False, "required", "all") \
     .input(4, "momentum", False, "required", "all") \
-    .output(0, "out", False, "required", "all") \
+    .output(0, "var", False, "required", "all") \
+    .output(1, "accum", False, "required", "all") \
     .dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
-                  DataType.F16_Default, DataType.F16_Default) \
+                  DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \
     .dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD,
-                  DataType.F16_Default, DataType.F16_5HD) \
+                  DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD) \
     .dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0,
-                  DataType.F16_Default, DataType.F16_C1HWNCoC0) \
+                  DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
     .dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ,
-                  DataType.F16_Default, DataType.F16_FracZ) \
+                  DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ) \
     .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
-                  DataType.F32_Default, DataType.F32_Default) \
+                  DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \
     .dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD,
-                  DataType.F32_Default, DataType.F32_5HD) \
+                  DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD) \
     .dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0,
-                  DataType.F32_Default, DataType.F32_C1HWNCoC0) \
+                  DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
     .dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ,
-                  DataType.F32_Default, DataType.F32_FracZ) \
+                  DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ) \
     .get_op_info()
 
 
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index fb8e908924..2a2dbe08a8 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -1427,8 +1427,11 @@ class ApplyMomentum(PrimitiveWithInfer):
     def __init__(self, use_nesterov=False, use_locking=False, gradient_scale=1.0):
         self.init_prim_io_names(inputs=['variable', 'accumulation', 'learning_rate', 'gradient', 'momentum'],
                                 outputs=['output'])
+        self.is_tbe = context.get_context("device_target") == "Ascend"
 
     def infer_shape(self, v_shape, a_shape, l_shape, g_shape, m_shape):
+        if self.is_tbe:
+            return v_shape, v_shape
         return v_shape
 
     def infer_dtype(self, v_dtype, a_dtype, l_dtype, g_dtype, m_dtype):
@@ -1439,6 +1442,8 @@ class ApplyMomentum(PrimitiveWithInfer):
         validator.check_scalar_or_tensor_type_same({"l_dtype": l_dtype}, valid_types, self.name)
         validator.check_scalar_or_tensor_type_same({"g_dtype": g_dtype}, valid_types, self.name)
         validator.check_scalar_or_tensor_type_same({"m_dtype": m_dtype}, valid_types, self.name)
+        if self.is_tbe:
+            return g_dtype, g_dtype
         return g_dtype
 
 

From 446828f221709a6f7995e09087a31b675da73c16 Mon Sep 17 00:00:00 2001
From: changzherui <changzherui1@huawei.com>
Date: Tue, 5 May 2020 21:56:13 +0800
Subject: [PATCH 242/242] modify momentum test

---
 tests/ut/python/ops/test_momentum.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ut/python/ops/test_momentum.py b/tests/ut/python/ops/test_momentum.py
index 28b9637015..f25e4faf2d 100644
--- a/tests/ut/python/ops/test_momentum.py
+++ b/tests/ut/python/ops/test_momentum.py
@@ -38,7 +38,7 @@ def tensor_run_opt(opt, iters, learning_rate, momentum,
                    gradient, variable, moment):
     """ tensor_run_opt """
     success = True
-    new_weight = opt(variable, moment, learning_rate, gradient, momentum)
+    new_weight = opt(variable, moment, learning_rate, gradient, momentum)[0]
     success = F.depend(success, F.assign(variable, new_weight))
     return success