From 3c2057297e8e84398127a9c5cd8f1858ee385adf Mon Sep 17 00:00:00 2001
From: panyifeng
Date: Fri, 19 Jun 2020 14:46:24 +0800
Subject: [PATCH] support multi param for tuple grad

---
 mindspore/ccsrc/operator/prim_others.cc       | 43 ++++++++++++++-----
 mindspore/ccsrc/pipeline/action.cc            |  3 +-
 .../pipeline/static_analysis/abstract_value.h |  8 ++--
 mindspore/common/parameter.py                 | 10 ++---
 mindspore/nn/optim/adam.py                    |  2 +-
 mindspore/nn/optim/ftrl.py                    |  2 +-
 mindspore/nn/optim/lazyadam.py                |  5 ++-
 mindspore/ops/composite/base.py               | 38 +++-
 tests/ut/python/nn/optim/test_adam.py         |  3 +-
 .../nn/optim/test_adam_with_tuple_grad.py     |  4 +-
 tests/ut/python/nn/optim/test_ftrl.py         |  3 +-
 tests/ut/python/nn/optim/test_lazyadam.py     |  3 +-
 .../python/nn/optim/test_proximal_ada_grad.py |  3 +-
 13 files changed, 65 insertions(+), 62 deletions(-)

diff --git a/mindspore/ccsrc/operator/prim_others.cc b/mindspore/ccsrc/operator/prim_others.cc
index a7323ed3cb..432b12f83b 100644
--- a/mindspore/ccsrc/operator/prim_others.cc
+++ b/mindspore/ccsrc/operator/prim_others.cc
@@ -59,7 +59,8 @@ class UndeterminedShapeType {
  public:
   explicit UndeterminedShapeType(const std::string &env_str) {
     // param_name indices_shape indices_type values_shape values_type dense_shape
-    // export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
+    // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
+    // 2:Float32:3 1 2"
     std::vector<std::string> fields;
     string tmp;
     std::stringstream input(env_str);
@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
 }
 const size_t UndeterminedShapeType::fields_num = 6;
 
+std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
+void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
+  if (!g_undetermined_configs.empty()) {
+    return;
+  }
+  std::string tmp;
+  std::stringstream input(sparse_shape_types);
+  while (std::getline(input, tmp, ';')) {
+    auto config = UndeterminedShapeType(tmp);
+    g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
+    MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
+  }
+}
+
 AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list) {
   MS_EXCEPTION_IF_NULL(primitive);
@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePt
     MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
   }
 
-  if (key->sparse_grad()) {
+  if (!key->sparse_grad().empty()) {
     // Will be fixed once undetermined type ready
     auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
     if (sparse_shape_types.empty()) {
-      sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
+      sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
     }
-    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
-                  << sparse_shape_types;
+    InitUndeterminedFromEnv(sparse_shape_types);
 
-    auto shape_types = UndeterminedShapeType(sparse_shape_types);
+    auto shape_types = g_undetermined_configs.find(key->sparse_grad());
+    if (shape_types == g_undetermined_configs.end()) {
+      MS_LOG(EXCEPTION) << "Param " << key->ToString()
+                        << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
+                        << sparse_shape_types;
+    }
+    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
     AbstractBasePtrList sparse_list;
     // indices
-    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
-    auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
+    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
+    auto indices =
+      std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
     sparse_list.emplace_back(indices);
     // values
-    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
-    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
+    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
+    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
    sparse_list.emplace_back(dout);
     // dense_shape
-    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
+    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
     return std::make_shared<AbstractTuple>(sparse_list);
   }
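
Each entry in UNDETERMINED_SPARSE_SHAPE_TYPES is keyed by a parameter's `sparse_grad` string, so several sparse parameters can be configured at once: InitUndeterminedFromEnv splits the value on ';' and splits each entry on ':' into the six fields named in the comment above. A rough Python mirror of that parsing, for reference only (the helper name and dict layout are illustrative, not part of the patch):

# Illustrative Python mirror of InitUndeterminedFromEnv: split the env value on ';'
# into per-parameter entries, then split each entry on ':' into the six fields from
# the C++ comment (param_name indices_shape indices_type values_shape values_type dense_shape).
import os

FIELDS = ("param_name", "indices_shape", "indices_type",
          "values_shape", "values_type", "dense_shape")

def parse_undetermined_configs(env_value):
    """Return a dict mapping param_name to its field dict."""
    configs = {}
    for entry in env_value.split(";"):
        values = entry.split(":")
        if len(values) != len(FIELDS):
            raise ValueError("expected %d fields, got: %r" % (len(FIELDS), entry))
        config = dict(zip(FIELDS, values))
        configs[config["param_name"]] = config
    return configs

if __name__ == "__main__":
    env = os.environ.get(
        "UNDETERMINED_SPARSE_SHAPE_TYPES",
        "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")
    print(parse_undetermined_configs(env))
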
diff --git a/mindspore/ccsrc/pipeline/action.cc b/mindspore/ccsrc/pipeline/action.cc
index f127305d1b..5799ba35bd 100644
--- a/mindspore/ccsrc/pipeline/action.cc
+++ b/mindspore/ccsrc/pipeline/action.cc
@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
     if (param_node->has_default()) {
       auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
       AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
-      auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
+      auto sparse_grad =
+        py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
       ptr->set_sparse_grad(sparse_grad);
 
       parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);
diff --git a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h
index dcd6f8f951..f3375d22d6 100644
--- a/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h
+++ b/mindspore/ccsrc/pipeline/static_analysis/abstract_value.h
@@ -44,7 +44,7 @@ class AbstractBase : public Base {
  public:
   explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
                         const BaseShapePtr &shape = kNoShape)
-      : value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
+      : value_(value), type_(type), shape_(shape), sparse_grad_("") {}
   ~AbstractBase() override = default;
   MS_DECLARE_PARENT(AbstractBase, Base)
 
@@ -53,13 +53,13 @@ class AbstractBase : public Base {
   virtual bool operator==(const AbstractBase &other) const;
 
   void set_value(const ValuePtr &value) { value_ = value; }
-  void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
+  void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
   void set_type(const TypePtr &type) { type_ = type; }
   void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
   void set_value_desc(const std::string &desc) { value_desc_ = desc; }
   const std::string &value_desc() const { return value_desc_; }
   ValuePtr GetValueTrack() const { return value_; }
-  bool sparse_grad() const { return sparse_grad_; }
+  const std::string &sparse_grad() const { return sparse_grad_; }
   TypePtr GetTypeTrack() const { return type_; }
   BaseShapePtr GetShapeTrack() const { return shape_; }
 
@@ -87,7 +87,7 @@ class AbstractBase : public Base {
   TypePtr type_;
   BaseShapePtr shape_;
   std::string value_desc_;  // store initial value description for error report
-  bool sparse_grad_;
+  std::string sparse_grad_;
 };
 
 class AbstractScalar : public AbstractBase {
diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py
index 69affee2c3..6dca4fca9c 100644
--- a/mindspore/common/parameter.py
+++ b/mindspore/common/parameter.py
@@ -51,9 +51,9 @@ class Parameter:
         requires_grad (bool): True if the parameter requires gradient. Default: True.
         layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode,
             broadcast and gradients communication would not be applied on parameters. Default: False.
-        sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
+        sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty.
     """
-    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
+    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
         self.set_parameter_data(default_input)
         self.name = name
         self.requires_grad = requires_grad
@@ -181,9 +181,9 @@ class Parameter:
         return self._sparse_grad
 
     @sparse_grad.setter
-    def sparse_grad(self, value=True):
-        if not isinstance(value, bool):
-            raise TypeError("`sparse_grad` parameter must be bool type")
+    def sparse_grad(self, value=""):
+        if not isinstance(value, str):
+            raise TypeError("`sparse_grad` parameter must be str type")
         self._sparse_grad = value
 
     @property
diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 92cab56a05..786be1bd0b 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -156,7 +156,7 @@ class Adam(Optimizer):
         To improve parameter groups performance, the customized order of parameters can be supported.
 
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:
diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py
index d1f49a3791..a40d6737cb 100644
--- a/mindspore/nn/optim/ftrl.py
+++ b/mindspore/nn/optim/ftrl.py
@@ -72,7 +72,7 @@ class FTRL(Optimizer):
 
     Note:
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:
diff --git a/mindspore/nn/optim/lazyadam.py b/mindspore/nn/optim/lazyadam.py
index d9df717b8a..48d33bf798 100644
--- a/mindspore/nn/optim/lazyadam.py
+++ b/mindspore/nn/optim/lazyadam.py
@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
         applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
 
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse behavior, to be notice, is not equivalent to the
+        `sparse_grad` of `Parameter` being set. The sparse behavior, to be notice, is not equivalent to the
         original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under
-        continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
+        continuous development. The sparse behavior is currently performed on the CPU, weight decay is
+        not supported.
 
     Args:
         params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated,
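
With `sparse_grad` now a string key (an empty string means a dense gradient), tagging parameters looks as follows; a minimal sketch assuming the patched `Parameter` is importable from `mindspore`, with illustrative names and key strings:

# Minimal sketch: sparse_grad carries a string key instead of True/False.
# The key is what EnvGetItem looks up in UNDETERMINED_SPARSE_SHAPE_TYPES.
import numpy as np
from mindspore import Tensor, Parameter

# Sparse parameter: tag it with a key that matches an entry in the env config.
w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
               name="w1", sparse_grad="sparse_key_w1")
# Dense parameter: leave sparse_grad at its default, the empty string.
w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")

assert w1.sparse_grad == "sparse_key_w1"   # string key, no longer a bool
assert w2.sparse_grad == ""                # dense by default
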
diff --git a/mindspore/ops/composite/base.py b/mindspore/ops/composite/base.py
index 63e83a126c..e283867684 100644
--- a/mindspore/ops/composite/base.py
+++ b/mindspore/ops/composite/base.py
@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
             return func(*args_list)
         return tuple(map(hypermap, *args_list))
 
+
 class Map(Map_):
     """
     Map will apply the set operation on input sequences.
@@ -271,37 +272,12 @@ class Map(Map_):
         Map_.__init__(self)
 
     def __call__(self, *args):
-        func = args[0]
-        count = 0
-        count_max = 1
-        args_list = args[1:]
-        if self.ops is not None:
-            func = self.ops
-            args_list = args
-        for item in args_list:
-            if isinstance(item, (tuple, list)):
-                count_max = len(item)
-                break
-
-        def get_item(x):
-            nonlocal count
-            if isinstance(x, (tuple, list)):
-                return x[count]
-            return x
-
-        for i in range(count_max):
-            true_args = tuple(map(get_item, args_list))
-            func(*true_args)
-            count = i + 1
-        return True
-
-    def register(self, *type_names):
-        """Register a function for the given type string."""
-
-        def deco(fn):
-            self.register_fn(type_names, fn)
-            return fn
-        return deco
+        func = self.ops
+        args_list = args
+        if self.ops is None:
+            func = args[0]
+            args_list = args[1:]
+        return tuple(map(func, *args_list))
 
 
 class _ListAppend(ListAppend_):
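
The rewritten `Map.__call__` above simply forwards to Python's built-in `map` and returns a tuple, instead of iterating with a manual counter. A standalone sketch of the new control flow (plain Python, independent of `Map_`):

# Standalone sketch of the simplified Map.__call__ logic: if an operation was bound at
# construction time, map it over all positional args; otherwise the first positional
# argument is the function and the remaining args are the sequences to map over.
def map_call(ops, *args):
    func = ops
    args_list = args
    if ops is None:
        func = args[0]
        args_list = args[1:]
    return tuple(map(func, *args_list))

# Both call styles yield an element-wise tuple of results.
assert map_call(None, lambda x, y: x + y, (1, 2, 3), (10, 20, 30)) == (11, 22, 33)
assert map_call(lambda x: x * x, (1, 2, 3)) == (1, 4, 9)
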
diff --git a/tests/ut/python/nn/optim/test_adam.py b/tests/ut/python/nn/optim/test_adam.py
index be22c8abdc..3fd18b9664 100644
--- a/tests/ut/python/nn/optim/test_adam.py
+++ b/tests/ut/python/nn/optim/test_adam.py
@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
diff --git a/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py b/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py
index 86ea99b1ae..5222f920ba 100644
--- a/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py
+++ b/tests/ut/python/nn/optim/test_adam_with_tuple_grad.py
@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
     class NetWithSparseGatherV2(nn.Cell):
         def __init__(self):
             super(NetWithSparseGatherV2, self).__init__()
-            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
-            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
+            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
+            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
             self.gatherv2 = P.SparseGatherV2()
             self.axis = 0
         def construct(self, indices):
diff --git a/tests/ut/python/nn/optim/test_ftrl.py b/tests/ut/python/nn/optim/test_ftrl.py
index 213ce6c460..f0f094c177 100644
--- a/tests/ut/python/nn/optim/test_ftrl.py
+++ b/tests/ut/python/nn/optim/test_ftrl.py
@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
diff --git a/tests/ut/python/nn/optim/test_lazyadam.py b/tests/ut/python/nn/optim/test_lazyadam.py
index 77b02f9ff9..713fffc50d 100644
--- a/tests/ut/python/nn/optim/test_lazyadam.py
+++ b/tests/ut/python/nn/optim/test_lazyadam.py
@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
diff --git a/tests/ut/python/nn/optim/test_proximal_ada_grad.py b/tests/ut/python/nn/optim/test_proximal_ada_grad.py
index 52e418d39b..a43a4ad23d 100644
--- a/tests/ut/python/nn/optim/test_proximal_ada_grad.py
+++ b/tests/ut/python/nn/optim/test_proximal_ada_grad.py
@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
    def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
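
Taken together, the test updates exercise two sparse parameters with distinct keys, which is the multi-parameter case this patch enables. A hypothetical end-to-end setup (the env value mirrors the default built into prim_others.cc; the network and its construct body are illustrative, not copied from the tests):

# Hypothetical setup for two sparse parameters (assumes a MindSpore build with this patch
# and the CPU sparse path). Each sparse_grad key needs a matching entry in
# UNDETERMINED_SPARSE_SHAPE_TYPES so EnvGetItem can infer the sparse gradient layout.
import os
import numpy as np

os.environ["UNDETERMINED_SPARSE_SHAPE_TYPES"] = (
    "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
    "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")

import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P

class TwoSparseParamNet(nn.Cell):
    def __init__(self):
        super(TwoSparseParamNet, self).__init__()
        self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
                            name="w1", sparse_grad="sparse_key_w1")
        self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)),
                            name="w2", sparse_grad="sparse_key_w2")
        self.gatherv2 = P.SparseGatherV2()
        self.axis = 0

    def construct(self, indices):
        # Illustrative body: both parameters are read through SparseGatherV2 so that
        # each receives a sparse gradient (indices must be valid for both weights).
        return self.gatherv2(self.w1, indices, self.axis) + self.gatherv2(self.w2, indices, self.axis)
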