
support multi param for tuple grad

tags/v0.5.0-beta
panyifeng 5 years ago
commit 3c2057297e
13 changed files with 65 additions and 62 deletions
  1. mindspore/ccsrc/operator/prim_others.cc (+32 -11)
  2. mindspore/ccsrc/pipeline/action.cc (+2 -1)
  3. mindspore/ccsrc/pipeline/static_analysis/abstract_value.h (+4 -4)
  4. mindspore/common/parameter.py (+5 -5)
  5. mindspore/nn/optim/adam.py (+1 -1)
  6. mindspore/nn/optim/ftrl.py (+1 -1)
  7. mindspore/nn/optim/lazyadam.py (+3 -2)
  8. mindspore/ops/composite/base.py (+7 -31)
  9. tests/ut/python/nn/optim/test_adam.py (+2 -1)
  10. tests/ut/python/nn/optim/test_adam_with_tuple_grad.py (+2 -2)
  11. tests/ut/python/nn/optim/test_ftrl.py (+2 -1)
  12. tests/ut/python/nn/optim/test_lazyadam.py (+2 -1)
  13. tests/ut/python/nn/optim/test_proximal_ada_grad.py (+2 -1)

mindspore/ccsrc/operator/prim_others.cc (+32 -11)

@@ -59,7 +59,8 @@ class UndeterminedShapeType {
  public:
   explicit UndeterminedShapeType(const std::string &env_str) {
     // param_name indices_shape indices_type values_shape values_type dense_shape
-    // export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
+    // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
+    // 2:Float32:3 1 2"
     std::vector<string> fields;
     string tmp;
     std::stringstream input(env_str);
@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
 }
 const size_t UndeterminedShapeType::fields_num = 6;
 
+std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
+void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
+  if (!g_undetermined_configs.empty()) {
+    return;
+  }
+  std::string tmp;
+  std::stringstream input(sparse_shape_types);
+  while (std::getline(input, tmp, ';')) {
+    auto config = UndeterminedShapeType(tmp);
+    g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
+    MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
+  }
+}
+
 AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list) {
   MS_EXCEPTION_IF_NULL(primitive);
@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr
     MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
   }
 
-  if (key->sparse_grad()) {
+  if (!key->sparse_grad().empty()) {
     // Will be fixed once undetermined type ready
     auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
     if (sparse_shape_types.empty()) {
-      sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
+      sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
     }
-    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
-                  << sparse_shape_types;
+    InitUndeterminedFromEnv(sparse_shape_types);
 
-    auto shape_types = UndeterminedShapeType(sparse_shape_types);
+    auto shape_types = g_undetermined_configs.find(key->sparse_grad());
+    if (shape_types == g_undetermined_configs.end()) {
+      MS_LOG(EXCEPTION) << "Param " << key->ToString()
+                        << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
+                        << sparse_shape_types;
+    }
+    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
     AbstractBasePtrList sparse_list;
     // indices
-    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
-    auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
+    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
+    auto indices =
+      std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
     sparse_list.emplace_back(indices);
     // values
-    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
-    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
+    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
+    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
     sparse_list.emplace_back(dout);
     // dense_shape
-    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
+    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
     return std::make_shared<AbstractTuple>(sparse_list);
   }
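Note: the env string now packs one colon-separated record per sparse parameter, with records separated by `;`, and InitUndeterminedFromEnv caches each record in g_undetermined_configs under its param_name so repeated EnvGetItem inferences reuse the parsed configs. A minimal Python sketch of the same decomposition (illustrative only, not part of this commit):

    # Record layout: param_name:indices_shape:indices_type:values_shape:values_type:dense_shape
    env = ("sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
           "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")

    configs = {}
    for record in env.split(";"):  # one record per sparse parameter
        name, ishape, itype, vshape, vtype, dshape = record.split(":")  # fields_num = 6
        configs[name] = {
            "indices_shape": [int(d) for d in ishape.split()],
            "indices_type": itype,
            "values_shape": [int(d) for d in vshape.split()],
            "values_type": vtype,
            "dense_shape": [int(d) for d in dshape.split()],
        }

    assert configs["sparse_key_w1"]["values_shape"] == [2, 1, 2]
    assert configs["sparse_key_w2"]["dense_shape"] == [3, 1, 2]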




mindspore/ccsrc/pipeline/action.cc (+2 -1)

@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
     if (param_node->has_default()) {
       auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
       AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
-      auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
+      auto sparse_grad =
+        py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
       ptr->set_sparse_grad(sparse_grad);
 
       parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);


mindspore/ccsrc/pipeline/static_analysis/abstract_value.h (+4 -4)

@@ -44,7 +44,7 @@ class AbstractBase : public Base {
  public:
   explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
                         const BaseShapePtr &shape = kNoShape)
-      : value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
+      : value_(value), type_(type), shape_(shape), sparse_grad_("") {}
   ~AbstractBase() override = default;
   MS_DECLARE_PARENT(AbstractBase, Base)


@@ -53,13 +53,13 @@ class AbstractBase : public Base {
 
   virtual bool operator==(const AbstractBase &other) const;
   void set_value(const ValuePtr &value) { value_ = value; }
-  void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
+  void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
   void set_type(const TypePtr &type) { type_ = type; }
   void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
   void set_value_desc(const std::string &desc) { value_desc_ = desc; }
   const std::string &value_desc() const { return value_desc_; }
   ValuePtr GetValueTrack() const { return value_; }
-  bool sparse_grad() const { return sparse_grad_; }
+  const std::string &sparse_grad() const { return sparse_grad_; }
   TypePtr GetTypeTrack() const { return type_; }
   BaseShapePtr GetShapeTrack() const { return shape_; }


@@ -87,7 +87,7 @@ class AbstractBase : public Base {
   TypePtr type_;
   BaseShapePtr shape_;
   std::string value_desc_;  // store initial value description for error report
-  bool sparse_grad_;
+  std::string sparse_grad_;
 };
 
 class AbstractScalar : public AbstractBase {


mindspore/common/parameter.py (+5 -5)

@@ -51,9 +51,9 @@ class Parameter:
         requires_grad (bool): True if the parameter requires gradient. Default: True.
         layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode,
             broadcast and gradients communication would not be applied on parameters. Default: False.
-        sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
+        sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty.
     """
-    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
+    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
         self.set_parameter_data(default_input)
         self.name = name
         self.requires_grad = requires_grad
@@ -181,9 +181,9 @@
         return self._sparse_grad
 
     @sparse_grad.setter
-    def sparse_grad(self, value=True):
-        if not isinstance(value, bool):
-            raise TypeError("`sparse_grad` parameter must be bool type")
+    def sparse_grad(self, value=""):
+        if not isinstance(value, str):
+            raise TypeError("`sparse_grad` parameter must be str type")
         self._sparse_grad = value
 
     @property
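Note: after this change `sparse_grad` is a string key rather than a bool, and an empty string means a dense gradient. A usage sketch mirroring the updated tests (assuming the top-level `mindspore` package re-exports `Tensor` and `Parameter`):

    import numpy as np
    from mindspore import Tensor, Parameter  # assumed top-level re-exports

    # The key "sparse_key_w1" must match a record in UNDETERMINED_SPARSE_SHAPE_TYPES.
    w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
                   name="w1", sparse_grad="sparse_key_w1")
    w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)),
                   name="w2")  # default sparse_grad="" keeps the gradient dense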


mindspore/nn/optim/adam.py (+1 -1)

@@ -156,7 +156,7 @@ class Adam(Optimizer):
     To improve parameter groups performance, the customized order of parameters can be supported.
 
     The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-    `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+    `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
     behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:


mindspore/nn/optim/ftrl.py (+1 -1)

@@ -72,7 +72,7 @@ class FTRL(Optimizer):
 
     Note:
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:


mindspore/nn/optim/lazyadam.py (+3 -2)

@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
     applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
 
     The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-    `sparse_grad` of `Parameter` being set as True. The sparse behavior, to be notice, is not equivalent to the
+    `sparse_grad` of `Parameter` being set. The sparse behavior, to be notice, is not equivalent to the
     original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under
-    continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
+    continuous development. The sparse behavior is currently performed on the CPU, weight decay is
+    not supported.
 
     Args:
         params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated,
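Note: the docstring's caveat, that only the rows named by the current step's indices are updated, is what makes LazyAdam diverge from dense Adam. A rough sketch of such a lazy first-moment update (illustrative NumPy only, not MindSpore code):

    import numpy as np

    m = np.zeros((4, 2), dtype=np.float32)  # first-moment buffer, one row per index
    indices = np.array([0, 2])               # rows present in this step's sparse grad
    values = np.ones((2, 2), dtype=np.float32)
    beta1 = 0.9
    # Only rows 0 and 2 are updated; rows 1 and 3 keep stale moments,
    # which is why the result is not equivalent to dense Adam.
    m[indices] = beta1 * m[indices] + (1 - beta1) * values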


mindspore/ops/composite/base.py (+7 -31)

@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
             return func(*args_list)
         return tuple(map(hypermap, *args_list))
 
+
 class Map(Map_):
     """
     Map will apply the set operation on input sequences.
@@ -271,37 +272,12 @@
         Map_.__init__(self)
 
     def __call__(self, *args):
-        func = args[0]
-        count = 0
-        count_max = 1
-        args_list = args[1:]
-        if self.ops is not None:
-            func = self.ops
-            args_list = args
-        for item in args_list:
-            if isinstance(item, (tuple, list)):
-                count_max = len(item)
-                break
-
-        def get_item(x):
-            nonlocal count
-            if isinstance(x, (tuple, list)):
-                return x[count]
-            return x
-
-        for i in range(count_max):
-            true_args = tuple(map(get_item, args_list))
-            func(*true_args)
-            count = i + 1
-        return True
-
-    def register(self, *type_names):
-        """Register a function for the given type string."""
-
-        def deco(fn):
-            self.register_fn(type_names, fn)
-            return fn
-        return deco
+        func = self.ops
+        args_list = args
+        if self.ops is None:
+            func = args[0]
+            args_list = args[1:]
+        return tuple(map(func, *args_list))
 
 
 class _ListAppend(ListAppend_):
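Note: the rewritten `Map.__call__` zips all input sequences through Python's built-in `map`, so an n-ary function receives one element from each sequence per step and the results come back as a tuple; this is what lets a gradient function consume several parameter tuples at once. A standalone sketch of the same semantics:

    # Equivalent behavior of the new Map.__call__ for the `self.ops is None` case.
    def map_call(func, *sequences):
        return tuple(map(func, *sequences))

    grads = (1.0, 2.0)
    weights = (10.0, 20.0)
    # Each call gets one grad and one weight: (10 - 0.1*1, 20 - 0.1*2)
    print(map_call(lambda g, w: w - 0.1 * g, grads, weights))  # (9.9, 19.8)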


tests/ut/python/nn/optim/test_adam.py (+2 -1)

@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()


tests/ut/python/nn/optim/test_adam_with_tuple_grad.py (+2 -2)

@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
     class NetWithSparseGatherV2(nn.Cell):
         def __init__(self):
             super(NetWithSparseGatherV2, self).__init__()
-            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
-            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
+            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
+            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
             self.gatherv2 = P.SparseGatherV2()
             self.axis = 0
         def construct(self, indices):
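Note: each sparse parameter now carries its own key, and InferImplEnvGetItem looks each key up independently, which is the multi-parameter support named in the commit title. Keys not covered by the built-in default would need matching records in the env var, e.g. (hypothetical setup, not part of this test):

    import os
    os.environ["UNDETERMINED_SPARSE_SHAPE_TYPES"] = (
        "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
        "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2"
    )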


tests/ut/python/nn/optim/test_ftrl.py (+2 -1)

@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()


tests/ut/python/nn/optim/test_lazyadam.py (+2 -1)

@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()


tests/ut/python/nn/optim/test_proximal_ada_grad.py (+2 -1)

@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()

