
support multi param for tuple grad

tags/v0.5.0-beta
panyifeng 5 years ago
commit 3c2057297e
13 changed files with 65 additions and 62 deletions
  1. mindspore/ccsrc/operator/prim_others.cc (+32 -11)
  2. mindspore/ccsrc/pipeline/action.cc (+2 -1)
  3. mindspore/ccsrc/pipeline/static_analysis/abstract_value.h (+4 -4)
  4. mindspore/common/parameter.py (+5 -5)
  5. mindspore/nn/optim/adam.py (+1 -1)
  6. mindspore/nn/optim/ftrl.py (+1 -1)
  7. mindspore/nn/optim/lazyadam.py (+3 -2)
  8. mindspore/ops/composite/base.py (+7 -31)
  9. tests/ut/python/nn/optim/test_adam.py (+2 -1)
  10. tests/ut/python/nn/optim/test_adam_with_tuple_grad.py (+2 -2)
  11. tests/ut/python/nn/optim/test_ftrl.py (+2 -1)
  12. tests/ut/python/nn/optim/test_lazyadam.py (+2 -1)
  13. tests/ut/python/nn/optim/test_proximal_ada_grad.py (+2 -1)

mindspore/ccsrc/operator/prim_others.cc (+32 -11)

@@ -59,7 +59,8 @@ class UndeterminedShapeType {
  public:
   explicit UndeterminedShapeType(const std::string &env_str) {
     // param_name indices_shape indices_type values_shape values_type dense_shape
-    // export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
+    // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
+    // 2:Float32:3 1 2"
     std::vector<string> fields;
     string tmp;
     std::stringstream input(env_str);
@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
 }
 const size_t UndeterminedShapeType::fields_num = 6;
 
+std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
+void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
+  if (!g_undetermined_configs.empty()) {
+    return;
+  }
+  std::string tmp;
+  std::stringstream input(sparse_shape_types);
+  while (std::getline(input, tmp, ';')) {
+    auto config = UndeterminedShapeType(tmp);
+    g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
+    MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
+  }
+}
+
 AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list) {
   MS_EXCEPTION_IF_NULL(primitive);
@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr
     MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
   }
 
-  if (key->sparse_grad()) {
+  if (!key->sparse_grad().empty()) {
     // Will be fixed once undetermined type ready
     auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
     if (sparse_shape_types.empty()) {
-      sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
+      sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
     }
-    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
-                  << sparse_shape_types;
+    InitUndeterminedFromEnv(sparse_shape_types);
 
-    auto shape_types = UndeterminedShapeType(sparse_shape_types);
+    auto shape_types = g_undetermined_configs.find(key->sparse_grad());
+    if (shape_types == g_undetermined_configs.end()) {
+      MS_LOG(EXCEPTION) << "Param " << key->ToString()
+                        << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
+                        << sparse_shape_types;
+    }
+    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
     AbstractBasePtrList sparse_list;
     // indices
-    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
-    auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
+    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
+    auto indices =
+      std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
     sparse_list.emplace_back(indices);
     // values
-    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
-    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
+    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
+    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
     sparse_list.emplace_back(dout);
     // dense_shape
-    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
+    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
     return std::make_shared<AbstractTuple>(sparse_list);
   }
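Note: the env string now packs one colon-separated record per sparse parameter, with records separated by `;`, and InitUndeterminedFromEnv caches each record in g_undetermined_configs under its param_name so repeated EnvGetItem inferences reuse the parsed configs. A minimal Python sketch of the same decomposition (illustrative only, not part of this commit):

    # Record layout: param_name:indices_shape:indices_type:values_shape:values_type:dense_shape
    env = ("sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
           "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")

    configs = {}
    for record in env.split(";"):  # one record per sparse parameter
        name, ishape, itype, vshape, vtype, dshape = record.split(":")  # fields_num = 6
        configs[name] = {
            "indices_shape": [int(d) for d in ishape.split()],
            "indices_type": itype,
            "values_shape": [int(d) for d in vshape.split()],
            "values_type": vtype,
            "dense_shape": [int(d) for d in dshape.split()],
        }

    assert configs["sparse_key_w1"]["values_shape"] == [2, 1, 2]
    assert configs["sparse_key_w2"]["dense_shape"] == [3, 1, 2]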




mindspore/ccsrc/pipeline/action.cc (+2 -1)

@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
     if (param_node->has_default()) {
       auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
       AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
-      auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
+      auto sparse_grad =
+        py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
       ptr->set_sparse_grad(sparse_grad);
 
       parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);


mindspore/ccsrc/pipeline/static_analysis/abstract_value.h (+4 -4)

@@ -44,7 +44,7 @@ class AbstractBase : public Base {
  public:
   explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
                         const BaseShapePtr &shape = kNoShape)
-      : value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
+      : value_(value), type_(type), shape_(shape), sparse_grad_("") {}
   ~AbstractBase() override = default;
   MS_DECLARE_PARENT(AbstractBase, Base)


@@ -53,13 +53,13 @@ class AbstractBase : public Base {
 
   virtual bool operator==(const AbstractBase &other) const;
   void set_value(const ValuePtr &value) { value_ = value; }
-  void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
+  void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
   void set_type(const TypePtr &type) { type_ = type; }
   void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
   void set_value_desc(const std::string &desc) { value_desc_ = desc; }
   const std::string &value_desc() const { return value_desc_; }
   ValuePtr GetValueTrack() const { return value_; }
-  bool sparse_grad() const { return sparse_grad_; }
+  const std::string &sparse_grad() const { return sparse_grad_; }
   TypePtr GetTypeTrack() const { return type_; }
   BaseShapePtr GetShapeTrack() const { return shape_; }


@@ -87,7 +87,7 @@ class AbstractBase : public Base {
   TypePtr type_;
   BaseShapePtr shape_;
   std::string value_desc_;  // store initial value description for error report
-  bool sparse_grad_;
+  std::string sparse_grad_;
 };
 
 class AbstractScalar : public AbstractBase {


mindspore/common/parameter.py (+5 -5)

@@ -51,9 +51,9 @@ class Parameter:
         requires_grad (bool): True if the parameter requires gradient. Default: True.
         layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode,
             broadcast and gradients communication would not be applied on parameters. Default: False.
-        sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
+        sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty.
     """
-    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
+    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
         self.set_parameter_data(default_input)
         self.name = name
         self.requires_grad = requires_grad
@@ -181,9 +181,9 @@
         return self._sparse_grad
 
     @sparse_grad.setter
-    def sparse_grad(self, value=True):
-        if not isinstance(value, bool):
-            raise TypeError("`sparse_grad` parameter must be bool type")
+    def sparse_grad(self, value=""):
+        if not isinstance(value, str):
+            raise TypeError("`sparse_grad` parameter must be str type")
         self._sparse_grad = value
 
     @property
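Note: after this change `sparse_grad` is a string key rather than a bool, and an empty string means a dense gradient. A usage sketch mirroring the updated tests (assuming the top-level `mindspore` package re-exports `Tensor` and `Parameter`):

    import numpy as np
    from mindspore import Tensor, Parameter  # assumed top-level re-exports

    # The key "sparse_key_w1" must match a record in UNDETERMINED_SPARSE_SHAPE_TYPES.
    w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
                   name="w1", sparse_grad="sparse_key_w1")
    w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)),
                   name="w2")  # default sparse_grad="" keeps the gradient dense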


mindspore/nn/optim/adam.py (+1 -1)

@@ -156,7 +156,7 @@ class Adam(Optimizer):
     To improve parameter groups performance, the customized order of parameters can be supported.
 
     The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-    `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+    `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
     behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:


mindspore/nn/optim/ftrl.py (+1 -1)

@@ -72,7 +72,7 @@ class FTRL(Optimizer):
 
     Note:
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:


mindspore/nn/optim/lazyadam.py (+3 -2)

@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
     applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
 
     The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-    `sparse_grad` of `Parameter` being set as True. The sparse behavior, to be notice, is not equivalent to the
+    `sparse_grad` of `Parameter` being set. The sparse behavior, to be notice, is not equivalent to the
     original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under
-    continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
+    continuous development. The sparse behavior is currently performed on the CPU, weight decay is
+    not supported.
 
     Args:
         params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated,
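Note: the docstring's caveat, that only the rows named by the current step's indices are updated, is what makes LazyAdam diverge from dense Adam. A rough sketch of such a lazy first-moment update (illustrative NumPy only, not MindSpore code):

    import numpy as np

    m = np.zeros((4, 2), dtype=np.float32)  # first-moment buffer, one row per index
    indices = np.array([0, 2])               # rows present in this step's sparse grad
    values = np.ones((2, 2), dtype=np.float32)
    beta1 = 0.9
    # Only rows 0 and 2 are updated; rows 1 and 3 keep stale moments,
    # which is why the result is not equivalent to dense Adam.
    m[indices] = beta1 * m[indices] + (1 - beta1) * values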


mindspore/ops/composite/base.py (+7 -31)

@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
             return func(*args_list)
         return tuple(map(hypermap, *args_list))
 
+
 class Map(Map_):
     """
     Map will apply the set operation on input sequences.
@@ -271,37 +272,12 @@
         Map_.__init__(self)
 
     def __call__(self, *args):
-        func = args[0]
-        count = 0
-        count_max = 1
-        args_list = args[1:]
-        if self.ops is not None:
-            func = self.ops
-            args_list = args
-        for item in args_list:
-            if isinstance(item, (tuple, list)):
-                count_max = len(item)
-                break
-
-        def get_item(x):
-            nonlocal count
-            if isinstance(x, (tuple, list)):
-                return x[count]
-            return x
-
-        for i in range(count_max):
-            true_args = tuple(map(get_item, args_list))
-            func(*true_args)
-            count = i + 1
-        return True
-
-    def register(self, *type_names):
-        """Register a function for the given type string."""
-
-        def deco(fn):
-            self.register_fn(type_names, fn)
-            return fn
-        return deco
+        func = self.ops
+        args_list = args
+        if self.ops is None:
+            func = args[0]
+            args_list = args[1:]
+        return tuple(map(func, *args_list))
 
 
 class _ListAppend(ListAppend_):
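Note: the rewritten `Map.__call__` zips all input sequences through Python's built-in `map`, so an n-ary function receives one element from each sequence per step and the results come back as a tuple; this is what lets a gradient function consume several parameter tuples at once. A standalone sketch of the same semantics:

    # Equivalent behavior of the new Map.__call__ for the `self.ops is None` case.
    def map_call(func, *sequences):
        return tuple(map(func, *sequences))

    grads = (1.0, 2.0)
    weights = (10.0, 20.0)
    # Each call gets one grad and one weight: (10 - 0.1*1, 20 - 0.1*2)
    print(map_call(lambda g, w: w - 0.1 * g, grads, weights))  # (9.9, 19.8)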


tests/ut/python/nn/optim/test_adam.py (+2 -1)

@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()


tests/ut/python/nn/optim/test_adam_with_tuple_grad.py (+2 -2)

@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
     class NetWithSparseGatherV2(nn.Cell):
         def __init__(self):
             super(NetWithSparseGatherV2, self).__init__()
-            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
-            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
+            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
+            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
             self.gatherv2 = P.SparseGatherV2()
             self.axis = 0
         def construct(self, indices):
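Note: each sparse parameter now carries its own key, and InferImplEnvGetItem looks each key up independently, which is the multi-parameter support named in the commit title. Keys not covered by the built-in default would need matching records in the env var, e.g. (hypothetical setup, not part of this test):

    import os
    os.environ["UNDETERMINED_SPARSE_SHAPE_TYPES"] = (
        "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
        "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2"
    )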


tests/ut/python/nn/optim/test_ftrl.py (+2 -1)

@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()


tests/ut/python/nn/optim/test_lazyadam.py (+2 -1)

@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()


tests/ut/python/nn/optim/test_proximal_ada_grad.py (+2 -1)

@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()

