@@ -59,7 +59,8 @@ class UndeterminedShapeType {
  public:
   explicit UndeterminedShapeType(const std::string &env_str) {
     // param_name indices_shape indices_type values_shape values_type dense_shape
-    // export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
+    // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
+    // 2:Float32:3 1 2"
     std::vector<string> fields;
     string tmp;
     std::stringstream input(env_str);
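Each entry in UNDETERMINED_SPARSE_SHAPE_TYPES packs the six colon-separated fields named in the comment above; entries are joined with ';'. A minimal Python sketch of the format (the parse_entry helper is hypothetical, for illustration only):

    # Hypothetical helper mirroring the field layout parsed by UndeterminedShapeType.
    def parse_entry(entry):
        fields = entry.split(":")
        assert len(fields) == 6  # matches fields_num in the C++ code
        name, ishape, itype, vshape, vtype, dshape = fields
        return {
            "param_name": name,
            "indices_shape": [int(x) for x in ishape.split()],
            "indices_type": itype,
            "values_shape": [int(x) for x in vshape.split()],
            "values_type": vtype,
            "dense_shape": [int(x) for x in dshape.split()],
        }

    print(parse_entry("sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2")["dense_shape"])  # [3, 1, 2]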
@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
 }
 const size_t UndeterminedShapeType::fields_num = 6;
+std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
+
+void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
+  if (!g_undetermined_configs.empty()) {
+    return;
+  }
+  std::string tmp;
+  std::stringstream input(sparse_shape_types);
+  while (std::getline(input, tmp, ';')) {
+    auto config = UndeterminedShapeType(tmp);
+    g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
+    MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
+  }
+}
+
 AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list) {
   MS_EXCEPTION_IF_NULL(primitive);
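Behaviourally, InitUndeterminedFromEnv is a parse-once step: it splits the environment string on ';' and indexes each config by its sparse key. A rough Python equivalent (names are stand-ins for the C++ globals):

    _configs = {}  # stands in for g_undetermined_configs: sparse key -> raw config entry

    def init_undetermined_from_env(sparse_shape_types):
        if _configs:  # already initialized; parse only once, as in the C++ guard
            return
        for entry in sparse_shape_types.split(";"):
            key = entry.split(":", 1)[0]  # param_name is the first field
            _configs[key] = entry

    init_undetermined_from_env(
        "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")
    print(sorted(_configs))  # ['sparse_key_w1', 'sparse_key_w2']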
@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt
     MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
   }
-  if (key->sparse_grad()) {
+  if (!key->sparse_grad().empty()) {
     // Will be fixed once undetermined type ready
     auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
     if (sparse_shape_types.empty()) {
-      sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
+      sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
     }
-    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
-                  << sparse_shape_types;
-    auto shape_types = UndeterminedShapeType(sparse_shape_types);
+    InitUndeterminedFromEnv(sparse_shape_types);
+    auto shape_types = g_undetermined_configs.find(key->sparse_grad());
+    if (shape_types == g_undetermined_configs.end()) {
+      MS_LOG(EXCEPTION) << "Param " << key->ToString()
+                        << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
+                        << sparse_shape_types;
+    }
+    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
     AbstractBasePtrList sparse_list;
     // indices
-    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
-    auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
+    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
+    auto indices =
+        std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
     sparse_list.emplace_back(indices);
     // values
-    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
-    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
+    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
+    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
     sparse_list.emplace_back(dout);
     // dense_shape
-    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
+    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
     return std::make_shared<AbstractTuple>(sparse_list);
   }
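The inferred abstract for a sparse gradient is a three-element tuple of (indices, values, dense_shape). Using the default sparse_key_w1 entry above, a sketch of the concrete shapes this inference describes (NumPy is only a stand-in for the abstract tensors):

    import numpy as np

    # "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2" describes a triple shaped like:
    indices = np.zeros([2], dtype=np.int32)          # indices_shape = [2], Int32
    values = np.zeros([2, 1, 2], dtype=np.float32)   # values_shape = [2, 1, 2], Float32
    dense_shape = (3, 1, 2)                          # shape of the equivalent dense gradient
    sparse_grad = (indices, values, dense_shape)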
@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
     if (param_node->has_default()) {
       auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
       AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
-      auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
+      auto sparse_grad =
+          py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
       ptr->set_sparse_grad(sparse_grad);
       parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);
@@ -44,7 +44,7 @@ class AbstractBase : public Base {
  public:
   explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
                         const BaseShapePtr &shape = kNoShape)
-      : value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
+      : value_(value), type_(type), shape_(shape), sparse_grad_("") {}
   ~AbstractBase() override = default;
   MS_DECLARE_PARENT(AbstractBase, Base)
@@ -53,13 +53,13 @@ class AbstractBase : public Base {
   virtual bool operator==(const AbstractBase &other) const;
   void set_value(const ValuePtr &value) { value_ = value; }
-  void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
+  void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
   void set_type(const TypePtr &type) { type_ = type; }
   void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
   void set_value_desc(const std::string &desc) { value_desc_ = desc; }
   const std::string &value_desc() const { return value_desc_; }
   ValuePtr GetValueTrack() const { return value_; }
-  bool sparse_grad() const { return sparse_grad_; }
+  const std::string &sparse_grad() const { return sparse_grad_; }
   TypePtr GetTypeTrack() const { return type_; }
   BaseShapePtr GetShapeTrack() const { return shape_; }
@@ -87,7 +87,7 @@ class AbstractBase : public Base {
   TypePtr type_;
   BaseShapePtr shape_;
   std::string value_desc_;  // store initial value description for error report
-  bool sparse_grad_;
+  std::string sparse_grad_;
 };
 
 class AbstractScalar : public AbstractBase {
@@ -51,9 +51,9 @@ class Parameter:
         requires_grad (bool): True if the parameter requires gradient. Default: True.
         layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in parallel mode,
             broadcast and gradients communication would not be applied on parameters. Default: False.
-        sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
+        sparse_grad (str): Key selecting the parameter's sparse gradient configuration; empty means dense. Default: "".
     """
 
-    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
+    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
         self.set_parameter_data(default_input)
         self.name = name
         self.requires_grad = requires_grad
@@ -181,9 +181,9 @@ class Parameter:
         return self._sparse_grad
 
     @sparse_grad.setter
-    def sparse_grad(self, value=True):
-        if not isinstance(value, bool):
-            raise TypeError("`sparse_grad` parameter must be bool type")
+    def sparse_grad(self, value=""):
+        if not isinstance(value, str):
+            raise TypeError("`sparse_grad` parameter must be str type")
         self._sparse_grad = value
 
     @property
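With the setter accepting a string, a parameter opts into sparse gradients by naming its config key. A usage sketch mirroring the tests further below (import paths assumed from the standard MindSpore API; the key must match an entry in UNDETERMINED_SPARSE_SHAPE_TYPES):

    import numpy as np
    from mindspore import Tensor, Parameter

    # The key ties this parameter to its shape/type entry in the env config.
    w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
                   name="w1", sparse_grad="sparse_key_w1")
    w1.sparse_grad = ""  # an empty string reverts to a dense gradient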
@@ -156,7 +156,7 @@ class Adam(Optimizer):
     To improve parameter groups performance, the customized order of parameters can be supported.
 
     The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-    `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+    `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
     behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:
@@ -72,7 +72,7 @@ class FTRL(Optimizer):
     Note:
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:
@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
     applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
 
     The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-    `sparse_grad` of `Parameter` being set as True. The sparse behavior, to be notice, is not equivalent to the
-    original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under
-    continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
+    `sparse_grad` of `Parameter` being set. Note that the sparse behavior is not equivalent to the
+    original Adam algorithm, as only the params at the current indices will be updated. The sparse feature is under
+    continuous development. The sparse behavior is currently performed on the CPU, weight decay is
+    not supported.
 
     Args:
@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
             return func(*args_list)
         return tuple(map(hypermap, *args_list))
 
+
 class Map(Map_):
     """
     Map will apply the set operation on input sequences.
| @@ -271,37 +272,12 @@ class Map(Map_): | |||||
| Map_.__init__(self) | Map_.__init__(self) | ||||
| def __call__(self, *args): | def __call__(self, *args): | ||||
| func = args[0] | |||||
| count = 0 | |||||
| count_max = 1 | |||||
| args_list = args[1:] | |||||
| if self.ops is not None: | |||||
| func = self.ops | |||||
| args_list = args | |||||
| for item in args_list: | |||||
| if isinstance(item, (tuple, list)): | |||||
| count_max = len(item) | |||||
| break | |||||
| def get_item(x): | |||||
| nonlocal count | |||||
| if isinstance(x, (tuple, list)): | |||||
| return x[count] | |||||
| return x | |||||
| for i in range(count_max): | |||||
| true_args = tuple(map(get_item, args_list)) | |||||
| func(*true_args) | |||||
| count = i + 1 | |||||
| return True | |||||
| def register(self, *type_names): | |||||
| """Register a function for the given type string.""" | |||||
| def deco(fn): | |||||
| self.register_fn(type_names, fn) | |||||
| return fn | |||||
| return deco | |||||
| func = self.ops | |||||
| args_list = args | |||||
| if self.ops is None: | |||||
| func = args[0] | |||||
| args_list = args[1:] | |||||
| return tuple(map(func, *args_list)) | |||||
| class _ListAppend(ListAppend_): | class _ListAppend(ListAppend_): | ||||
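After the rewrite, Map.__call__ simply maps func across the argument sequences and returns the resulting tuple instead of True. A plain-Python sketch of the new contract:

    # func comes from the preset self.ops, or else from the first positional argument.
    def map_call(ops, *args):
        func = ops
        args_list = args
        if ops is None:
            func = args[0]
            args_list = args[1:]
        return tuple(map(func, *args_list))

    print(map_call(None, lambda x, y: x + y, (1, 2), (10, 20)))  # (11, 22)
    print(map_call(abs, (-1, -2, 3)))                            # (1, 2, 3)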
@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
     class NetWithSparseGatherV2(nn.Cell):
         def __init__(self):
             super(NetWithSparseGatherV2, self).__init__()
-            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
-            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
+            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
+            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
             self.gatherv2 = P.SparseGatherV2()
             self.axis = 0
 
         def construct(self, indices):
@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()