@@ -59,7 +59,8 @@ class UndeterminedShapeType {
  public:
   explicit UndeterminedShapeType(const std::string &env_str) {
     // param_name indices_shape indices_type values_shape values_type dense_shape
-    // export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
+    // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
+    // 2:Float32:3 1 2"
     std::vector<string> fields;
     string tmp;
     std::stringstream input(env_str);
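For reference, a rough Python sketch (not part of the patch; parse_entry is an illustrative helper) of how one ';'-separated entry of UNDETERMINED_SPARSE_SHAPE_TYPES splits into the six ':'-separated fields named in the comment above:

import os

def parse_entry(entry):
    """Split one ';'-separated entry into its six ':'-separated fields."""
    param_name, indices_shape, indices_type, values_shape, values_type, dense_shape = entry.split(":")
    return {
        "param_name": param_name,
        "indices_shape": [int(d) for d in indices_shape.split()],
        "indices_type": indices_type,
        "values_shape": [int(d) for d in values_shape.split()],
        "values_type": values_type,
        "dense_shape": [int(d) for d in dense_shape.split()],
    }

env = os.getenv("UNDETERMINED_SPARSE_SHAPE_TYPES",
                "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")
print(parse_entry(env.split(";")[0]))
# e.g. {'param_name': 'sparse_key_w1', 'indices_shape': [2], 'indices_type': 'Int32', ...}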
@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
 }
 const size_t UndeterminedShapeType::fields_num = 6;
+std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
+void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
+  if (!g_undetermined_configs.empty()) {
+    return;
+  }
+  std::string tmp;
+  std::stringstream input(sparse_shape_types);
+  while (std::getline(input, tmp, ';')) {
+    auto config = UndeterminedShapeType(tmp);
+    g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
+    MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
+  }
+}
 AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list) {
   MS_EXCEPTION_IF_NULL(primitive);
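InitUndeterminedFromEnv parses the environment string once per process and keys each config by its param_name in g_undetermined_configs. A minimal Python sketch of that behaviour, assuming the same format (the names _undetermined_configs and init_undetermined_from_env are illustrative):

import os

_undetermined_configs = {}

def init_undetermined_from_env(sparse_shape_types):
    """Parse the env string once; key each ';'-separated entry by its param_name."""
    if _undetermined_configs:  # already initialised, mirrors the empty() early return
        return
    for entry in sparse_shape_types.split(";"):
        param_name = entry.split(":")[0]
        _undetermined_configs[param_name] = entry

init_undetermined_from_env(os.getenv(
    "UNDETERMINED_SPARSE_SHAPE_TYPES",
    "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2"))
print(sorted(_undetermined_configs))  # ['sparse_key_w1', 'sparse_key_w2']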
@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt
     MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
   }
-  if (key->sparse_grad()) {
+  if (!key->sparse_grad().empty()) {
     // Will be fixed once undetermined type ready
     auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
     if (sparse_shape_types.empty()) {
-      sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
+      sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
     }
-    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
-                  << sparse_shape_types;
+    InitUndeterminedFromEnv(sparse_shape_types);
-    auto shape_types = UndeterminedShapeType(sparse_shape_types);
+    auto shape_types = g_undetermined_configs.find(key->sparse_grad());
+    if (shape_types == g_undetermined_configs.end()) {
+      MS_LOG(EXCEPTION) << "Param " << key->ToString()
+                        << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
+                        << sparse_shape_types;
+    }
+    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
     AbstractBasePtrList sparse_list;
     // indices
-    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
-    auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
+    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
+    auto indices =
+      std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
     sparse_list.emplace_back(indices);
     // values
-    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
-    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
+    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
+    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
     sparse_list.emplace_back(dout);
     // dense_shape
-    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
+    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
     return std::make_shared<AbstractTuple>(sparse_list);
   }
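With the per-key config found, the inference now describes the sparse gradient as a tuple of (indices, values, dense_shape) built from that key's shapes and types instead of a single hard-coded config. Roughly, the default entry for sparse_key_w1 corresponds to shapes like these (a numpy illustration only, not the actual abstract values):

import numpy as np

# "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2" roughly implies:
indices = np.zeros([2], dtype=np.int32)         # indices_shape = [2], indices_type = Int32
values = np.zeros([2, 1, 2], dtype=np.float32)  # values_shape = [2, 1, 2], values_type = Float32
dense_shape = (3, 1, 2)                         # shape of the full (dense) gradient
sparse_grad = (indices, values, dense_shape)    # tuple layout the inference describes
print([getattr(x, "shape", x) for x in sparse_grad])  # [(2,), (2, 1, 2), (3, 1, 2)]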
@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
     if (param_node->has_default()) {
       auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
       AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
-      auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
+      auto sparse_grad =
+        py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
       ptr->set_sparse_grad(sparse_grad);
       parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);
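During specialization, the default parameter's sparse_grad attribute is now read as a string instead of a bool. A toy Python stand-in for that expectation (FakeParam is illustrative; the real read happens via GetPyObjAttr and py::cast<std::string> on the C++ side):

class FakeParam:
    """Illustrative stand-in for a parameter default value holding the attribute."""
    def __init__(self, sparse_grad=""):
        self.sparse_grad = sparse_grad  # a str key now; "" means "not sparse"

p = FakeParam(sparse_grad="sparse_key_w1")
assert isinstance(p.sparse_grad, str)  # a bool here would no longer match the string cast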
@@ -44,7 +44,7 @@ class AbstractBase : public Base {
  public:
   explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
                         const BaseShapePtr &shape = kNoShape)
-      : value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
+      : value_(value), type_(type), shape_(shape), sparse_grad_("") {}
   ~AbstractBase() override = default;
   MS_DECLARE_PARENT(AbstractBase, Base)
@@ -53,13 +53,13 @@ class AbstractBase : public Base {
   virtual bool operator==(const AbstractBase &other) const;
   void set_value(const ValuePtr &value) { value_ = value; }
-  void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
+  void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
   void set_type(const TypePtr &type) { type_ = type; }
   void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
   void set_value_desc(const std::string &desc) { value_desc_ = desc; }
   const std::string &value_desc() const { return value_desc_; }
   ValuePtr GetValueTrack() const { return value_; }
-  bool sparse_grad() const { return sparse_grad_; }
+  const std::string &sparse_grad() const { return sparse_grad_; }
   TypePtr GetTypeTrack() const { return type_; }
   BaseShapePtr GetShapeTrack() const { return shape_; }
@@ -87,7 +87,7 @@ class AbstractBase : public Base {
   TypePtr type_;
   BaseShapePtr shape_;
   std::string value_desc_;  // store initial value description for error report
-  bool sparse_grad_;
+  std::string sparse_grad_;
 };
 class AbstractScalar : public AbstractBase {
@@ -51,9 +51,9 @@ class Parameter:
         requires_grad (bool): True if the parameter requires gradient. Default: True.
         layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode,
             broadcast and gradients communication would not be applied on parameters. Default: False.
-        sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
+        sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty.
     """
-    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
+    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
         self.set_parameter_data(default_input)
         self.name = name
         self.requires_grad = requires_grad
@@ -181,9 +181,9 @@ class Parameter:
         return self._sparse_grad
     @sparse_grad.setter
-    def sparse_grad(self, value=True):
-        if not isinstance(value, bool):
-            raise TypeError("`sparse_grad` parameter must be bool type")
+    def sparse_grad(self, value=""):
+        if not isinstance(value, str):
+            raise TypeError("`sparse_grad` parameter must be str type")
         self._sparse_grad = value
     @property
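On the Python API side, Parameter.sparse_grad is now a string key and the setter rejects anything else. A hedged usage sketch, assuming the key matches an entry of UNDETERMINED_SPARSE_SHAPE_TYPES (the env value below is the same default the C++ fallback uses):

import os
import numpy as np
from mindspore import Tensor, Parameter

# The key must match an entry of UNDETERMINED_SPARSE_SHAPE_TYPES for the shape/type lookup.
os.environ["UNDETERMINED_SPARSE_SHAPE_TYPES"] = (
    "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")

w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
               name="w1", sparse_grad="sparse_key_w1")  # str key instead of a bool
try:
    w1.sparse_grad = True  # the old bool style now fails the isinstance(value, str) check
except TypeError as err:
    print(err)  # `sparse_grad` parameter must be str type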
@@ -156,7 +156,7 @@ class Adam(Optimizer):
         To improve parameter groups performance, the customized order of parameters can be supported.
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
     Args:
@@ -72,7 +72,7 @@ class FTRL(Optimizer):
     Note:
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
     Args:
@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
         applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse behavior, to be notice, is not equivalent to the
+        `sparse_grad` of `Parameter` being set. The sparse behavior, to be notice, is not equivalent to the
         original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under
-        continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
+        continuous development. The sparse behavior is currently performed on the CPU, weight decay is
+        not supported.
     Args:
         params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated,
@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
             return func(*args_list)
         return tuple(map(hypermap, *args_list))
+
 class Map(Map_):
     """
     Map will apply the set operation on input sequences.
@@ -271,37 +272,12 @@ class Map(Map_):
         Map_.__init__(self)
     def __call__(self, *args):
-        func = args[0]
-        count = 0
-        count_max = 1
-        args_list = args[1:]
-        if self.ops is not None:
-            func = self.ops
-            args_list = args
-        for item in args_list:
-            if isinstance(item, (tuple, list)):
-                count_max = len(item)
-                break
-        def get_item(x):
-            nonlocal count
-            if isinstance(x, (tuple, list)):
-                return x[count]
-            return x
-        for i in range(count_max):
-            true_args = tuple(map(get_item, args_list))
-            func(*true_args)
-            count = i + 1
-        return True
-    def register(self, *type_names):
-        """Register a function for the given type string."""
-        def deco(fn):
-            self.register_fn(type_names, fn)
-            return fn
-        return deco
+        func = self.ops
+        args_list = args
+        if self.ops is None:
+            func = args[0]
+            args_list = args[1:]
+        return tuple(map(func, *args_list))
 class _ListAppend(ListAppend_):
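The rewritten Map.__call__ drops the manual index bookkeeping and element counting: it simply maps the function across the input sequences and returns the mapped tuple (instead of True, as before). A plain-Python sketch of the new behaviour with a stand-in function:

def map_call(func, *args_list):
    """Roughly the new Map.__call__ body for the case where self.ops is None."""
    return tuple(map(func, *args_list))

print(map_call(lambda x, y: x + y, (1, 2, 3), (10, 20, 30)))  # (11, 22, 33)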
@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
     class NetWithSparseGatherV2(nn.Cell):
         def __init__(self):
             super(NetWithSparseGatherV2, self).__init__()
-            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
-            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
+            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
+            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
             self.gatherv2 = P.SparseGatherV2()
            self.axis = 0
         def construct(self, indices):
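Since both w1 and w2 now carry sparse keys, UNDETERMINED_SPARSE_SHAPE_TYPES (or the built-in default) must contain an entry for each key, otherwise the new MS_LOG(EXCEPTION) path in InferImplEnvGetItem fires. A small sanity-check sketch of that requirement (illustrative only):

import os

env = os.getenv("UNDETERMINED_SPARSE_SHAPE_TYPES",
                "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")
configured = {entry.split(":")[0] for entry in env.split(";")}

# Every key used by a Parameter above must be configured, or inference raises.
for key in ("sparse_key_w1", "sparse_key_w2"):
    assert key in configured, key + " is not configured in UNDETERMINED_SPARSE_SHAPE_TYPES"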
@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)),
+                                 name="weight1", sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()