Merge pull request !5958 from lilei/modify_bugtags/v1.0.0
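This change renames the `Parameter` update API across the codebase: the `set_parameter_data` method becomes `set_data`, the `default_input` property (and its setter) is removed in favor of the read-only `data` property, and every call site in the frontend, model zoo, and tests is migrated accordingly.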
@@ -124,7 +124,6 @@ constexpr char CLONED_INDEX[] = "cloned_index";
 constexpr char BE_CLONED_INDEX[] = "be_cloned_index";
 constexpr char GROUP_RANKS[] = "group_ranks";
 constexpr char IS_IN_FORWARD[] = "is_in_forward";
-constexpr char DEFAULT_INPUT[] = "default_input";
 constexpr char DTYPE[] = "DType";
 constexpr char DEV_NUM[] = "dev_num";
 constexpr char MEAN_FLAG[] = "mean_flag";
@@ -154,17 +154,17 @@ void ConvertObjectToTensors(const py::dict &dict, TensorOrderMap *const tensors)
     }
     std::shared_ptr<Tensor> tensor;
     std::string name = py::cast<std::string>(item.first);
-    if (py::isinstance<py::float_>(item.second.attr("default_input"))) {
+    if (py::isinstance<py::float_>(item.second.attr("data"))) {
       // convert float to tensor with shape([1])
      tensor = std::make_shared<Tensor>(kNumberTypeFloat32, std::vector<int>({1}));
-      *(static_cast<float *>(tensor->data_c())) = py::cast<float>(item.second.attr("default_input"));
-    } else if (py::isinstance<py::int_>(item.second.attr("default_input"))) {
+      *(static_cast<float *>(tensor->data_c())) = py::cast<float>(item.second.attr("data"));
+    } else if (py::isinstance<py::int_>(item.second.attr("data"))) {
       // convert int to tensor with shape([1])
      tensor = std::make_shared<Tensor>(kNumberTypeInt32, std::vector<int>({1}));
-      *(static_cast<float *>(tensor->data_c())) = py::cast<float>(item.second.attr("default_input"));
-    } else if (py::isinstance<Tensor>(item.second.attr("default_input"))) {
+      *(static_cast<float *>(tensor->data_c())) = py::cast<float>(item.second.attr("data"));
+    } else if (py::isinstance<Tensor>(item.second.attr("data"))) {
       // cast tensor
-      tensor = py::cast<std::shared_ptr<Tensor>>(item.second.attr("default_input"));
+      tensor = py::cast<std::shared_ptr<Tensor>>(item.second.attr("data"));
     }
     if (tensor == nullptr) {
@@ -49,7 +49,7 @@ class Parameter(MetaTensor):
     Each parameter of Cell is represented by Parameter class.
     Args:
-        default_input (Union[Tensor, Initializer, Number]): Parameter data, to be set initialized.
+        set_data (Union[Tensor, Initializer, Number]): Parameter data, to be set initialized.
         name (str): Name of the child parameter.
         requires_grad (bool): True if the parameter requires gradient. Default: True.
         layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in parallel mode,
@@ -78,7 +78,7 @@ class Parameter(MetaTensor):
         >>> x = Tensor(np.ones((2,1)))
         >>> net(x)
         [[2.]]
-        >>> net.weight.set_parameter_data(Tensor(np.zeros((1,2))))
+        >>> net.weight.set_data(Tensor(np.zeros((1,2))))
         >>> net(x)
         [[0.]]
     """
@@ -136,7 +136,7 @@ class Parameter(MetaTensor):
     @staticmethod
     def _get_parameter_new_args(data):
-        """Set `default_input` of current `Parameter`."""
+        """Set the data of current `Parameter`."""
         if isinstance(data, bool):
             raise ValueError('Parameter data can not be `bool`')
         if isinstance(data, Initializer):
@@ -266,7 +266,7 @@ class Parameter(MetaTensor):
         if init != 'same':
             shape = self.shape
             dtype = self.dtype
-            x.default_input = initializer(init, shape=shape, dtype=dtype)
+            x.set_data(initializer(init, shape=shape, dtype=dtype))
         return x
     @property
@@ -292,16 +292,8 @@ class Parameter(MetaTensor):
     @property
     def data(self):
-        return self.default_input
-    @property
-    def default_input(self):
         return self
-    @default_input.setter
-    def default_input(self, data):
-        self.set_parameter_data(data)
     def _update_tensor_data(self, data):
         "Update the parameter by a Tensor."
         if isinstance(self, Tensor):
@@ -311,9 +303,9 @@ class Parameter(MetaTensor):
             # create a new tensor
             return Parameter(data, self.name, self.requires_grad)
-    def set_parameter_data(self, data, slice_shape=False):
+    def set_data(self, data, slice_shape=False):
         """
-        Set `default_input` of current `Parameter`.
+        Set the data of current `Parameter`.
         Args:
             data (Union[Tensor, Initializer, int, float]): new data.
@@ -339,7 +331,7 @@ class Parameter(MetaTensor):
         is_current_tensor = isinstance(self, Tensor)
         if is_incoming_tensor and not is_current_tensor:
-            raise TypeError("Parameter is a `MetaTensor` and not initializered, `data` for `set_parameter_data`"
+            raise TypeError("Parameter is a `MetaTensor` and not initialized, `data` for `set_data`"
                             " should be an Initializer. If you want to update it by Tensor, call method"
                             " `init_parameters_data` of `Cell` to init and replace all the Parameter of"
                             " network, then call this method.")
@@ -360,7 +352,7 @@ class Parameter(MetaTensor):
             else:
                 # also update the related inited parameter data
                 if self.inited_param is not None:
-                    self.inited_param.set_parameter_data(data)
+                    self.inited_param.set_data(data)
                 self.init_mode = data
         elif is_incoming_tensor or is_current_tensor:
             self._update_tensor_data(data)
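The `set_data` hunks above keep both update paths of the old API: a `Tensor` goes through `_update_tensor_data`, while an `Initializer` updates `init_mode` (and any related inited parameter). A hedged sketch of both paths, with illustrative names, assuming this PR is applied:

    import numpy as np
    from mindspore import Parameter, Tensor
    from mindspore.common.initializer import initializer

    p = Parameter(Tensor(np.ones((2, 3), np.float32)), name="p")
    p.set_data(Tensor(np.zeros((2, 3), np.float32)))             # Tensor path
    p.set_data(initializer('ones', p.data.shape, p.data.dtype))  # Initializer path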
@@ -374,7 +374,7 @@ class Cell(Cell_):
                 cells[name] = value
             elif params and name in params:
                 if isinstance(value, Tensor) and self._params[name] is not None:
-                    self._params[name].set_parameter_data(value)
+                    self._params[name].set_data(value)
                 elif value is not None:
                     raise TypeError("Expected type in (Parameter, ParameterTuple), but got {}.".format(type(value)))
                 else:
@@ -438,7 +438,7 @@ class Cell(Cell_):
                     continue
                 layout = self.parameter_layout_dict[key]
                 new_tensor = _load_tensor_by_layout(tensor, layout)
-                params[key].set_parameter_data(new_tensor, True)
+                params[key].set_data(new_tensor, True)
         else:
             raise TypeError('Parameters need OrderedDict type, but got {}'.
                             format(type(params)))
@@ -138,7 +138,7 @@ def calc_broadcast_shape_from_param(params):
         if value is None:
             return None
         if isinstance(value, Parameter):
-            value_t = value.default_input
+            value_t = value.data
         else:
             value_t = cast_to_tensor(value, mstype.float32)
         broadcast_shape = utils.get_broadcast_shape(
@@ -159,9 +159,9 @@ def check_greater_equal_zero(value, name):
     """
     if isinstance(value, Parameter):
-        if not isinstance(value.default_input, Tensor):
+        if not isinstance(value.data, Tensor):
             return
-        value = value.default_input
+        value = value.data
     comp = np.less(value.asnumpy(), np.zeros(value.shape))
     if comp.any():
         raise ValueError(f'{name} should be greater than or equal to zero.')
@@ -182,9 +182,9 @@ def check_greater_zero(value, name):
     if value is None:
         raise ValueError(f'input value cannot be None in check_greater_zero')
     if isinstance(value, Parameter):
-        if not isinstance(value.default_input, Tensor):
+        if not isinstance(value.data, Tensor):
             return
-        value = value.default_input
+        value = value.data
     comp = np.less(np.zeros(value.shape), value.asnumpy())
     if not comp.all():
         raise ValueError(f'{name} should be greater than zero.')
@@ -225,9 +225,9 @@ def check_prob(p):
     if p is None:
         raise ValueError(f'input value cannot be None in check_prob')
     if isinstance(p, Parameter):
-        if not isinstance(p.default_input, Tensor):
+        if not isinstance(p.data, Tensor):
             return
-        p = p.default_input
+        p = p.data
     comp = np.less(np.zeros(p.shape), p.asnumpy())
     if not comp.all():
         raise ValueError('Probabilities should be greater than zero')
@@ -251,7 +251,7 @@ class Cast(PrimitiveWithInfer):
         if isinstance(x, numbers.Number):
             return (True, Tensor(x, dtype=dtype))
         if isinstance(x, Parameter):
-            data = x.default_input
+            data = x.data
             if data.dtype == dtype:
                 return (True, x)
         return (False, None)
@@ -283,5 +283,5 @@ def load_nonquant_param_into_quant_net(quant_model, params_dict, quant_new_param
             raise ValueError(f"Can't find match parameter in ckpt,param name = {name}")
         value_param = next(iterable_dict[key_name], None)
         if value_param is not None:
-            param.set_parameter_data(value_param[1].data)
+            param.set_data(value_param[1].data)
             print(f'init model param {name} with checkpoint param {value_param[0]}')
@@ -63,7 +63,7 @@ def _special_process_par(par, new_par):
         if delta_i == delta_len - 1:
             new_val = new_par.data.asnumpy()
             new_val = new_val.reshape(par.data.shape)
-            par.set_parameter_data(Tensor(new_val, par.data.dtype))
+            par.set_data(Tensor(new_val, par.data.dtype))
             return True
     return False
@@ -86,7 +86,7 @@ def _update_param(param, new_param):
                 raise RuntimeError(msg)
             return
-        param.set_parameter_data(new_param.data)
+        param.set_data(new_param.data)
         return
     if isinstance(param.data, Tensor) and not isinstance(new_param.data, Tensor):
@@ -95,7 +95,7 @@ def _update_param(param, new_param):
            msg = ("Net parameters {} shape({}) is not (1,), inconsistent with parameter_dict's(scalar)."
                   .format(param.name, param.data.shape))
            raise RuntimeError(msg)
-        param.set_parameter_data(initializer(new_param.data, param.data.shape, param.data.dtype))
+        param.set_data(initializer(new_param.data, param.data.shape, param.data.dtype))
    elif isinstance(new_param.data, Tensor) and not isinstance(param.data, Tensor):
        logger.error("Failed to combine the net and the parameters for param %s.", param.name)
@@ -104,7 +104,7 @@ def _update_param(param, new_param):
         raise RuntimeError(msg)
     else:
-        param.set_parameter_data(type(param.data)(new_param.data))
+        param.set_data(type(param.data)(new_param.data))
 def _exec_save(ckpt_file_name, data_list):
@@ -90,7 +90,7 @@ if __name__ == '__main__':
     if args_opt.platform == "Ascend":
         for param in net.trainable_params():
             if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
-                param.set_parameter_data(initializer(XavierUniform(), param.data.shape, param.data.dtype))
+                param.set_data(initializer(XavierUniform(), param.data.shape, param.data.dtype))
     group_params = [{'params': decayed_params, 'weight_decay': cfg.weight_decay},
                     {'params': no_decayed_params},
                     {'order_params': net.trainable_params()}]
@@ -260,15 +260,15 @@ class MobileNetV2Backbone(nn.Cell):
         for _, m in self.cells_and_names():
             if isinstance(m, (nn.Conv2d, DepthwiseConv)):
                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
-                m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
-                                                                    m.weight.data.shape).astype("float32")))
+                m.weight.set_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
+                                                          m.weight.data.shape).astype("float32")))
                 if m.bias is not None:
-                    m.bias.set_parameter_data(
+                    m.bias.set_data(
                         Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.BatchNorm2d):
-                m.gamma.set_parameter_data(
+                m.gamma.set_data(
                     Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
-                m.beta.set_parameter_data(
+                m.beta.set_data(
                     Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
     @property
@@ -316,10 +316,10 @@ class MobileNetV2Head(nn.Cell):
         self.init_parameters_data()
         for _, m in self.cells_and_names():
             if isinstance(m, nn.Dense):
-                m.weight.set_parameter_data(Tensor(np.random.normal(
+                m.weight.set_data(Tensor(np.random.normal(
                     0, 0.01, m.weight.data.shape).astype("float32")))
                 if m.bias is not None:
-                    m.bias.set_parameter_data(
+                    m.bias.set_data(
                         Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
     @property
     def get_head(self):
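The same mechanical migration repeats through the model-zoo weight initializers below, so one condensed sketch of the pattern may help (a hypothetical standalone helper, assuming a MindSpore build with this PR):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    def reset_weights(network):
        # Walk every sub-cell and re-initialize conv/dense weights via the renamed set_data.
        for _, m in network.cells_and_names():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.set_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
                                                          m.weight.data.shape).astype("float32")))
            elif isinstance(m, nn.Dense):
                m.weight.set_data(Tensor(np.random.normal(0, 0.01,
                                                          m.weight.data.shape).astype("float32")))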
@@ -216,24 +216,24 @@ class mobilenetV2(nn.Cell):
             if isinstance(m, nn.Conv2d):
                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                 w = Tensor(np.random.normal(0, np.sqrt(2. / n), m.weight.data.shape).astype("float32"))
-                m.weight.set_parameter_data(w)
+                m.weight.set_data(w)
                 if m.bias is not None:
-                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
+                    m.bias.set_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.Conv2dBnAct):
                 n = m.conv.kernel_size[0] * m.conv.kernel_size[1] * m.conv.out_channels
                 w = Tensor(np.random.normal(0, np.sqrt(2. / n), m.conv.weight.data.shape).astype("float32"))
-                m.conv.weight.set_parameter_data(w)
+                m.conv.weight.set_data(w)
                 if m.conv.bias is not None:
-                    m.conv.bias.set_parameter_data(Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32")))
+                    m.conv.bias.set_data(Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.BatchNorm2d):
-                m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
-                m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
+                m.gamma.set_data(Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
+                m.beta.set_data(Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
             elif isinstance(m, nn.Dense):
-                m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape).astype("float32")))
+                m.weight.set_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape).astype("float32")))
                 if m.bias is not None:
-                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
+                    m.bias.set_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.DenseBnAct):
-                m.dense.weight.set_parameter_data(
+                m.dense.weight.set_data(
                     Tensor(np.random.normal(0, 0.01, m.dense.weight.data.shape).astype("float32")))
                 if m.dense.bias is not None:
-                    m.dense.bias.set_parameter_data(Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32")))
+                    m.dense.bias.set_data(Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32")))
@@ -221,24 +221,24 @@ class mobilenetV2(nn.Cell):
             if isinstance(m, nn.Conv2d):
                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                 w = Tensor(np.random.normal(0, np.sqrt(2. / n), m.weight.data.shape).astype("float32"))
-                m.weight.set_parameter_data(w)
+                m.weight.set_data(w)
                 if m.bias is not None:
-                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
+                    m.bias.set_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.Conv2dBnAct):
                 n = m.conv.kernel_size[0] * m.conv.kernel_size[1] * m.conv.out_channels
                 w = Tensor(np.random.normal(0, np.sqrt(2. / n), m.conv.weight.data.shape).astype("float32"))
-                m.conv.weight.set_parameter_data(w)
+                m.conv.weight.set_data(w)
                 if m.conv.bias is not None:
-                    m.conv.bias.set_parameter_data(Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32")))
+                    m.conv.bias.set_data(Tensor(np.zeros(m.conv.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.BatchNorm2d):
-                m.gamma.set_parameter_data(Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
-                m.beta.set_parameter_data(Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
+                m.gamma.set_data(Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
+                m.beta.set_data(Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
             elif isinstance(m, nn.Dense):
-                m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape).astype("float32")))
+                m.weight.set_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape).astype("float32")))
                 if m.bias is not None:
-                    m.bias.set_parameter_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
+                    m.bias.set_data(Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.DenseBnAct):
-                m.dense.weight.set_parameter_data(
+                m.dense.weight.set_data(
                     Tensor(np.random.normal(0, 0.01, m.dense.weight.data.shape).astype("float32")))
                 if m.dense.bias is not None:
-                    m.dense.bias.set_parameter_data(Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32")))
+                    m.dense.bias.set_data(Tensor(np.zeros(m.dense.bias.data.shape, dtype="float32")))
@@ -323,21 +323,21 @@ class MobileNetV3(nn.Cell):
         for _, m in self.cells_and_names():
             if isinstance(m, (nn.Conv2d)):
                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
-                m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
-                                                                    m.weight.data.shape).astype("float32")))
+                m.weight.set_data(Tensor(np.random.normal(0, np.sqrt(2. / n),
+                                                          m.weight.data.shape).astype("float32")))
                 if m.bias is not None:
-                    m.bias.set_parameter_data(
+                    m.bias.set_data(
                         Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.BatchNorm2d):
-                m.gamma.set_parameter_data(
+                m.gamma.set_data(
                     Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
-                m.beta.set_parameter_data(
+                m.beta.set_data(
                     Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
             elif isinstance(m, nn.Dense):
-                m.weight.set_parameter_data(Tensor(np.random.normal(
+                m.weight.set_data(Tensor(np.random.normal(
                     0, 0.01, m.weight.data.shape).astype("float32")))
                 if m.bias is not None:
-                    m.bias.set_parameter_data(
+                    m.bias.set_data(
                         Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
@@ -821,21 +821,21 @@ class NASNetAMobile(nn.Cell):
         for _, m in self.cells_and_names():
             if isinstance(m, nn.Conv2d):
                 n = m.kernel_size[0]*m.kernel_size[1]*m.out_channels
-                m.weight.set_parameter_data(Tensor(np.random.normal(0, np.sqrt(2./n),
-                                                                    m.weight.data.shape).astype("float32")))
+                m.weight.set_data(Tensor(np.random.normal(0, np.sqrt(2./n),
+                                                          m.weight.data.shape).astype("float32")))
                 if m.bias is not None:
-                    m.bias.set_parameter_data(
+                    m.bias.set_data(
                         Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
             elif isinstance(m, nn.BatchNorm2d):
-                m.gamma.set_parameter_data(
+                m.gamma.set_data(
                     Tensor(np.ones(m.gamma.data.shape, dtype="float32")))
-                m.beta.set_parameter_data(
+                m.beta.set_data(
                     Tensor(np.zeros(m.beta.data.shape, dtype="float32")))
             elif isinstance(m, nn.Dense):
-                m.weight.set_parameter_data(Tensor(np.random.normal(
+                m.weight.set_data(Tensor(np.random.normal(
                     0, 0.01, m.weight.data.shape).astype("float32")))
                 if m.bias is not None:
-                    m.bias.set_parameter_data(
+                    m.bias.set_data(
                         Tensor(np.zeros(m.bias.data.shape, dtype="float32")))
     def construct(self, x):
@@ -108,13 +108,13 @@ if __name__ == '__main__':
     else:
         for _, cell in net.cells_and_names():
             if isinstance(cell, nn.Conv2d):
-                cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
-                                                                    cell.weight.shape,
-                                                                    cell.weight.dtype)
+                cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(),
+                                                             cell.weight.shape,
+                                                             cell.weight.dtype))
             if isinstance(cell, nn.Dense):
-                cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
-                                                                    cell.weight.shape,
-                                                                    cell.weight.dtype)
+                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(),
+                                                             cell.weight.shape,
+                                                             cell.weight.dtype))
     # init lr
     if args_opt.net == "resnet50" or args_opt.net == "se-resnet50":
@@ -93,13 +93,13 @@ if __name__ == '__main__':
     else:
         for _, cell in net.cells_and_names():
             if isinstance(cell, nn.Conv2d):
-                cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
-                                                                    cell.weight.shape,
-                                                                    cell.weight.dtype)
+                cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(),
+                                                             cell.weight.shape,
+                                                             cell.weight.dtype))
             if isinstance(cell, nn.Dense):
-                cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
-                                                                    cell.weight.shape,
-                                                                    cell.weight.dtype)
+                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(),
+                                                             cell.weight.shape,
+                                                             cell.weight.dtype))
     if not config.use_label_smooth:
         config.label_smooth_factor = 0.0
     loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
@@ -61,21 +61,21 @@ class Resnet(ImageClassificationNetwork):
         for cell in self.cells_and_names():
             if isinstance(cell, nn.Conv2d):
-                cell.weight.default_input = init.initializer(
+                cell.weight.set_data(init.initializer(
                     KaimingNormal(a=math.sqrt(5), mode='fan_out', nonlinearity='relu'),
-                    cell.weight.shape, cell.weight.dtype)
+                    cell.weight.shape, cell.weight.dtype))
             elif isinstance(cell, nn.BatchNorm2d):
-                cell.gamma.default_input = init.initializer('ones', cell.gamma.shape)
-                cell.beta.default_input = init.initializer('zeros', cell.beta.shape)
+                cell.gamma.set_data(init.initializer('ones', cell.gamma.shape))
+                cell.beta.set_data(init.initializer('zeros', cell.beta.shape))
         # Zero-initialize the last BN in each residual branch,
         # so that the residual branch starts with zeros, and each residual block behaves like an identity.
         # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
         for cell in self.cells_and_names():
             if isinstance(cell, backbones.resnet.Bottleneck):
-                cell.bn3.gamma.default_input = init.initializer('zeros', cell.bn3.gamma.shape)
+                cell.bn3.gamma.set_data(init.initializer('zeros', cell.bn3.gamma.shape))
             elif isinstance(cell, backbones.resnet.BasicBlock):
-                cell.bn2.gamma.default_input = init.initializer('zeros', cell.bn2.gamma.shape)
+                cell.bn2.gamma.set_data(init.initializer('zeros', cell.bn2.gamma.shape))
@@ -187,24 +187,24 @@ def default_recurisive_init(custom_cell):
     """default_recurisive_init"""
     for _, cell in custom_cell.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.shape,
-                                                         cell.weight.dtype)
+            cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                  cell.weight.shape,
+                                                  cell.weight.dtype))
             if cell.bias is not None:
                 fan_in, _ = _calculate_in_and_out(cell.weight)
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = init.initializer(init.Uniform(bound),
-                                                           cell.bias.shape,
-                                                           cell.bias.dtype)
+                cell.bias.set_data(init.initializer(init.Uniform(bound),
+                                                    cell.bias.shape,
+                                                    cell.bias.dtype))
         elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.shape,
-                                                         cell.weight.dtype)
+            cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                  cell.weight.shape,
+                                                  cell.weight.dtype))
             if cell.bias is not None:
                 fan_in, _ = _calculate_in_and_out(cell.weight)
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = init.initializer(init.Uniform(bound),
-                                                           cell.bias.shape,
-                                                           cell.bias.dtype)
+                cell.bias.set_data(init.initializer(init.Uniform(bound),
+                                                    cell.bias.shape,
+                                                    cell.bias.dtype))
         elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
             pass
@@ -163,11 +163,11 @@ class ShuffleNetV2(nn.Cell):
         for name, m in self.cells_and_names():
             if isinstance(m, nn.Conv2d):
                 if 'first' in name:
-                    m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01,
-                                                                        m.weight.data.shape).astype("float32")))
+                    m.weight.set_data(Tensor(np.random.normal(0, 0.01,
+                                                              m.weight.data.shape).astype("float32")))
                 else:
-                    m.weight.set_parameter_data(Tensor(np.random.normal(0, 1.0/m.weight.data.shape[1],
-                                                                        m.weight.data.shape).astype("float32")))
+                    m.weight.set_data(Tensor(np.random.normal(0, 1.0/m.weight.data.shape[1],
+                                                              m.weight.data.shape).astype("float32")))
             if isinstance(m, nn.Dense):
-                m.weight.set_parameter_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape).astype("float32")))
+                m.weight.set_data(Tensor(np.random.normal(0, 0.01, m.weight.data.shape).astype("float32")))
@@ -22,9 +22,9 @@ def init_net_param(network, initialize_mode='TruncatedNormal'):
     for p in params:
         if 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name:
             if initialize_mode == 'TruncatedNormal':
-                p.set_parameter_data(initializer(TruncatedNormal(), p.data.shape, p.data.dtype))
+                p.set_data(initializer(TruncatedNormal(), p.data.shape, p.data.dtype))
             else:
-                p.set_parameter_data(initialize_mode, p.data.shape, p.data.dtype)
+                p.set_data(initialize_mode, p.data.shape, p.data.dtype)
 def load_backbone_params(network, param_dict):
@@ -37,7 +37,7 @@ def load_backbone_params(network, param_dict):
         if 'features_2' in param_name:
             param_name = '.'.join(['features', str(int(name_split[1]) + 14)] + name_split[2:])
         if param_name in param_dict:
-            param.set_parameter_data(param_dict[param_name].data)
+            param.set_data(param_dict[param_name].data)
 def filter_checkpoint_parameter(param_dict):
     """remove useless parameters"""
@@ -187,24 +187,24 @@ def default_recurisive_init(custom_cell):
     """default_recurisive_init"""
     for _, cell in custom_cell.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.shape,
-                                                         cell.weight.dtype)
+            cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                  cell.weight.shape,
+                                                  cell.weight.dtype))
             if cell.bias is not None:
                 fan_in, _ = _calculate_in_and_out(cell.weight)
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = init.initializer(init.Uniform(bound),
-                                                           cell.bias.shape,
-                                                           cell.bias.dtype)
+                cell.bias.set_data(init.initializer(init.Uniform(bound),
+                                                    cell.bias.shape,
+                                                    cell.bias.dtype))
         elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.shape,
-                                                         cell.weight.dtype)
+            cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                  cell.weight.shape,
+                                                  cell.weight.dtype))
             if cell.bias is not None:
                 fan_in, _ = _calculate_in_and_out(cell.weight)
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = init.initializer(init.Uniform(bound),
-                                                           cell.bias.shape,
-                                                           cell.bias.dtype)
+                cell.bias.set_data(init.initializer(init.Uniform(bound),
+                                                    cell.bias.shape,
+                                                    cell.bias.dtype))
         elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
             pass
@@ -101,18 +101,18 @@ class Vgg(nn.Cell):
         """
         for _, cell in self.cells_and_names():
             if isinstance(cell, nn.Conv2d):
-                cell.weight.default_input = init.initializer(
+                cell.weight.set_data(init.initializer(
                     KaimingNormal(a=math.sqrt(5), mode='fan_out', nonlinearity='relu'),
-                    cell.weight.shape, cell.weight.dtype)
+                    cell.weight.shape, cell.weight.dtype))
                 if cell.bias is not None:
-                    cell.bias.default_input = init.initializer(
-                        'zeros', cell.bias.shape, cell.bias.dtype)
+                    cell.bias.set_data(init.initializer(
+                        'zeros', cell.bias.shape, cell.bias.dtype))
             elif isinstance(cell, nn.Dense):
-                cell.weight.default_input = init.initializer(
-                    init.Normal(0.01), cell.weight.shape, cell.weight.dtype)
+                cell.weight.set_data(init.initializer(
+                    init.Normal(0.01), cell.weight.shape, cell.weight.dtype))
                 if cell.bias is not None:
-                    cell.bias.default_input = init.initializer(
-                        'zeros', cell.bias.shape, cell.bias.dtype)
+                    cell.bias.set_data(init.initializer(
+                        'zeros', cell.bias.shape, cell.bias.dtype))
 cfg = {
@@ -155,24 +155,24 @@ def default_recurisive_init(custom_cell):
     """Initialize parameter."""
     for _, cell in custom_cell.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.shape,
-                                                         cell.weight.dtype)
+            cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                  cell.weight.shape,
+                                                  cell.weight.dtype))
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight)
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = init.initializer(init.Uniform(bound),
-                                                           cell.bias.shape,
-                                                           cell.bias.dtype)
+                cell.bias.set_data(init.initializer(init.Uniform(bound),
+                                                    cell.bias.shape,
+                                                    cell.bias.dtype))
         elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.shape,
-                                                         cell.weight.dtype)
+            cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                  cell.weight.shape,
+                                                  cell.weight.dtype))
             if cell.bias is not None:
                 fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight)
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = init.initializer(init.Uniform(bound),
-                                                           cell.bias.shape,
-                                                           cell.bias.dtype)
+                cell.bias.set_data(init.initializer(init.Uniform(bound),
+                                                    cell.bias.shape,
+                                                    cell.bias.dtype))
         elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
             pass
@@ -69,12 +69,12 @@ def load_backbone(net, ckpt_path, args):
             darknet_weight = '{}.weight'.format(name)
             darknet_bias = '{}.bias'.format(name)
             if darknet_weight in param_dict:
-                cell.weight.default_input = param_dict[darknet_weight].data
+                cell.weight.set_data(param_dict[darknet_weight].data)
                 find_param.append(darknet_weight)
             else:
                 not_found_param.append(darknet_weight)
             if darknet_bias in param_dict:
-                cell.bias.default_input = param_dict[darknet_bias].data
+                cell.bias.set_data(param_dict[darknet_bias].data)
                 find_param.append(darknet_bias)
             else:
                 not_found_param.append(darknet_bias)
@@ -84,22 +84,22 @@ def load_backbone(net, ckpt_path, args):
             darknet_gamma = '{}.gamma'.format(name)
             darknet_beta = '{}.beta'.format(name)
             if darknet_moving_mean in param_dict:
-                cell.moving_mean.default_input = param_dict[darknet_moving_mean].data
+                cell.moving_mean.set_data(param_dict[darknet_moving_mean].data)
                 find_param.append(darknet_moving_mean)
             else:
                 not_found_param.append(darknet_moving_mean)
             if darknet_moving_variance in param_dict:
-                cell.moving_variance.default_input = param_dict[darknet_moving_variance].data
+                cell.moving_variance.set_data(param_dict[darknet_moving_variance].data)
                 find_param.append(darknet_moving_variance)
             else:
                 not_found_param.append(darknet_moving_variance)
             if darknet_gamma in param_dict:
-                cell.gamma.default_input = param_dict[darknet_gamma].data
+                cell.gamma.set_data(param_dict[darknet_gamma].data)
                 find_param.append(darknet_gamma)
             else:
                 not_found_param.append(darknet_gamma)
             if darknet_beta in param_dict:
-                cell.beta.default_input = param_dict[darknet_beta].data
+                cell.beta.set_data(param_dict[darknet_beta].data)
                 find_param.append(darknet_beta)
             else:
                 not_found_param.append(darknet_beta)
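For checkpoint loading, the old attribute assignment `cell.weight.default_input = param_dict[name].data` becomes a `set_data` call. A hedged sketch of the pattern (the helper name is illustrative; `load_checkpoint` is MindSpore's standard loader):

    import mindspore.nn as nn
    from mindspore.train.serialization import load_checkpoint

    def load_conv_weights(net, ckpt_path):
        # Copy matching conv weights from a checkpoint into `net` via set_data.
        param_dict = load_checkpoint(ckpt_path)
        for name, cell in net.cells_and_names():
            weight_name = '{}.weight'.format(name)
            if isinstance(cell, nn.Conv2d) and weight_name in param_dict:
                cell.weight.set_data(param_dict[weight_name].data)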
@@ -155,22 +155,22 @@ def default_recurisive_init(custom_cell):
     """Initialize parameter."""
     for _, cell in custom_cell.cells_and_names():
         if isinstance(cell, nn.Conv2d):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape,
-                                                         cell.weight.default_input.dtype).to_tensor()
+            cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                  cell.weight.data.shape,
+                                                  cell.weight.data.dtype).to_tensor())
             if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
+                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.data.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, cell.bias.default_input.shape),
-                                                 cell.bias.default_input.dtype)
+                cell.bias.set_data(Tensor(np.random.uniform(-bound, bound, cell.bias.data.shape),
+                                          cell.bias.data.dtype))
         elif isinstance(cell, nn.Dense):
-            cell.weight.default_input = init.initializer(KaimingUniform(a=math.sqrt(5)),
-                                                         cell.weight.default_input.shape,
-                                                         cell.weight.default_input.dtype).to_tensor()
+            cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)),
+                                                  cell.weight.data.shape,
+                                                  cell.weight.data.dtype).to_tensor())
             if cell.bias is not None:
-                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.default_input.asnumpy())
+                fan_in, _ = _calculate_fan_in_and_fan_out(cell.weight.data.asnumpy())
                 bound = 1 / math.sqrt(fan_in)
-                cell.bias.default_input = Tensor(np.random.uniform(-bound, bound, cell.bias.default_input.shape),
-                                                 cell.bias.default_input.dtype)
+                cell.bias.set_data(Tensor(np.random.uniform(-bound, bound, cell.bias.data.shape),
+                                          cell.bias.data.dtype))
         elif isinstance(cell, (nn.BatchNorm2d, nn.BatchNorm1d)):
             pass
@@ -66,12 +66,12 @@ def load_backbone(net, ckpt_path, args):
             darknet_weight = '{}.weight'.format(name)
             darknet_bias = '{}.bias'.format(name)
             if darknet_weight in param_dict:
-                cell.weight.default_input = param_dict[darknet_weight].data
+                cell.weight.set_data(param_dict[darknet_weight].data)
                 find_param.append(darknet_weight)
             else:
                 not_found_param.append(darknet_weight)
             if darknet_bias in param_dict:
-                cell.bias.default_input = param_dict[darknet_bias].data
+                cell.bias.set_data(param_dict[darknet_bias].data)
                 find_param.append(darknet_bias)
             else:
                 not_found_param.append(darknet_bias)
@@ -81,22 +81,22 @@ def load_backbone(net, ckpt_path, args):
             darknet_gamma = '{}.gamma'.format(name)
             darknet_beta = '{}.beta'.format(name)
             if darknet_moving_mean in param_dict:
-                cell.moving_mean.default_input = param_dict[darknet_moving_mean].data
+                cell.moving_mean.set_data(param_dict[darknet_moving_mean].data)
                 find_param.append(darknet_moving_mean)
             else:
                 not_found_param.append(darknet_moving_mean)
             if darknet_moving_variance in param_dict:
-                cell.moving_variance.default_input = param_dict[darknet_moving_variance].data
+                cell.moving_variance.set_data(param_dict[darknet_moving_variance].data)
                 find_param.append(darknet_moving_variance)
             else:
                 not_found_param.append(darknet_moving_variance)
             if darknet_gamma in param_dict:
-                cell.gamma.default_input = param_dict[darknet_gamma].data
+                cell.gamma.set_data(param_dict[darknet_gamma].data)
                 find_param.append(darknet_gamma)
             else:
                 not_found_param.append(darknet_gamma)
             if darknet_beta in param_dict:
-                cell.beta.default_input = param_dict[darknet_beta].data
+                cell.beta.set_data(param_dict[darknet_beta].data)
                 find_param.append(darknet_beta)
             else:
                 not_found_param.append(darknet_beta)
@@ -60,7 +60,7 @@ def init_net_param(network, init_value='ones'):
     params = network.trainable_params()
     for p in params:
         if isinstance(p.data, Tensor) and 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name:
-            p.set_parameter_data(initializer(init_value, p.data.shape, p.data.dtype))
+            p.set_data(initializer(init_value, p.data.shape, p.data.dtype))
 def main():
@@ -81,7 +81,7 @@ def transformer_infer(config, dataset):
     weights = load_infer_weights(config)
     for param in params:
-        value = param.default_input
+        value = param.data
         name = param.name
         if name not in weights:
             raise ValueError(f"{name} is not found in weights.")
@@ -93,7 +93,7 @@ def transformer_infer(config, dataset):
                 print(name, value.asnumpy().shape)
             if weights_name in weights:
                 assert weights_name in weights
-                param.default_input = Tensor(weights[weights_name], mstype.float32)
+                param.set_data(Tensor(weights[weights_name], mstype.float32))
             else:
                 raise ValueError(f"{weights_name} is not found in checkpoint.")
         else:
@@ -128,24 +128,24 @@ def _build_training_pipeline(config: TransformerConfig,
                 raise ValueError(f"Param {weights_name} is not found in ckpt file.")
             if isinstance(weights[weights_name], Parameter):
-                param.default_input = weights[weights_name].default_input
+                param.set_data(weights[weights_name].data)
             elif isinstance(weights[weights_name], Tensor):
-                param.default_input = Tensor(weights[weights_name].asnumpy(), config.dtype)
+                param.set_data(Tensor(weights[weights_name].asnumpy(), config.dtype))
             elif isinstance(weights[weights_name], np.ndarray):
-                param.default_input = Tensor(weights[weights_name], config.dtype)
+                param.set_data(Tensor(weights[weights_name], config.dtype))
             else:
-                param.default_input = weights[weights_name]
+                param.set_data(weights[weights_name])
     else:
         for param in net_with_loss.trainable_params():
             name = param.name
-            value = param.default_input
+            value = param.data
             if isinstance(value, Tensor):
                 if name.endswith(".gamma"):
-                    param.default_input = one_weight(value.asnumpy().shape)
+                    param.set_data(one_weight(value.asnumpy().shape))
                 elif name.endswith(".beta") or name.endswith(".bias"):
-                    param.default_input = zero_weight(value.asnumpy().shape)
+                    param.set_data(zero_weight(value.asnumpy().shape))
                 else:
-                    param.default_input = weight_variable(value.asnumpy().shape)
+                    param.set_data(weight_variable(value.asnumpy().shape))
     dataset = pre_training_dataset if pre_training_dataset is not None \
         else fine_tune_dataset
@@ -39,7 +39,7 @@ def set_block_param_with_rand(net, rand_func=None):
         return
     net.init_parameters_data()
     for param in net.trainable_params():
-        param.default_input = Tensor(rand_func(param.default_input.asnumpy().shape))
+        param.set_data(Tensor(rand_func(param.data.asnumpy().shape)))
 def compile_block(net, *inputs, rand_func=None, training=True):
@@ -192,7 +192,7 @@ class _GradChecker:
     def check_against_numeric_one_step(self, args, index, out_index):
         if isinstance(args, ParameterTuple):
-            x = args[index].default_input.asnumpy()
+            x = args[index].data.asnumpy()
         else:
             x = args[index]
         x_shape = x.shape
@@ -239,7 +239,7 @@ class _GradChecker:
     def check_against_numeric_jacobian_one_step(self, args, index, out_index):
         if isinstance(args, ParameterTuple):
-            x = args[index].default_input.asnumpy()
+            x = args[index].data.asnumpy()
         else:
             x = args[index]
         x_shape = x.shape
@@ -55,7 +55,7 @@ def init_net_param(network, init_value='ones'):
     params = network.trainable_params()
     for p in params:
         if isinstance(p.data, Tensor) and 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name:
-            p.set_parameter_data(initializer(init_value, p.data.shape, p.data.dtype))
+            p.set_data(initializer(init_value, p.data.shape, p.data.dtype))
 class ModelCallback(Callback):
     def __init__(self):
@@ -211,22 +211,22 @@ def test_bert_performance():
     callback = ModelCallback()
     params = netwithloss.trainable_params()
     for param in params:
-        value = param.default_input
+        value = param.data
         name = param.name
         if isinstance(value, Tensor):
             if name.split('.')[-1] in ['weight']:
                 if name.split('.')[-3] in ['cls2']:
                     logger.info("***************** BERT param name is 1 {}".format(name))
-                    param.default_input = weight_variable(value.asnumpy().shape)
+                    param.set_data(weight_variable(value.asnumpy().shape))
                 else:
                     logger.info("***************** BERT param name is 2 {}".format(name))
                     tempshape = value.asnumpy().shape
                     shape = (tempshape[1], tempshape[0])
                     weight_value = weight_variable(shape).asnumpy()
-                    param.default_input = Tensor(np.transpose(weight_value, [1, 0]))
+                    param.set_data(Tensor(np.transpose(weight_value, [1, 0])))
             else:
                 logger.info("***************** BERT param name is 3 {}".format(name))
-                param.default_input = weight_variable(value.asnumpy().shape)
+                param.set_data(weight_variable(value.asnumpy().shape))
     time_monitor_callback = TimeMonitor(sink_size)
     model.train(new_repeat_count, ds, callbacks=[time_monitor_callback, callback],
                 dataset_sink_mode=True, sink_size=sink_size)
@@ -208,22 +208,22 @@ def test_bert_percision():
     callback = ModelCallback()
     params = netwithloss.trainable_params()
     for param in params:
-        value = param.default_input
+        value = param.data
         name = param.name
         if isinstance(value, Tensor):
             if name.split('.')[-1] in ['weight']:
                 if name.split('.')[-3] in ['cls2']:
                     logger.info("***************** BERT param name is 1 {}".format(name))
-                    param.default_input = weight_variable(value.asnumpy().shape)
+                    param.set_data(weight_variable(value.asnumpy().shape))
                 else:
                     logger.info("***************** BERT param name is 2 {}".format(name))
                     tempshape = value.asnumpy().shape
                     shape = (tempshape[1], tempshape[0])
                     weight_value = weight_variable(shape).asnumpy()
-                    param.default_input = Tensor(np.transpose(weight_value, [1, 0]))
+                    param.set_data(Tensor(np.transpose(weight_value, [1, 0])))
             else:
                 logger.info("***************** BERT param name is 3 {}".format(name))
-                param.default_input = weight_variable(value.asnumpy().shape)
+                param.set_data(weight_variable(value.asnumpy().shape))
     model.train(new_repeat_count, ds, callbacks=callback, dataset_sink_mode=False)
     # assertion occurs while the loss value, overflow state or loss_scale value is wrong
@@ -173,22 +173,22 @@ def test_bert_tdt():
     netwithloss.init_parameters_data()
     params = netwithloss.trainable_params()
     for param in params:
-        value = param.default_input
+        value = param.data
         name = param.name
         if isinstance(value, Tensor):
             if name.split('.')[-1] in ['weight']:
                 if name.split('.')[-3] in ['cls2']:
                     logger.info("***************** BERT param name is 1 {}".format(name))
-                    param.default_input = weight_variable(value.asnumpy().shape)
+                    param.set_data(weight_variable(value.asnumpy().shape))
                 else:
                     logger.info("***************** BERT param name is 2 {}".format(name))
                     tempshape = value.asnumpy().shape
                     shape = (tempshape[1], tempshape[0])
                     weight_value = weight_variable(shape).asnumpy()
-                    param.default_input = Tensor(np.transpose(weight_value, [1, 0]))
+                    param.set_data(Tensor(np.transpose(weight_value, [1, 0])))
             else:
                 logger.info("***************** BERT param name is 3 {}".format(name))
-                param.default_input = weight_variable(value.asnumpy().shape)
+                param.set_data(weight_variable(value.asnumpy().shape))
     model.train(1, ds, callbacks=callback, dataset_sink_mode=False)
     # assertion occurs while the loss value, overflow state or loss_scale value is wrong
@@ -65,7 +65,7 @@ class LossCallBack(Callback):
 def model_fine_tune(train_net, fix_weight_layer):
     train_net.init_parameters_data()
     for para in train_net.trainable_params():
-        para.set_parameter_data(Tensor(np.ones(para.data.shape).astype(np.float32) * 0.02))
+        para.set_data(Tensor(np.ones(para.data.shape).astype(np.float32) * 0.02))
         if fix_weight_layer in para.name:
             para.requires_grad = False
@@ -48,6 +48,6 @@ def test_net():
     net = Net()
     output = net(gradient, indices)
     print(output)
-    print(net.var.default_input)
-    print(net.m.default_input)
-    print(net.v.default_input)
+    print(net.var.data)
+    print(net.m.data)
+    print(net.v.data)
@@ -45,6 +45,6 @@ def test_net():
     net = Net()
     output = net(gradient, indices)
     print(output)
-    print(net.var.default_input)
-    print(net.accum.default_input)
-    print(net.linear.default_input)
+    print(net.var.data)
+    print(net.accum.data)
+    print(net.linear.data)
@@ -48,6 +48,6 @@ def test_net():
     net = Net()
     output = net(gradient, indices)
     print(output)
-    print(net.var.default_input)
-    print(net.m.default_input)
-    print(net.v.default_input)
+    print(net.var.data)
+    print(net.m.data)
+    print(net.v.data)
@@ -43,5 +43,5 @@ def test_net():
     net = Net()
     output = net(gradient, indices)
     print(output)
-    print(net.var.default_input)
-    print(net.accum.default_input)
+    print(net.var.data)
+    print(net.accum.data)
@@ -65,9 +65,9 @@ def test_op1():
     scatter_nd_update = ScatterNdUpdate1()
     scatter_nd_update(indices, update)
-    print("x:\n", scatter_nd_update.x.default_input)
+    print("x:\n", scatter_nd_update.x.data)
     expect = [[1.0, 0.3, 3.6], [0.4, 2.2, -3.2]]
-    assert np.allclose(scatter_nd_update.x.default_input.asnumpy(), np.array(expect, np.float))
+    assert np.allclose(scatter_nd_update.x.data.asnumpy(), np.array(expect, np.float))
 @pytest.mark.level0
@@ -79,9 +79,9 @@ def test_op2():
     scatter_nd_update = ScatterNdUpdate2()
     scatter_nd_update(indices, update)
-    print("x:\n", scatter_nd_update.x.default_input)
+    print("x:\n", scatter_nd_update.x.data)
     expect = [1, 11, 3, 10, 9, 6, 7, 12]
-    assert np.allclose(scatter_nd_update.x.default_input.asnumpy(), np.array(expect, dtype=float))
+    assert np.allclose(scatter_nd_update.x.data.asnumpy(), np.array(expect, dtype=float))
 @pytest.mark.level0
@@ -96,9 +96,9 @@ def test_op3():
     scatter_nd_update = ScatterNdUpdate3()
     scatter_nd_update(indices, update)
-    print("x:\n", scatter_nd_update.x.default_input)
+    print("x:\n", scatter_nd_update.x.data)
     expect = [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
               [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
               [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
               [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
-    assert np.allclose(scatter_nd_update.x.default_input.asnumpy(), np.array(expect, dtype=float))
+    assert np.allclose(scatter_nd_update.x.data.asnumpy(), np.array(expect, dtype=float))
@@ -54,7 +54,7 @@ def test_net():
     context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
     sparse_apply_adam = Net()
     sparse_apply_adam(gradient, indices)
-    print(sparse_apply_adam.var.default_input)
+    print(sparse_apply_adam.var.data)
     expect_var = np.array([[[0.9996838, 0.9996838, 0.9996838],
                             [0.9996838, 0.9996838, 0.9996838],
                             [0.9996838, 0.9996838, 0.9996838]],
@@ -64,4 +64,4 @@ def test_net():
                            [[0.9996838, 0.9996838, 0.9996838],
                             [0.9996838, 0.9996838, 0.9996838],
                             [0.9996838, 0.9996838, 0.9996838]]]).astype(np.float32)
-    assert np.all(sparse_apply_adam.var.default_input.asnumpy() == expect_var)
+    assert np.all(sparse_apply_adam.var.data.asnumpy() == expect_var)
@@ -46,7 +46,7 @@ def test_net():
     context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
     sparse_apply_ftrl = Net()
     sparse_apply_ftrl(gradient, indices)
-    print(sparse_apply_ftrl.var.default_input)
+    print(sparse_apply_ftrl.var.data)
     expect_var = np.array([[[0.291479, 0.291479, 0.291479],
                             [0.291479, 0.291479, 0.291479],
                             [0.291479, 0.291479, 0.291479]],
@@ -56,4 +56,4 @@ def test_net():
                            [[0.291479, 0.291479, 0.291479],
                             [0.291479, 0.291479, 0.291479],
                             [0.291479, 0.291479, 0.291479]]]).astype(np.float32)
-    assert np.all(sparse_apply_ftrl.var.default_input.asnumpy() == expect_var)
+    assert np.all(sparse_apply_ftrl.var.data.asnumpy() == expect_var)
@@ -48,7 +48,7 @@ def test_net():
     context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
     sparse_apply_proximal_adagrad = Net()
     sparse_apply_proximal_adagrad(gradient, indices)
-    print(sparse_apply_proximal_adagrad.var.default_input)
+    print(sparse_apply_proximal_adagrad.var.data)
     expect_var = np.array([[[0.9929289, 0.9929289, 0.9929289],
                             [0.9929289, 0.9929289, 0.9929289],
                             [0.9929289, 0.9929289, 0.9929289]],
@@ -58,4 +58,4 @@ def test_net():
                            [[0.9929289, 0.9929289, 0.9929289],
                             [0.9929289, 0.9929289, 0.9929289],
                             [0.9929289, 0.9929289, 0.9929289]]]).astype(np.float32)
-    assert np.all(sparse_apply_proximal_adagrad.var.default_input.asnumpy() == expect_var)
+    assert np.all(sparse_apply_proximal_adagrad.var.data.asnumpy() == expect_var)
@@ -47,7 +47,7 @@ def test_assign():
     error = np.ones(shape=[2, 2]) * 1.0e-6
     diff1 = output.asnumpy() - value
-    diff2 = assign.var.default_input.asnumpy() - value
+    diff2 = assign.var.data.asnumpy() - value
     assert np.all(diff1 < error)
     assert np.all(-diff1 < error)
     assert np.all(diff2 < error)
@@ -158,15 +158,15 @@ def test_parameter_compute():
 def test_scalar_parameter_update():
     # float
     fp = Parameter(0.5, 'fp')
-    fp.default_input = 0.8
-    assert np.array_equal(fp.default_input.asnumpy(), np.array(0.8, np.float32))
-    fp.default_input = 1
-    assert np.array_equal(fp.default_input.asnumpy(), np.array(1.0, np.float32))
+    fp.set_data(0.8)
+    assert np.array_equal(fp.data.asnumpy(), np.array(0.8, np.float32))
+    fp.set_data(1)
+    assert np.array_equal(fp.data.asnumpy(), np.array(1.0, np.float32))
     int_ = Parameter(1, 'fp')
-    int_.default_input = 2
-    assert np.array_equal(int_.default_input.asnumpy(), np.array(2, np.int32))
+    int_.set_data(2)
+    assert np.array_equal(int_.data.asnumpy(), np.array(2, np.int32))
     with pytest.raises(TypeError):
-        int_.default_input = 1.2
+        int_.set_data(1.2)
     # Tensor
     fp32 = Tensor(0.5, mstype.float32)
     int32 = Tensor(2, mstype.int32)
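The updated test pins down the dtype behavior that survives the rename: scalar updates are coerced to the parameter's dtype, a float scalar on an int parameter raises TypeError, and (per the next hunk) a float16 parameter rejects float32 data. Restated as a standalone snippet (a sketch, assuming a MindSpore build with this PR):

    import numpy as np
    import mindspore.common.dtype as mstype
    from mindspore import Parameter, Tensor

    fp = Parameter(0.5, 'fp')   # float32 scalar parameter
    fp.set_data(1)              # int scalar is coerced to float32
    assert np.array_equal(fp.data.asnumpy(), np.array(1.0, np.float32))

    fp16_p = Parameter(Tensor(0.5, mstype.float16), 'fp16')
    try:
        fp16_p.set_data(Tensor(0.5, mstype.float32))  # higher-precision data is rejected
    except TypeError:
        pass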
@@ -175,59 +175,59 @@ def test_scalar_parameter_update():
     bool_ = Tensor(np.array(True, dtype=np.bool_))
     # update by tensor
     fp32_p = Parameter(fp32, 'fp32')
-    fp32_p.default_input = 0.8
-    fp32_p.default_input = 1
-    fp32_p.default_input = int32
-    fp32_p.default_input = fp32
-    fp32_p.default_input = int16
-    fp32_p.default_input = fp16
-    fp32_p.default_input = bool_
+    fp32_p.set_data(0.8)
+    fp32_p.set_data(1)
+    fp32_p.set_data(int32)
+    fp32_p.set_data(fp32)
+    fp32_p.set_data(int16)
+    fp32_p.set_data(fp16)
+    fp32_p.set_data(bool_)
     # update by tensor
     fp16_p = Parameter(fp16, 'fp16')
     with pytest.raises(TypeError):
-        fp16_p.default_input = fp32
+        fp16_p.set_data(fp32)
 def test_parameter_lazy_init():
     # support lazy init in SEMI_AUTO_PARALLEL mode
     context.reset_auto_parallel_context()
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8)
-    # Call init_data() without set default_input.
+    # Call init_data() without calling set_data().
     para = Parameter(initializer('ones', [1, 2, 3], mstype.float32), 'test1')
-    assert not isinstance(para.default_input, Tensor)
+    assert not isinstance(para.data, Tensor)
     para = para.init_data()
-    assert isinstance(para.default_input, Tensor)
-    assert np.array_equal(para.default_input.asnumpy(), np.ones((1, 2, 3)))
-    # Call init_data() after default_input is set.
+    assert isinstance(para.data, Tensor)
+    assert np.array_equal(para.data.asnumpy(), np.ones((1, 2, 3)))
+    # Call init_data() after set_data() is called.
     para = Parameter(initializer('ones', [1, 2, 3], mstype.float32), 'test2')
-    assert not isinstance(para.default_input, Tensor)
+    assert not isinstance(para.data, Tensor)
     # expect type error when not init
     with pytest.raises(TypeError):
-        para.default_input = Tensor(np.zeros((1, 2, 3)))
+        para.set_data(Tensor(np.zeros((1, 2, 3))))
     # init then assign
     para = para.init_data()
     # check the type
     with pytest.raises(TypeError):
-        para.default_input = Tensor(np.zeros((1, 2, 3)))
+        para.set_data(Tensor(np.zeros((1, 2, 3))))
     # check the shape
     with pytest.raises(ValueError):
-        para.default_input = Tensor(np.zeros((1, 2)))
+        para.set_data(Tensor(np.zeros((1, 2))))
     # expect change ok
-    para.default_input = Tensor(np.zeros((1, 2, 3)).astype(np.float32))
-    assert np.array_equal(para.default_input.asnumpy(), np.zeros((1, 2, 3)))
-    para.default_input = initializer('ones', [1, 2, 3], mstype.float32)
-    assert isinstance(para.default_input, Tensor)
+    para.set_data(Tensor(np.zeros((1, 2, 3)).astype(np.float32)))
+    assert np.array_equal(para.data.asnumpy(), np.zeros((1, 2, 3)))
+    para.set_data(initializer('ones', [1, 2, 3], mstype.float32))
+    assert isinstance(para.data, Tensor)
     # same object and has inited
-    assert np.array_equal(para.default_input.asnumpy(), np.ones((1, 2, 3)))
+    assert np.array_equal(para.data.asnumpy(), np.ones((1, 2, 3)))
     # expect no effect.
     para.init_data()
-    assert np.array_equal(para.default_input.asnumpy(), np.ones((1, 2, 3)))
-    para.set_parameter_data(Tensor(np.zeros((1, 2)).astype(np.float32)), slice_shape=True)
-    assert np.array_equal(para.default_input.asnumpy(), np.zeros((1, 2)))
-    para.set_parameter_data(initializer('ones', [1, 2], mstype.float32), slice_shape=True)
-    assert np.array_equal(para.default_input.asnumpy(), np.ones((1, 2)))
+    assert np.array_equal(para.data.asnumpy(), np.ones((1, 2, 3)))
+    para.set_data(Tensor(np.zeros((1, 2)).astype(np.float32)), slice_shape=True)
+    assert np.array_equal(para.data.asnumpy(), np.zeros((1, 2)))
+    para.set_data(initializer('ones', [1, 2], mstype.float32), slice_shape=True)
+    assert np.array_equal(para.data.asnumpy(), np.ones((1, 2)))
     context.reset_auto_parallel_context()
@@ -243,7 +243,7 @@ def test_parameter_as_output():
             self.updated = updated
             self.p = Parameter(self.initial, name="weight")
             self.new_p = self.p.init_data()
-            self.new_p.set_parameter_data(self.updated)
+            self.new_p.set_data(self.updated)
         def construct(self):
             return self.new_p
@@ -217,7 +217,7 @@ def test_onnx_export_load_run(name, net, inp):
     print(outputs[0])
     # overwrite default weight to run model
     for item in net.trainable_params():
-        default_value = item.default_input.asnumpy()
+        default_value = item.data.asnumpy()
         input_map[item.name] = np.ones(default_value.shape, dtype=default_value.dtype)
     outputs = ort_session.run(None, input_map)
     print(outputs[0])
@@ -201,7 +201,7 @@ def test_checkpoint_cb_for_save_op_update_net():
     net = Net()
     _set_cur_net(net)
     _checkpoint_cb_for_save_op(parameter_list)
-    assert net.conv.weight.default_input.asnumpy()[0][0][0][0] == 1
+    assert net.conv.weight.data.asnumpy()[0][0][0][0] == 1
 def test_internal_callback_param():
@@ -203,7 +203,7 @@ def test_load_param_into_net_error_dict():
 def test_load_param_into_net_erro_dict_param():
     net = Net(10)
     net.init_parameters_data()
-    assert net.conv1.weight.default_input.asnumpy()[0][0][0][0] == 0
+    assert net.conv1.weight.data.asnumpy()[0][0][0][0] == 0
     parameter_dict = {}
     one_param = ''
@@ -216,7 +216,7 @@ def test_load_param_into_net_has_more_param():
     """ test_load_param_into_net_has_more_param """
     net = Net(10)
     net.init_parameters_data()
-    assert net.conv1.weight.default_input.asnumpy()[0][0][0][0] == 0
+    assert net.conv1.weight.data.asnumpy()[0][0][0][0] == 0
     parameter_dict = {}
     one_param = Parameter(Tensor(np.ones(shape=(64, 3, 7, 7)), dtype=mstype.float32),
@@ -226,13 +226,13 @@ def test_load_param_into_net_has_more_param():
                           name="conv1.weight")
     parameter_dict["conv1.w"] = two_param
     load_param_into_net(net, parameter_dict)
-    assert net.conv1.weight.default_input.asnumpy()[0][0][0][0] == 1
+    assert net.conv1.weight.data.asnumpy()[0][0][0][0] == 1
 def test_load_param_into_net_param_type_and_shape_error():
     net = Net(10)
     net.init_parameters_data()
-    assert net.conv1.weight.default_input.asnumpy()[0][0][0][0] == 0
+    assert net.conv1.weight.data.asnumpy()[0][0][0][0] == 0
     parameter_dict = {}
     one_param = Parameter(Tensor(np.ones(shape=(64, 3, 7, 7))), name="conv1.weight")
@@ -244,7 +244,7 @@ def test_load_param_into_net_param_type_and_shape_error():
 def test_load_param_into_net_param_type_error():
     net = Net(10)
     net.init_parameters_data()
-    assert net.conv1.weight.default_input.asnumpy()[0][0][0][0] == 0
+    assert net.conv1.weight.data.asnumpy()[0][0][0][0] == 0
     parameter_dict = {}
     one_param = Parameter(Tensor(np.ones(shape=(64, 3, 7, 7)), dtype=mstype.int32),
@@ -257,7 +257,7 @@ def test_load_param_into_net_param_type_error():
 def test_load_param_into_net_param_shape_error():
     net = Net(10)
     net.init_parameters_data()
-    assert net.conv1.weight.default_input.asnumpy()[0][0][0][0] == 0
+    assert net.conv1.weight.data.asnumpy()[0][0][0][0] == 0
     parameter_dict = {}
     one_param = Parameter(Tensor(np.ones(shape=(64, 3, 7,)), dtype=mstype.int32),
@@ -270,14 +270,14 @@ def test_load_param_into_net_param_shape_error():
 def test_load_param_into_net():
     net = Net(10)
     net.init_parameters_data()
-    assert net.conv1.weight.default_input.asnumpy()[0][0][0][0] == 0
+    assert net.conv1.weight.data.asnumpy()[0][0][0][0] == 0
     parameter_dict = {}
     one_param = Parameter(Tensor(np.ones(shape=(64, 3, 7, 7)), dtype=mstype.float32),
                           name="conv1.weight")
     parameter_dict["conv1.weight"] = one_param
     load_param_into_net(net, parameter_dict)
-    assert net.conv1.weight.default_input.asnumpy()[0][0][0][0] == 1
+    assert net.conv1.weight.data.asnumpy()[0][0][0][0] == 1
 def test_save_checkpoint_for_network():