| @@ -54,11 +54,8 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph | |||
| } | |||
| auto tensor = node_value->cast<TensorPtr>(); | |||
| MS_EXCEPTION_IF_NULL(tensor); | |||
| TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(item_node, 0); | |||
| if (output_type_id == kTypeUnknown) { | |||
| output_type_id = AnfAlgo::GetOutputInferDataType(item_node, 0); | |||
| } | |||
| size_t type_size = sizeof(TypeIdToType(output_type_id)); | |||
| TypeId output_type_id = AnfAlgo::GetOutputInferDataType(item_node, 0); | |||
| size_t type_size = GetTypeByte(TypeIdToType(output_type_id)); | |||
| ShapeVector data_shape = tensor->shape(); | |||
| size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>()); | |||
| DeviceAddressPtr address = nullptr; | |||
| @@ -245,7 +242,7 @@ void CPUKernelRuntime::BindInputTensorAddressPtr(const session::KernelGraph &ker | |||
| if (tensor_address != nullptr && tensor_address != address) { | |||
| tensor->data_sync(false); | |||
| } | |||
| if (tensor->data_type() == address->type_id_) { | |||
| if (GetTypeByte(TypeIdToType(tensor->data_type())) == GetTypeByte(TypeIdToType(address->type_id_))) { | |||
| address->ptr_ = tensor->data_c(); | |||
| } else { | |||
| ShapeVector data_shape = tensor->shape(); | |||
| @@ -210,7 +210,7 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr, | |||
| const std::vector<KernelAttr> &kernel_attrs, const std::vector<std::string> &input_formats, | |||
| const std::vector<TypeId> &input_types, const std::vector<size_t> &input_not_cnode_indexes, | |||
| const std::vector<std::string> &infer_output_formats, const std::vector<TypeId> &infer_output_types, | |||
| bool strict) { | |||
| std::pair<bool, bool> *matched, bool strict) { | |||
| int max_type_matched_num = -1; | |||
| int max_format_matched_num = -1; | |||
| for (auto kernel_attr : kernel_attrs) { | |||
| @@ -244,10 +244,13 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr, | |||
| } | |||
| // All formats and data types matched | |||
| if (max_type_matched_num == SizeToInt(input_types.size()) && | |||
| max_format_matched_num == SizeToInt(input_types.size()) && | |||
| output_type_format_matched_num.first == SizeToInt(infer_output_types.size()) && | |||
| output_type_format_matched_num.second == SizeToInt(infer_output_types.size())) { | |||
| return true; | |||
| max_format_matched_num == SizeToInt(input_types.size())) { | |||
| matched->first = true; | |||
| if (output_type_format_matched_num.first == SizeToInt(infer_output_types.size()) && | |||
| output_type_format_matched_num.second == SizeToInt(infer_output_types.size())) { | |||
| matched->second = true; | |||
| return true; | |||
| } | |||
| } | |||
| } | |||
| return false; | |||
| @@ -261,22 +264,23 @@ void SetKernelInfo(const CNodePtr &kernel_node) { | |||
| std::vector<std::string> infer_output_formats; | |||
| std::vector<TypeId> infer_output_types; | |||
| MS_LOG(INFO) << "SetKernelInfo, CNode Name: " << AnfAlgo::GetCNodeName(kernel_node); | |||
| GetInputFormatsAndDtypes(kernel_node, &input_formats, &input_types, &input_not_cnode_indexes); | |||
| GetOutputInferFormatsAndDtypes(kernel_node, &infer_output_formats, &infer_output_types); | |||
| auto kernel_attrs = | |||
| kernel::CPUKernelFactory::GetInstance().GetSupportedKernelAttrList(AnfAlgo::GetCNodeName(kernel_node)); | |||
| if (kernel_attrs.empty()) { | |||
| MS_LOG(EXCEPTION) << "Operator[" << AnfAlgo::GetCNodeName(kernel_node) << "] is not support."; | |||
| } | |||
| GetInputFormatsAndDtypes(kernel_node, &input_formats, &input_types, &input_not_cnode_indexes); | |||
| GetOutputInferFormatsAndDtypes(kernel_node, &infer_output_formats, &infer_output_types); | |||
| KernelAttr selected_kernel_attr; | |||
| bool matched = true; | |||
| std::pair<bool, bool> matched = std::make_pair(false, false); | |||
| if (!SelectKernel(kernel_node, &selected_kernel_attr, kernel_attrs, input_formats, input_types, | |||
| input_not_cnode_indexes, infer_output_formats, infer_output_types, true)) { | |||
| matched = SelectKernel(kernel_node, &selected_kernel_attr, kernel_attrs, input_formats, input_types, | |||
| input_not_cnode_indexes, infer_output_formats, infer_output_types, false); | |||
| input_not_cnode_indexes, infer_output_formats, infer_output_types, &matched, true)) { | |||
| matched = std::make_pair(false, false); | |||
| SelectKernel(kernel_node, &selected_kernel_attr, kernel_attrs, input_formats, input_types, input_not_cnode_indexes, | |||
| infer_output_formats, infer_output_types, &matched, false); | |||
| } | |||
| if (selected_kernel_attr.GetInputSize() > 0 && (matched || input_types.size() == input_not_cnode_indexes.size())) { | |||
| if (selected_kernel_attr.GetInputSize() > 0 && | |||
| (matched.first || input_types.size() == input_not_cnode_indexes.size())) { | |||
| MS_LOG(INFO) << "Input format and dtype is matched"; | |||
| GetOutputFormatsAndDtypes(kernel_node, selected_kernel_attr, &output_formats, &output_types); | |||
| UpdatePrevNotCNodeFormatDtype(selected_kernel_attr, input_not_cnode_indexes, kernel_node); | |||
| @@ -0,0 +1,118 @@ | |||
| import numpy as np | |||
| import pytest | |||
| import mindspore.nn as nn | |||
| from mindspore import Tensor | |||
| from mindspore.ops import operations as P | |||
| import mindspore.context as context | |||
| from mindspore.nn import Dense | |||
| from mindspore.nn import TrainOneStepCell, WithLossCell | |||
| from mindspore.nn.optim import Momentum | |||
# Configure MindSpore for graph-mode execution on the CPU device target.
context.set_context(device_target='CPU', mode=context.GRAPH_MODE)
class Net(nn.Cell):
    """Cell that chains two BiasAdd primitives, one after the other."""

    def __init__(self):
        super().__init__()
        self.bias_add = P.BiasAdd()
        self.bias_add1 = P.BiasAdd()

    def construct(self, x, b, c):
        # First BiasAdd applies `b` to `x`; the second applies `c` to that result.
        biased_once = self.bias_add(x, b)
        return self.bias_add1(biased_once, c)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_bias_add1():
    """Feed float16 inputs through Net and expect every output element to equal 3."""
    half = np.float16
    data = np.ones([2, 2]).astype(half)
    first_bias = np.array([1, 1]).astype(half)
    second_bias = np.array([1, 1]).astype(half)

    net = Net()
    result = net(Tensor(data), Tensor(first_bias), Tensor(second_bias))

    # ones + 1 + 1 -> a 2x2 float16 array filled with 3.
    expected = np.full([2, 2], 3).astype(half)
    assert np.all(result.asnumpy() == expected)
class Net1(nn.Cell):
    """Cell that multiplies two BiasAdd results computed from the same input `x`."""

    def __init__(self):
        super().__init__()
        self.bias_add = P.BiasAdd()
        self.mul = P.Mul()

    def construct(self, x, a, b):
        # Mul(BiasAdd(x, b), BiasAdd(x, a)); both additions share one BiasAdd primitive.
        return self.mul(self.bias_add(x, b), self.bias_add(x, a))
class Net2(nn.Cell):
    """Cell with two BiasAdd primitives applied in sequence (same structure as Net)."""

    def __init__(self):
        super().__init__()
        self.bias_add = P.BiasAdd()
        self.bias_add1 = P.BiasAdd()

    def construct(self, x, b, c):
        # Apply bias `b` first, then bias `c` to the intermediate result.
        after_b = self.bias_add(x, b)
        after_c = self.bias_add1(after_b, c)
        return after_c
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_bias_add2():
    """Run Net1 and Net2 on float32 inputs and check their numerical results.

    The outputs are printed for the test log and also asserted, so that a wrong
    result fails the test instead of only showing up in the captured output.
    """
    x = np.ones([2, 2]).astype(np.float32)
    a = np.array([1, 1]).astype(np.float32)
    b = np.array([1, 1]).astype(np.float32)
    c = np.array([1, 1]).astype(np.float32)

    net1 = Net1()
    output = net1(Tensor(x), Tensor(a), Tensor(b))
    print(output)
    # Each BiasAdd branch yields ones + 1 = 2, so their product is 4 everywhere.
    expect_output = np.ones([2, 2]).astype(np.float32) * 4
    assert np.all(output.asnumpy() == expect_output)

    net2 = Net2()
    output2 = net2(Tensor(x), Tensor(b), Tensor(c))
    print(output2)
    # ones + 1 + 1 = 3 everywhere.
    expect_output2 = np.ones([2, 2]).astype(np.float32) * 3
    assert np.all(output2.asnumpy() == expect_output2)
# Graph-mode execution on the CPU device target (same settings as the call near the top of the file).
context.set_context(device_target="CPU", mode=context.GRAPH_MODE)
class MomentumNet(nn.Cell):
    """Reshape followed by a single Dense(16, 10) layer with a constant 0.01 weight."""

    def __init__(self):
        super().__init__()
        self.batch_size = 1
        self.reshape = P.Reshape()
        # Constant float32 weight of shape (10, 16), every entry 0.01.
        initial_weight = Tensor(np.ones([10, 16]).astype(np.float32) * 0.01)
        self.fc1 = Dense(16, 10, weight_init=initial_weight)

    def construct(self, input_x):
        # Reshape to (batch_size, -1), then apply the dense layer.
        flattened = self.reshape(input_x, (self.batch_size, -1))
        return self.fc1(flattened)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_momentum():
    """Train MomentumNet for `epoch` steps with the Momentum optimizer.

    Each step's loss is checked to be finite, so a NaN/Inf result fails the
    test rather than passing silently.

    Returns:
        list: the loss value returned by every training step.
    """
    epoch = 1
    net = MomentumNet()
    learning_rate = (0.1, 0.2)
    momentum = 0.9

    # Only parameters flagged as requiring gradients are handed to the optimizer.
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)
    train_network.set_train()

    losses = []
    for _ in range(epoch):
        data = Tensor(np.arange(0, 16).reshape(1, 1, 4, 4).astype(np.float32) * 0.01)
        label = Tensor(np.array([0]).astype(np.int32))
        loss = train_network(data, label)
        # A non-finite loss means the step is broken even though nothing raised.
        assert np.all(np.isfinite(loss.asnumpy()))
        losses.append(loss)
    # One recorded loss per training step.
    assert len(losses) == epoch
    print("================================")
    print(losses)

    return losses