Merge pull request !3202 from kisnwang/cpu-conv2d-support-diff-kernel-size (tags/v0.6.0-beta)
| @@ -32,8 +32,6 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| int kernel_size = SizeToInt(weight_shape[3]); | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||
| if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) { | |||
| @@ -57,6 +55,7 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| std::vector<int> int_padding_r; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||
| std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]}); | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "get padding failed"; | |||
| @@ -32,8 +32,6 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| int kernel_size = SizeToInt(weight_shape[3]); | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||
| if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { | |||
| @@ -53,6 +51,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||
| std::vector<int> int_padding_l; | |||
| std::vector<int> int_padding_r; | |||
| std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]}); | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "get padding failed"; | |||
| @@ -33,7 +33,6 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| int kernel_size = SizeToInt(weight_shape[3]); | |||
| auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDE); | |||
| auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, DILATION); | |||
| if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { | |||
| @@ -52,6 +51,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| std::vector<int> int_padding_l; | |||
| std::vector<int> int_padding_r; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PAD_MODE); | |||
| std::vector<size_t> kernel_size({weight_shape[2], weight_shape[3]}); | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "conv2d grad get padding failed"; | |||
| @@ -23,7 +23,7 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, | |||
| const std::vector<size_t> &src_shape, int kernel_size, int stride, | |||
| const std::vector<size_t> &src_shape, const std::vector<size_t> &kernel_size, int stride, | |||
| std::vector<int> *padding_l, std::vector<int> *padding_r) { | |||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||
| if (src_shape.size() < 2) { | |||
| @@ -32,11 +32,13 @@ void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pa | |||
| std::vector<int> weight_height; | |||
| weight_height.emplace_back(src_shape[src_shape.size() - 2]); | |||
| weight_height.emplace_back(src_shape[src_shape.size() - 1]); | |||
| int rad = kernel_size / 2; | |||
| int need_pad = kernel_size - 1; | |||
| MS_LOG(INFO) << "pad mode " << pad_mode; | |||
| if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) { | |||
| for (auto wh : weight_height) { | |||
| for (size_t i = 0; i < weight_height.size(); ++i) { | |||
| auto wh = weight_height[i]; | |||
| int rad = kernel_size[i] / 2; | |||
| int need_pad = kernel_size[i] - 1; | |||
| int re = (wh - 1) % stride; | |||
| int pad = std::max(rad - (re / 2), 0); | |||
| padding_r->emplace_back(pad); | |||
| @@ -33,7 +33,8 @@ class MKLCPUKernel : public CPUKernel { | |||
| protected: | |||
| void GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, const std::vector<size_t> &src_shape, | |||
| int kernel_size, int stride, std::vector<int> *padding_l, std::vector<int> *padding_r); | |||
| const std::vector<size_t> &kernel_size, int stride, std::vector<int> *padding_l, | |||
| std::vector<int> *padding_r); | |||
| void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false); | |||
| void SetArgumentHandle(int arg_key, void *ptr); | |||
| dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const; | |||
| @@ -28,17 +28,18 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| std::vector<size_t> dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); | |||
| dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); | |||
| dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); | |||
| std::vector<int> kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE); | |||
| std::vector<int> origin_kernel_sizes = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, KSIZE); | |||
| std::vector<int> strides = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, STRIDES); | |||
| if (kernel_sizes.size() != 4 || strides.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "invalid kernel size " << kernel_sizes.size() << " or stride size " << strides.size(); | |||
| if (origin_kernel_sizes.size() != 4 || strides.size() != 4) { | |||
| MS_LOG(EXCEPTION) << "invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size(); | |||
| } | |||
| dnnl::memory::dims strides_dims{strides[2], strides[3]}; | |||
| dnnl::memory::dims kernels_dims{kernel_sizes[2], kernel_sizes[3]}; | |||
| dnnl::memory::dims kernels_dims{origin_kernel_sizes[2], origin_kernel_sizes[3]}; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING); | |||
| std::vector<int> int_padding_l; | |||
| std::vector<int> int_padding_r; | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_sizes[3], strides[3], &int_padding_l, &int_padding_r); | |||
| std::vector<size_t> kernel_size({IntToSize(origin_kernel_sizes[2]), IntToSize(origin_kernel_sizes[3])}); | |||
| GetPadding(kernel_node, pad_mode, src_shape, kernel_size, strides[3], &int_padding_l, &int_padding_r); | |||
| if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { | |||
| MS_LOG(EXCEPTION) << "pooling get padding failed"; | |||
| } | |||
| @@ -34,7 +34,7 @@ void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { | |||
| } | |||
| std::vector<int> padding_r; | |||
| const std::string pad_mode = AnfAlgo::GetNodeAttr<std::string>(kernel_node, PADDING); | |||
| kernel_size_ = kernel_sizes[3]; | |||
| kernel_size_ = {IntToSize(kernel_sizes[2]), IntToSize(kernel_sizes[3])}; | |||
| stride_ = strides[3]; | |||
| GetPadding(kernel_node, pad_mode, src_shape_, kernel_size_, stride_, &padding_l_, &padding_r); | |||
| } | |||
| @@ -77,7 +77,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d | |||
| size_t diff_index = 0; | |||
| for (size_t h = 0; h < dst_shape_[2]; ++h) { | |||
| box[0].first = IntToSize(std::max(h_start, 0)); | |||
| box[0].second = IntToSize(std::min(h_start + kernel_size_, src_height)); | |||
| box[0].second = IntToSize(std::min(h_start + SizeToInt(kernel_size_[1]), src_height)); | |||
| for (size_t w = 0; w < src_shape_[3]; ++w) { | |||
| row_max_pair[w].first = 0; | |||
| row_max_pair[w].second = 0; | |||
| @@ -85,7 +85,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d | |||
| int w_start = -padding_l_[1]; | |||
| for (size_t w = 0; w < dst_shape_[3]; ++w) { | |||
| box[1].first = IntToSize(std::max(w_start, 0)); | |||
| box[1].second = IntToSize(std::min(w_start + kernel_size_, src_width)); | |||
| box[1].second = IntToSize(std::min(w_start + SizeToInt(kernel_size_[0]), src_width)); | |||
| RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair); | |||
| diff_index += 1; | |||
| w_start += stride_; | |||
| @@ -37,7 +37,8 @@ class PoolingGradCPUKernel : public MKLCPUKernel { | |||
| void RowPoolingGrad(const float *input, float *output, float diff, const std::vector<std::pair<size_t, size_t>> &box, | |||
| std::vector<std::pair<size_t, float>> *row_max_pair); | |||
| void ChannelPoolingGrad(const float *input, const float *diff, float *output); | |||
| int stride_{0}, kernel_size_{0}; | |||
| int stride_{0}; | |||
| std::vector<size_t> kernel_size_; | |||
| std::vector<int> padding_l_; | |||
| std::vector<size_t> src_shape_; | |||
| std::vector<size_t> dst_shape_; | |||
| @@ -36,23 +36,6 @@ namespace mindspore { | |||
| namespace device { | |||
| namespace cpu { | |||
| const size_t INIT_NODE_REF = 1; | |||
| namespace { | |||
| TypeId GetCPUSupportOutputTypeId(const TypeId type_id) { | |||
| TypeId support_type_id = type_id; | |||
| if (type_id == kNumberTypeUInt32) { | |||
| support_type_id = kNumberTypeInt32; | |||
| } | |||
| if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 || | |||
| type_id == kNumberTypeFloat64) { | |||
| support_type_id = kNumberTypeFloat32; | |||
| } | |||
| if (support_type_id != kNumberTypeInt32 && support_type_id != kNumberTypeFloat32) { | |||
| MS_LOG(EXCEPTION) << "Check output type failed."; | |||
| } | |||
| return support_type_id; | |||
| } | |||
| } // namespace | |||
| void CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) { | |||
| AssignValueNodeAddress(kernel_graph); | |||
| AssignInputNodeAddress(kernel_graph); | |||
| @@ -157,15 +140,25 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(const CNodePtr &node, s | |||
| auto shape = AnfAlgo::GetOutputInferShape(node, index); | |||
| std::vector<int> temp_shape; | |||
| (void)temp_shape.insert(temp_shape.end(), shape.begin(), shape.end()); | |||
| TypeId type_id = AnfAlgo::GetOutputInferDataType(node, index); | |||
| type_id = GetCPUSupportOutputTypeId(type_id); | |||
| tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, temp_shape); | |||
| TypeId infer_type_id = AnfAlgo::GetOutputInferDataType(node, index); | |||
| TypeId device_type_id = AnfAlgo::GetOutputDeviceDataType(node, index); | |||
| tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(infer_type_id, temp_shape); | |||
| MS_EXCEPTION_IF_NULL(tensor); | |||
| if (bound_addresses->find(address) != bound_addresses->end()) { | |||
| tensor->set_device_address(address); | |||
| need_sync_outputs->emplace_back(tensor); | |||
| } else { | |||
| address->ptr_ = tensor->data_c(); | |||
| if (infer_type_id != device_type_id) { | |||
| size_t type_size = GetTypeByte(TypeIdToType(device_type_id)); | |||
| std::vector<int> data_shape = tensor->shape(); | |||
| size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>()); | |||
| address->ptr_ = resource_manager_.MemMalloc(tensor_size); | |||
| need_sync_outputs->emplace_back(tensor); | |||
| tensor->set_device_address(address); | |||
| need_sync_outputs->emplace_back(tensor); | |||
| } else { | |||
| address->ptr_ = tensor->data_c(); | |||
| } | |||
| address->ref_count_ = INIT_NODE_REF; | |||
| (void)bound_addresses->insert(address); | |||
| } | |||
| @@ -226,12 +219,13 @@ void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph, | |||
| if (tensor_address != nullptr && tensor_address != address) { | |||
| (void)tensor->data_sync(); | |||
| } | |||
| std::vector<int> data_shape = tensor->shape(); | |||
| size_t tensor_size = | |||
| std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies<size_t>()); | |||
| if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) { | |||
| address->ptr_ = tensor->data_c(); | |||
| } else { | |||
| std::vector<int> data_shape = tensor->shape(); | |||
| size_t tensor_size = | |||
| std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies<size_t>()); | |||
| address->ptr_ = resource_manager_.MemMalloc(tensor_size); | |||
| if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(), | |||
| tensor->data_c())) { | |||
| @@ -141,7 +141,11 @@ void SetKernelInfo(const CNodePtr &kernel_node) { | |||
| if (kernel_attr.GetAllSame()) { | |||
| ExpandKernelAttr(kernel_node, &kernel_attr); | |||
| } | |||
| if (IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) { | |||
| bool ignore_check = false; | |||
| if (index == kernel_attrs.size() - 1 && input_types.size() == input_not_cnode_indexes.size()) { | |||
| ignore_check = true; | |||
| } | |||
| if (ignore_check || IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) { | |||
| size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); | |||
| if (kernel_attr.GetOutputSize() != output_num) { | |||
| MS_LOG(DEBUG) << "Output num is not equal!"; | |||
| @@ -222,7 +222,7 @@ std::string GetCNodeTarget(const AnfNodePtr &node) { | |||
| } | |||
| auto target = GetValue<std::string>(att_target); | |||
| if (kTargetSet.find(target) == kTargetSet.end()) { | |||
| MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target"; | |||
| MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target, but get " << target; | |||
| } | |||
| return target; | |||
| } | |||