From: @wangyanling10 Reviewed-by: Signed-off-by:pull/13559/MERGE
| @@ -0,0 +1,121 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| template <typename T> | |||||
| void BroadcastToCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) { | |||||
| MS_EXCEPTION_IF_NULL(kernel_node); | |||||
| input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); | |||||
| output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0); | |||||
| size_t offset = output_shape_.size() - input_shape_.size(); | |||||
| for (size_t i = 0; i < offset; ++i) { | |||||
| input_shape_.insert(input_shape_.begin(), 1); | |||||
| } | |||||
| for (size_t i = 0; i < input_shape_.size(); ++i) { | |||||
| if (output_shape_[i] < input_shape_[i] || output_shape_[i] % input_shape_[i] != 0) { | |||||
| MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to " | |||||
| << "output tensor with shape " << output_shape_ | |||||
| << ". Output shape must be the integer times of input shape at the " << i << " dim!"; | |||||
| } | |||||
| } | |||||
| for (size_t j = 0; j < output_shape_.size(); j++) { | |||||
| nums_ *= output_shape_[j]; | |||||
| } | |||||
| tmp_ptr_ = reinterpret_cast<T *>(malloc(nums_ * sizeof(T))); | |||||
| } | |||||
| // BroadcastTo | |||||
| template <typename T> | |||||
| void BroadcastToCPUKernel<T>::BroadcastToImpl(size_t dim) { | |||||
| if (dim == output_shape_.size() - 1) { | |||||
| size_t input_nums = 1; | |||||
| for (size_t j = 0; j < input_shape_.size() - 1; ++j) { | |||||
| input_nums *= input_shape_[j]; | |||||
| } | |||||
| size_t rate = output_shape_[dim] / input_shape_[dim]; | |||||
| for (size_t j = 0; j < input_nums; ++j) { | |||||
| T *in_ptr = input_ptr_ + input_shape_[dim] * j; | |||||
| for (size_t i = 0; i < rate; ++i) { | |||||
| T *out_ptr = tmp_ptr_ + (j * rate + i) * input_shape_[dim]; | |||||
| memcpy_s(out_ptr, input_shape_[dim] * sizeof(T), in_ptr, input_shape_[dim] * sizeof(T)); | |||||
| } | |||||
| } | |||||
| size_t elems = input_shape_[dim] * rate * input_nums; | |||||
| memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T)); | |||||
| return; | |||||
| } | |||||
| BroadcastToImpl(dim + 1); | |||||
| size_t rate = output_shape_[dim] / input_shape_[dim]; | |||||
| if (rate > 1) { | |||||
| size_t elems_nums = 1; | |||||
| for (size_t j = output_shape_.size() - 1; j > dim; --j) { | |||||
| elems_nums *= output_shape_[j]; | |||||
| } | |||||
| size_t input_nums = 1; | |||||
| for (size_t j = 0; j < dim; ++j) { | |||||
| input_nums *= input_shape_[j]; | |||||
| } | |||||
| for (size_t j = 0; j < input_nums; ++j) { | |||||
| T *in_ptr = output_ptr_ + elems_nums * j; | |||||
| for (size_t i = 0; i < rate; ++i) { | |||||
| T *out_ptr = tmp_ptr_ + (j * rate + i) * elems_nums; | |||||
| memcpy_s(out_ptr, elems_nums * sizeof(T), in_ptr, elems_nums * sizeof(T)); | |||||
| } | |||||
| } | |||||
| size_t elems = elems_nums * rate * input_nums; | |||||
| memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T)); | |||||
| } | |||||
| } | |||||
| template <typename T> | |||||
| bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||||
| const std::vector<AddressPtr> &outputs) { | |||||
| if (inputs.size() != 1 || outputs.size() != 1) { | |||||
| MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!"; | |||||
| return false; | |||||
| } | |||||
| if ((inputs[0] == nullptr) || (inputs[0]->size == 0)) { | |||||
| MS_LOG(EXCEPTION) << "Input data is NULL!"; | |||||
| return false; | |||||
| } | |||||
| if ((outputs[0] == nullptr) || (outputs[0]->size == 0)) { | |||||
| MS_LOG(EXCEPTION) << "Output data is NULL!"; | |||||
| return false; | |||||
| } | |||||
| input_ptr_ = reinterpret_cast<T *>(inputs[0]->addr); | |||||
| output_ptr_ = reinterpret_cast<T *>(outputs[0]->addr); | |||||
| BroadcastToImpl(0); | |||||
| return true; | |||||
| } | |||||
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| @@ -0,0 +1,65 @@ | |||||
| /** | |||||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef MINDSPORE_BROADCAST_TO_CPU_KERNEL_H | |||||
| #define MINDSPORE_BROADCAST_TO_CPU_KERNEL_H | |||||
| #include <vector> | |||||
| #include <memory> | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel.h" | |||||
| #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" | |||||
| namespace mindspore { | |||||
| namespace kernel { | |||||
| template <typename T> | |||||
| class BroadcastToCPUKernel : public CPUKernel { | |||||
| public: | |||||
| BroadcastToCPUKernel() = default; | |||||
| ~BroadcastToCPUKernel() override { | |||||
| if (tmp_ptr_ != nullptr) { | |||||
| free(tmp_ptr_); | |||||
| tmp_ptr_ = nullptr; | |||||
| } | |||||
| }; | |||||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||||
| const std::vector<AddressPtr> &outputs) override; | |||||
| void InitKernel(const CNodePtr &kernel_node) override; | |||||
| void BroadcastToImpl(size_t dim); | |||||
| size_t Index(const size_t &index, const size_t &dim) { return dim == 1 ? 0 : index; } | |||||
| private: | |||||
| std::vector<size_t> input_shape_; | |||||
| std::vector<size_t> output_shape_; | |||||
| size_t nums_{1}; | |||||
| T *input_ptr_{nullptr}; | |||||
| T *output_ptr_{nullptr}; | |||||
| T *tmp_ptr_{nullptr}; | |||||
| }; | |||||
// Kernel registrations for the BroadcastTo op. Each entry pairs a KernelAttr that
// declares one input and one output of the same dtype with the matching template
// instantiation. Only float32, int32 and bool are listed here.
// NOTE(review): presumably MS_REG_CPU_KERNEL adds the kernel to the CPU kernel
// factory at static-initialization time - confirm against cpu_kernel_factory.h.
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                  BroadcastToCPUKernel<float>);
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                  BroadcastToCPUKernel<int>);
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
                  BroadcastToCPUKernel<bool>);
| } // namespace kernel | |||||
| } // namespace mindspore | |||||
| #endif // MINDSPORE_BROADCAST_TO_CPU_KERNEL_H | |||||
| @@ -118,7 +118,7 @@ class SequentialCell(Cell): | |||||
| TypeError: If the type of the `args` is not list or OrderedDict. | TypeError: If the type of the `args` is not list or OrderedDict. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> conv = nn.Conv2d(3, 2, 3, pad_mode='valid', weight_init="ones") | >>> conv = nn.Conv2d(3, 2, 3, pad_mode='valid', weight_init="ones") | ||||
| @@ -555,7 +555,7 @@ class Conv2dTranspose(_Conv): | |||||
| ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). | ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0). | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad') | >>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad') | ||||
| @@ -740,7 +740,7 @@ class Conv1dTranspose(_Conv): | |||||
| ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. | ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad') | >>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad') | ||||
| @@ -81,7 +81,7 @@ class Embedding(Cell): | |||||
| ValueError: If `padding_idx` is an int which is not in range [0, `vocab_size`]. | ValueError: If `padding_idx` is an int which is not in range [0, `vocab_size`]. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> net = nn.Embedding(20000, 768, True) | >>> net = nn.Embedding(20000, 768, True) | ||||
| @@ -226,7 +226,7 @@ class SSIM(Cell): | |||||
| ValueError: If `filter_size` is less than 0. | ValueError: If `filter_size` is less than 0. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> net = nn.SSIM() | >>> net = nn.SSIM() | ||||
| @@ -417,7 +417,7 @@ class PSNR(Cell): | |||||
| ValueError: If length of shape of `img1` or `img2` is not equal to 4. | ValueError: If length of shape of `img1` or `img2` is not equal to 4. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> net = nn.PSNR() | >>> net = nn.PSNR() | ||||
| @@ -78,7 +78,7 @@ class ReduceLogSumExp(Cell): | |||||
| TypeError: If dtype of `x` is neither float16 nor float32. | TypeError: If dtype of `x` is neither float16 nor float32. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) | >>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32)) | ||||
| @@ -926,7 +926,7 @@ class Moments(Cell): | |||||
| TypeError: If dtype of `input_x` is neither float16 nor float32. | TypeError: If dtype of `input_x` is neither float16 nor float32. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> net = nn.Moments(axis=3, keep_dims=True) | >>> net = nn.Moments(axis=3, keep_dims=True) | ||||
| @@ -293,7 +293,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver): | |||||
| TypeError: If `quant_delay` is not greater than or equal to 0. | TypeError: If `quant_delay` is not greater than or equal to 0. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> fake_quant = nn.FakeQuantWithMinMaxObserver() | >>> fake_quant = nn.FakeQuantWithMinMaxObserver() | ||||
| @@ -448,7 +448,7 @@ class Conv2dBnFoldQuantOneConv(Cell): | |||||
| ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. | ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'. | ||||
| Supported Platforms: | Supported Platforms: | ||||
| ``Ascend`` ``GPU`` | |||||
| ``Ascend`` ``GPU`` ``CPU`` | |||||
| Examples: | Examples: | ||||
| >>> qconfig = compression.quant.create_quant_config() | >>> qconfig = compression.quant.create_quant_config() | ||||
| @@ -0,0 +1,95 @@ | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| import numpy as np | |||||
| import pytest | |||||
| import mindspore.context as context | |||||
| from mindspore.common.tensor import Tensor | |||||
| from mindspore.ops import operations as P | |||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_broadcast():
    """
    Run BroadcastTo on the CPU target with fully specified target shapes and
    compare every result against np.broadcast_to.
    """
    # The test selects device_target='CPU', so it carries the CPU platform marker
    # rather than the GPU one; otherwise it would be scheduled on the wrong platform.
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')

    # Rank-5 input broadcast to a rank-7 output (two leading dimensions are added).
    shape = (4, 5, 2, 3, 4, 5, 6)
    x_np = np.random.rand(2, 3, 1, 5, 1).astype(np.float32)
    output = P.BroadcastTo(shape)(Tensor(x_np))
    expect = np.broadcast_to(x_np, shape)
    assert np.allclose(output.asnumpy(), expect)

    # Same rank, size-1 dimensions expanded.
    shape = (3, 4, 5, 6)
    x_np = np.random.rand(3, 1, 5, 1).astype(np.float32)
    output = P.BroadcastTo(shape)(Tensor(x_np))
    expect = np.broadcast_to(x_np, shape)
    assert np.allclose(output.asnumpy(), expect)

    # NOTE(review): the CPU kernel registrations visible in this change list only
    # float32, int32 and bool - confirm that float16 input is supported on CPU.
    x1_np = np.random.rand(3, 1, 5, 1).astype(np.float16)
    output = P.BroadcastTo(shape)(Tensor(x1_np))
    expect = np.broadcast_to(x1_np, shape)
    assert np.allclose(output.asnumpy(), expect)

    # Lower-rank input: only leading dimensions are added.
    shape = (2, 3, 4, 5)
    x1_np = np.random.rand(4, 5).astype(np.float32)
    output = P.BroadcastTo(shape)(Tensor(x1_np))
    expect = np.broadcast_to(x1_np, shape)
    assert np.allclose(output.asnumpy(), expect)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_broadcast_dyn_init():
    """
    Test running the op with -1's in the init shape to support varied inputs.
    """
    # The test selects device_target='CPU', so it carries the CPU platform marker
    # rather than the GPU one.
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')

    # A -1 entry stands in for the corresponding input dimension; np_shape is the
    # fully resolved shape used to build the expected result.
    ms_shape = (-1, 4, 5, 6)
    np_shape = (3, 4, 5, 6)
    x_np = np.random.rand(3, 1, 5, 1).astype(np.float32)
    output = P.BroadcastTo(ms_shape)(Tensor(x_np))
    expect = np.broadcast_to(x_np, np_shape)
    assert np.allclose(output.asnumpy(), expect)

    # NOTE(review): the CPU kernel registrations visible in this change list only
    # float32, int32 and bool - confirm that float16 input is supported on CPU.
    x1_np = np.random.rand(3, 1, 5, 1).astype(np.float16)
    output = P.BroadcastTo(ms_shape)(Tensor(x1_np))
    expect = np.broadcast_to(x1_np, np_shape)
    assert np.allclose(output.asnumpy(), expect)

    # -1 at a position that lines up with an existing input dimension.
    ms_shape = (2, 3, -1, 5)
    np_shape = (2, 3, 4, 5)
    x1_np = np.random.rand(4, 5).astype(np.float32)
    output = P.BroadcastTo(ms_shape)(Tensor(x1_np))
    expect = np.broadcast_to(x1_np, np_shape)
    assert np.allclose(output.asnumpy(), expect)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_broadcast_dyn_invalid_init():
    """
    Test running the op with -1's in the init shape in incorrect positions.
    Expected to fail.
    """
    # The test selects device_target='CPU', so it carries the CPU platform marker
    # rather than the GPU one.
    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')

    # The -1 sits at index 1 of a rank-4 target while the input is rank 2, so it
    # falls on a newly added leading dimension with no input dimension behind it.
    ms_shape = (2, -1, 4, 5)
    x_np = np.random.rand(4, 5).astype(np.float32)
    with pytest.raises(ValueError):
        P.BroadcastTo(ms_shape)(Tensor(x_np))