| @@ -0,0 +1,44 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
#include "backend/kernel_compiler/gpu/arrays/squeeze_gpu_kernel.h"

namespace mindspore {
namespace kernel {
// Registers the Squeeze GPU kernel once per supported element type. Every
// entry maps an input dtype to the identical output dtype: Squeeze only
// changes the shape metadata, never the element values (the kernel is a
// device-to-device memcpy — see squeeze_gpu_kernel.h).
// NOTE(review): uint64 is not registered even though the templated kernel is
// type-agnostic — confirm whether that omission is intentional.
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
                      SqueezeGpuKernel, half)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                      SqueezeGpuKernel, float)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
                      SqueezeGpuKernel, double)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
                      SqueezeGpuKernel, int8_t)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
                      SqueezeGpuKernel, int16_t)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
                      SqueezeGpuKernel, int)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
                      SqueezeGpuKernel, int64_t)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
                      SqueezeGpuKernel, uint8_t)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
                      SqueezeGpuKernel, uint16_t)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
                      SqueezeGpuKernel, uint32_t)
MS_REG_GPU_KERNEL_ONE(Squeeze, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
                      SqueezeGpuKernel, bool)
} // namespace kernel
} // namespace mindspore
| @@ -0,0 +1,93 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SQUEEZE_GPU_KERNEL_H | |||
| #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SQUEEZE_GPU_KERNEL_H | |||
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
| namespace mindspore { | |||
| namespace kernel { | |||
| template <typename T> | |||
| class SqueezeGpuKernel : public GpuKernel { | |||
| public: | |||
| SqueezeGpuKernel() { ResetResource(); } | |||
| ~SqueezeGpuKernel() override = default; | |||
| const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; } | |||
| const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; } | |||
| const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; } | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &, | |||
| const std::vector<AddressPtr> &outputs, void *stream_ptr) override { | |||
| T *input = GetDeviceAddress<T>(inputs, 0); | |||
| T *output = GetDeviceAddress<T>(outputs, 0); | |||
| cudaError_t ret = | |||
| cudaMemcpyAsync(output, input, input_size_, cudaMemcpyDeviceToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)); | |||
| if (ret) { | |||
| MS_LOG(ERROR) << "cudaMemcpyAsync error in SqueezeGpuKernel::Launch, error code is " << ret; | |||
| return false; | |||
| } | |||
| return true; | |||
| } | |||
| bool Init(const CNodePtr &kernel_node) override { | |||
| kernel_node_ = kernel_node; | |||
| auto axis = GetAttr<std::vector<int64_t>>(kernel_node, "axis"); | |||
| auto input_shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); | |||
| int64_t dims = SizeToLong(input_shape.size()); | |||
| if (dims == 0) { | |||
| MS_LOG(ERROR) << "Squeeze requires input tensor's dimension can't be 0, but got 0."; | |||
| return false; | |||
| } | |||
| for (const auto i : axis) { | |||
| if (i < -dims || i >= dims) { | |||
| MS_LOG(ERROR) << "Squeeze requires axis should be in [" << -dims << ", " << dims << "), but got " << i << "."; | |||
| return false; | |||
| } | |||
| } | |||
| input_size_ = std::accumulate(input_shape.begin(), input_shape.end(), sizeof(T), std::multiplies<size_t>()); | |||
| InitSizeLists(); | |||
| return true; | |||
| } | |||
| void ResetResource() noexcept override { | |||
| input_size_ = 0; | |||
| input_size_list_.clear(); | |||
| output_size_list_.clear(); | |||
| workspace_size_list_.clear(); | |||
| } | |||
| protected: | |||
| void InitSizeLists() override { | |||
| input_size_list_.push_back(input_size_); | |||
| output_size_list_.push_back(input_size_); | |||
| } | |||
| private: | |||
| size_t input_size_; | |||
| std::vector<size_t> input_size_list_; | |||
| std::vector<size_t> output_size_list_; | |||
| std::vector<size_t> workspace_size_list_; | |||
| }; | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SQUEEZE_GPU_KERNEL_H | |||
| @@ -31,8 +31,6 @@ from .mul import _mul_akg | |||
| from .notequal import _notequal_akg | |||
| from .relu6 import _relu6_akg | |||
| from .relu6_grad import _relu6_grad_akg | |||
| from .squeeze import _squeeze_akg | |||
| from .squeeze_grad import _squeeze_grad_akg | |||
| from .sub import _sub_akg | |||
| from .tile import _tile_akg | |||
| # Please insert op register in lexicographical order of the filename. | |||
| @@ -1,40 +0,0 @@ | |||
| # Copyright 2019-2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| """Squeeze op""" | |||
| from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType | |||
# Registration payload for the AKG (auto kernel generator) GPU backend:
# declares the Squeeze op with one input "x", one output "output", an optional
# "axis" attribute (list of ints), and identity input->output dtype/format
# pairs — Squeeze changes only the shape, never the element type.
# Comments cannot be interleaved in the chain because of the backslash
# continuations, so the whole payload is documented here.
squeeze_op_info = AkgGpuRegOp("Squeeze") \
    .fusion_type("OPAQUE") \
    .input(0, "x") \
    .output(0, "output") \
    .attr("axis", "optional", "listInt") \
    .dtype_format(DataType.F16_Default, DataType.F16_Default) \
    .dtype_format(DataType.F32_Default, DataType.F32_Default) \
    .dtype_format(DataType.F64_Default, DataType.F64_Default) \
    .dtype_format(DataType.I8_Default, DataType.I8_Default) \
    .dtype_format(DataType.I16_Default, DataType.I16_Default) \
    .dtype_format(DataType.I32_Default, DataType.I32_Default) \
    .dtype_format(DataType.I64_Default, DataType.I64_Default) \
    .dtype_format(DataType.U8_Default, DataType.U8_Default) \
    .dtype_format(DataType.U16_Default, DataType.U16_Default) \
    .dtype_format(DataType.U32_Default, DataType.U32_Default) \
    .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \
    .get_op_info()


@op_info_register(squeeze_op_info)
def _squeeze_akg():
    """Register the Squeeze op for the AKG GPU backend.

    The decorator performs the registration; the function body is
    intentionally empty.
    """
    return
| @@ -1,35 +0,0 @@ | |||
| # Copyright 2019 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| """SqueezeGrad op""" | |||
| from mindspore.ops.op_info_register import op_info_register, AkgGpuRegOp, DataType | |||
# Registration payload for the AKG GPU backend: declares SqueezeGrad with one
# input "y_grad" (the incoming gradient), one output "output", a required
# "x_shape" attribute (the forward input's shape to restore), and identity
# input->output dtype/format pairs.
# NOTE(review): this supports fewer dtypes than the forward Squeeze op
# (no F64/I8/I64/U16/U32) — confirm the asymmetry is intentional.
squeeze_grad_op_info = AkgGpuRegOp("SqueezeGrad") \
    .fusion_type("OPAQUE") \
    .input(0, "y_grad") \
    .output(0, "output") \
    .attr("x_shape", "required", "listInt") \
    .dtype_format(DataType.F16_Default, DataType.F16_Default) \
    .dtype_format(DataType.F32_Default, DataType.F32_Default) \
    .dtype_format(DataType.I32_Default, DataType.I32_Default) \
    .dtype_format(DataType.I16_Default, DataType.I16_Default) \
    .dtype_format(DataType.U8_Default, DataType.U8_Default) \
    .dtype_format(DataType.BOOL_Default, DataType.BOOL_Default) \
    .get_op_info()


@op_info_register(squeeze_grad_op_info)
def _squeeze_grad_akg():
    """Register the SqueezeGrad op for the AKG GPU backend.

    The decorator performs the registration; the function body is
    intentionally empty.
    """
    return