You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

gelu_kernel.h 2.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_KERNEL_GPU_NN_GELU_GPU_KERNEL_H_
  17. #define MINDSPORE_CCSRC_KERNEL_GPU_NN_GELU_GPU_KERNEL_H_
  18. #include <vector>
  19. #include "kernel/gpu/gpu_kernel.h"
  20. #include "kernel/gpu/gpu_kernel_factory.h"
  21. #include "kernel/gpu/kernel_constants.h"
  22. #include "kernel/gpu/cuda_impl/gelu_impl.cuh"
  23. namespace mindspore {
  24. namespace kernel {
  25. template <typename T>
  26. class GeluGpuKernel : public GpuKernel {
  27. public:
  28. GeluGpuKernel() : input_size_(0) {}
  29. ~GeluGpuKernel() override = default;
  30. const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
  31. const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
  32. const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
  33. bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
  34. const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
  35. T *input_addr = GetDeviceAddress<T>(inputs, 0);
  36. T *output_addr = GetDeviceAddress<T>(outputs, 0);
  37. Gelu(input_size_ / sizeof(T), input_addr, output_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
  38. return true;
  39. }
  40. bool Init(const CNodePtr &kernel_node) override {
  41. InitResource();
  42. input_size_ = sizeof(T);
  43. auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  44. for (auto dim : input_shape) {
  45. input_size_ *= dim;
  46. }
  47. InitSizeLists();
  48. return true;
  49. }
  50. protected:
  51. void InitSizeLists() override {
  52. input_size_list_.push_back(input_size_);
  53. output_size_list_.push_back(input_size_);
  54. }
  55. private:
  56. std::vector<size_t> input_size_list_;
  57. std::vector<size_t> output_size_list_;
  58. std::vector<size_t> workspace_size_list_;
  59. size_t input_size_;
  60. };
  61. } // namespace kernel
  62. } // namespace mindspore
  63. #endif // MINDSPORE_CCSRC_KERNEL_GPU_NN_GELU_GPU_KERNEL_H_