You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

launch_mul.cc 3.0 kB

5 years ago
5 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "runtime/device/launch_mul.h"
  17. #include "abstract/utils.h"
  18. #include "backend/session/single_kernel_graph.h"
  19. #include "frontend/parallel/context.h"
  20. namespace mindspore::device {
  21. std::shared_ptr<session::KernelGraph> LaunchMul::ObtainMulKernelGraph() {
  22. std::vector<TypeId> input_dtypes = {dtype_, dtype_};
  23. std::vector<TypeId> output_dtypes = {dtype_};
  24. // obtain input & output shapes
  25. size_t dtype_size = abstract::TypeIdSize(dtype_);
  26. if (dtype_size == 0) {
  27. MS_LOG(EXCEPTION) << "Divide by zero.";
  28. }
  29. int64_t shape = SizeToLong(total_size_ / dtype_size);
  30. std::vector<std::vector<int64_t>> input_shapes = {{shape}, {1}};
  31. std::vector<std::vector<size_t>> output_shapes = {{static_cast<size_t>(shape)}};
  32. auto mul_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
  33. kMulOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
  34. MS_EXCEPTION_IF_NULL(mul_graph);
  35. return mul_graph;
  36. }
  37. kernel::KernelMod *LaunchMul::ObtainLaunchMulKernelMod() {
  38. if (mul_graph_ == nullptr) {
  39. // construct mul kernel graph
  40. mul_graph_ = ObtainMulKernelGraph();
  41. MS_EXCEPTION_IF_NULL(mul_graph_);
  42. // kernel select
  43. KernelSelect(mul_graph_);
  44. // kernel build
  45. KernelBuild(mul_graph_);
  46. }
  47. // obtain kernel_mod
  48. if (mul_graph_->execution_order().size() != 1) {
  49. MS_LOG(ERROR) << "the execution order of the mul graph should have only one node, however, it has "
  50. << mul_graph_->execution_order().size() << " nodes.";
  51. }
  52. return AnfAlgo::GetKernelMod(mul_graph_->execution_order()[0]);
  53. }
  54. void LaunchMul::ObtainMulInputsAddr() {
  55. inputs_addr_.push_back(input1_addr_);
  56. auto parallel_context = parallel::ParallelContext::GetInstance();
  57. MS_EXCEPTION_IF_NULL(parallel_context);
  58. auto device_num = parallel_context->device_num();
  59. if (device_num == 0) {
  60. MS_LOG(ERROR) << "device num can't be zero";
  61. }
  62. input2_value_ = 1.0f / device_num;
  63. auto size = abstract::TypeIdSize(dtype_);
  64. auto input_size = AlignSizeForLaunchKernel(size * 1);
  65. // alloc memory
  66. input2_addr_ = AllocDeviceMem(input_size);
  67. CopyHostMemToDevice(size, input_size);
  68. inputs_addr_.push_back(input2_addr_);
  69. }
  70. void LaunchMul::FreeInputDeviceMemory() {
  71. input1_addr_ = nullptr;
  72. if (input2_addr_ != nullptr) {
  73. FreeDeviceMem(input2_addr_);
  74. input2_addr_ = nullptr;
  75. }
  76. inputs_addr_.clear();
  77. }
  78. } // namespace mindspore::device