You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

launch_mul.cc 2.9 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "runtime/device/launch_mul.h"
  17. #include <vector>
  18. #include <memory>
  19. #include "abstract/utils.h"
  20. #include "backend/session/single_kernel_graph.h"
  21. #include "frontend/parallel/context.h"
  22. namespace mindspore::device {
  23. std::shared_ptr<session::KernelGraph> LaunchMul::ObtainMulKernelGraph() {
  24. std::vector<TypeId> input_dtypes = {dtype_, dtype_};
  25. std::vector<TypeId> output_dtypes = {dtype_};
  26. // obtain input & output shapes
  27. size_t dtype_size = abstract::TypeIdSize(dtype_);
  28. int64_t shape = total_size_ / dtype_size;
  29. std::vector<std::vector<int64_t>> input_shapes = {{shape}, {1}};
  30. std::vector<std::vector<size_t>> output_shapes = {{static_cast<size_t>(shape)}};
  31. auto mul_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
  32. kMulOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
  33. MS_EXCEPTION_IF_NULL(mul_graph);
  34. return mul_graph;
  35. }
  36. kernel::KernelMod *LaunchMul::ObtainLaunchMulKernelMod() {
  37. if (mul_graph_ == nullptr) {
  38. // construct mul kernel graph
  39. mul_graph_ = ObtainMulKernelGraph();
  40. MS_EXCEPTION_IF_NULL(mul_graph_);
  41. // kernel select
  42. KernelSelect(mul_graph_);
  43. // kernel build
  44. KernelBuild(mul_graph_);
  45. }
  46. // obtain kernel_mod
  47. if (mul_graph_->execution_order().size() != 1) {
  48. MS_LOG(ERROR) << "the execution order of the mul graph should have only one node";
  49. }
  50. return AnfAlgo::GetKernelMod(mul_graph_->execution_order()[0]);
  51. }
  52. void LaunchMul::ObtainMulInputsAddr() {
  53. inputs_addr_.push_back(input1_addr_);
  54. auto parallel_context = parallel::ParallelContext::GetInstance();
  55. MS_EXCEPTION_IF_NULL(parallel_context);
  56. auto device_num = parallel_context->device_num();
  57. if (device_num == 0) {
  58. MS_LOG(ERROR) << "device num can't be zero";
  59. }
  60. input2_value_ = 1.0 / device_num;
  61. auto size = abstract::TypeIdSize(dtype_);
  62. auto input_size = AlignSizeForLaunchKernel(size * 1);
  63. // alloc memory
  64. input2_addr_ = AllocDeviceMem(input_size);
  65. CopyHostMemToDevice(size, input_size);
  66. inputs_addr_.push_back(input2_addr_);
  67. }
  68. void LaunchMul::FreeInputDeviceMemory() {
  69. input1_addr_ = nullptr;
  70. if (input2_addr_ != nullptr) {
  71. FreeDeviceMem(input2_addr_);
  72. input2_addr_ = nullptr;
  73. }
  74. inputs_addr_.clear();
  75. }
  76. } // namespace mindspore::device