You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

launch_mul.cc 3.0 kB

5 years ago
5 years ago
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "runtime/device/launch_mul.h"
  17. #include <vector>
  18. #include <memory>
  19. #include "abstract/utils.h"
  20. #include "backend/session/single_kernel_graph.h"
  21. #include "frontend/parallel/context.h"
  22. namespace mindspore::device {
  23. std::shared_ptr<session::KernelGraph> LaunchMul::ObtainMulKernelGraph() {
  24. std::vector<TypeId> input_dtypes = {dtype_, dtype_};
  25. std::vector<TypeId> output_dtypes = {dtype_};
  26. // obtain input & output shapes
  27. size_t dtype_size = abstract::TypeIdSize(dtype_);
  28. if (dtype_size == 0) {
  29. MS_LOG(EXCEPTION) << "Divide by zero.";
  30. }
  31. int64_t shape = SizeToLong(total_size_ / dtype_size);
  32. std::vector<std::vector<int64_t>> input_shapes = {{shape}, {1}};
  33. std::vector<std::vector<size_t>> output_shapes = {{static_cast<size_t>(shape)}};
  34. auto mul_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
  35. kMulOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
  36. MS_EXCEPTION_IF_NULL(mul_graph);
  37. return mul_graph;
  38. }
  39. kernel::KernelMod *LaunchMul::ObtainLaunchMulKernelMod() {
  40. if (mul_graph_ == nullptr) {
  41. // construct mul kernel graph
  42. mul_graph_ = ObtainMulKernelGraph();
  43. MS_EXCEPTION_IF_NULL(mul_graph_);
  44. // kernel select
  45. KernelSelect(mul_graph_);
  46. // kernel build
  47. KernelBuild(mul_graph_);
  48. }
  49. // obtain kernel_mod
  50. if (mul_graph_->execution_order().size() != 1) {
  51. MS_LOG(ERROR) << "the execution order of the mul graph should have only one node";
  52. }
  53. return AnfAlgo::GetKernelMod(mul_graph_->execution_order()[0]);
  54. }
  55. void LaunchMul::ObtainMulInputsAddr() {
  56. inputs_addr_.push_back(input1_addr_);
  57. auto parallel_context = parallel::ParallelContext::GetInstance();
  58. MS_EXCEPTION_IF_NULL(parallel_context);
  59. auto device_num = parallel_context->device_num();
  60. if (device_num == 0) {
  61. MS_LOG(ERROR) << "device num can't be zero";
  62. }
  63. input2_value_ = 1.0 / device_num;
  64. auto size = abstract::TypeIdSize(dtype_);
  65. auto input_size = AlignSizeForLaunchKernel(size * 1);
  66. // alloc memory
  67. input2_addr_ = AllocDeviceMem(input_size);
  68. CopyHostMemToDevice(size, input_size);
  69. inputs_addr_.push_back(input2_addr_);
  70. }
  71. void LaunchMul::FreeInputDeviceMemory() {
  72. input1_addr_ = nullptr;
  73. if (input2_addr_ != nullptr) {
  74. FreeDeviceMem(input2_addr_);
  75. input2_addr_ = nullptr;
  76. }
  77. inputs_addr_.clear();
  78. }
  79. } // namespace mindspore::device