Browse Source

Align AllReduce input and output sizes to 512

feature/build-system-rewrite
LaiYongqiang 4 years ago
parent
commit
96c5f129aa
2 changed files with 10 additions and 5 deletions
  1. +1
    -1
      mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.cc
  2. +9
    -4
      mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc

+ 1
- 1
mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.cc View File

@@ -872,7 +872,7 @@ bool AscendDeviceContext::LaunchAtomicClean(const CNodePtr &node, const std::vec
// Launch Atomic Node
auto kernel_mod = AnfAlgo::GetKernelMod(atomic_node);
MS_EXCEPTION_IF_NULL(kernel_mod);
return kernel_mod->Launch(atomic_inputs, {}, {}, GetKernelStream(atomic_node));
return kernel_mod->Launch(atomic_inputs, {}, {}, GetKernelStream(node));
}

void AscendDeviceContext::InsertEventBeforeRunTask(const KernelGraphPtr &graph) const {


+ 9
- 4
mindspore/ccsrc/runtime/graph_scheduler/actor/data_prepare_actor.cc View File

@@ -25,6 +25,7 @@
#include "mindrt/include/async/async.h"
#include "utils/log_adapter.h"
#include "utils/convert_utils.h"
#include "runtime/device/memory_manager.h"

namespace mindspore {
namespace runtime {
@@ -75,8 +76,10 @@ void FetchContinuousMemoryInfo(const CNodePtr &node, std::vector<DeviceTensorPtr
for (size_t i = 0; i < intput_sizes.size(); ++i) {
const auto &device_tensor = AnfAlgo::GetPrevNodeMutableOutputAddr(node, i, false);
MS_EXCEPTION_IF_NULL(device_tensor);
*total_size += intput_sizes[i];
(void)size_list->emplace_back(intput_sizes[i]);
auto origin_size = intput_sizes[i];
auto align_size = device::MemoryManager::GetCommonAlignSize(origin_size);
*total_size += align_size;
(void)size_list->emplace_back(align_size);
(void)addr_list->emplace_back(device_tensor);
}
} else {
@@ -84,8 +87,10 @@ void FetchContinuousMemoryInfo(const CNodePtr &node, std::vector<DeviceTensorPtr
for (size_t i = 0; i < output_sizes.size(); ++i) {
const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(node, i, false);
MS_EXCEPTION_IF_NULL(device_tensor);
*total_size += output_sizes[i];
(void)size_list->emplace_back(output_sizes[i]);
auto origin_size = output_sizes[i];
auto align_size = device::MemoryManager::GetCommonAlignSize(origin_size);
*total_size += align_size;
(void)size_list->emplace_back(align_size);
(void)addr_list->emplace_back(device_tensor);
}
}


Loading…
Cancel
Save