!14412 fix a bug with launch allreduce in pynative mode

From: @lvchangquan Reviewed-by: @chujinjin,@jjfeing,@lilongfei15 Signed-off-by: @lilongfei15
5 years ago · e13e2f1333
--- a/mindspore/ccsrc/runtime/device/bucket.cc
+++ b/mindspore/ccsrc/runtime/device/bucket.cc
@@ -92,6 +92,7 @@ void Bucket::CalculateMean() {
  MS_EXCEPTION_IF_NULL(parallel_context);
  auto grad_mean = parallel_context->gradients_mean();
  if (!grad_mean) {
    UpdateTensorOutputAddr(ar_output_addr_);
    return;
  }
  if (launch_mul_ == nullptr) {
@@ -102,12 +103,16 @@ void Bucket::CalculateMean() {
  launch_mul_->SetInputAddr(ar_output_addr_);
  // launch mean
  launch_mul_->LaunchOpKernel();
  // store output tensor addr
  // store tensor output addr
  auto launch_output = launch_mul_->GetKernelOutputAddr();
  if (launch_output.size() != 1) {
    MS_LOG(ERROR) << "launch mul outputs should have one output";
    MS_LOG(EXCEPTION) << "launch mul outputs should have one output";
  }
  uint8_t *tensor_output = launch_output[0];
  UpdateTensorOutputAddr(launch_output[0]);
 }

 void Bucket::UpdateTensorOutputAddr(uint8_t *addr) {
  uint8_t *tensor_output = addr;
  for (size_t i = 0; i < bucket_size_; ++i) {
    new_tensor_output_addrs_.emplace_back(tensor_output);
    tensor_output += align_size_list_[i];
--- a/mindspore/ccsrc/runtime/device/bucket.h
+++ b/mindspore/ccsrc/runtime/device/bucket.h
@@ -84,6 +84,7 @@ class Bucket {
  virtual void FreeAllDeviceMem() = 0;
  virtual void FreeDeviceMem(void *dev_ptr) = 0;
  virtual void CopyTensorToContiguousMemory() = 0;
  void UpdateTensorOutputAddr(uint8_t *addr);
  void LazyDeleteOldAddr();
 };
 }  // namespace mindspore::device