diff --git a/mindspore/ccsrc/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/device/ascend/ascend_device_address.cc
index ccf9dfe405..e5f94ff849 100644
--- a/mindspore/ccsrc/device/ascend/ascend_device_address.cc
+++ b/mindspore/ccsrc/device/ascend/ascend_device_address.cc
@@ -174,7 +174,13 @@ bool AscendDeviceAddress::SyncDeviceToHostAndConvertFormat(const std::vector
                       &host_shape, TypeId host_type, size_t slot, Debugger *debugger) const;
 #endif
+ private:
   bool SyncDeviceToHostAndConvertFormat(const std::vector &shape, size_t size, TypeId type, void *host_ptr) const;
   bool ConvertFormatAndSyncHostToDevice(const std::vector &shape, size_t size, TypeId type,
diff --git a/mindspore/ccsrc/device/device_address.h b/mindspore/ccsrc/device/device_address.h
index e02d231dd5..0447cc2539 100644
--- a/mindspore/ccsrc/device/device_address.h
+++ b/mindspore/ccsrc/device/device_address.h
@@ -63,6 +63,7 @@ class DeviceAddress {
   size_t GetSize() const { return size_; }
   std::string format() const { return format_; }
   TypeId type_id() const { return type_id_; }
+  void set_host_shape(const std::vector &shape) { host_shape_ = shape; }
   virtual void set_status(DeviceAddressStatus status) {}
   virtual DeviceAddressStatus status() const { return DeviceAddressStatus::kInDevice; }
   virtual DeviceAddressType DeviceType() const { return DeviceAddressType::kUnknown; }
@@ -77,6 +78,7 @@
   string format_{"DefaultFormat"};
   TypeId type_id_{kNumberTypeFloat16};
   bool from_mem_pool_{false};
+  std::vector host_shape_{};
   friend class KernelRuntime;
   friend class MemoryManager;
   friend class mindspore::device::ascend::tasksink::TaskGenerator;
diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc
index 4581141790..529c0d63b6 100644
--- a/mindspore/ccsrc/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/device/kernel_runtime.cc
@@ -258,6 +258,7 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
     std::string output_format = AnfAlgo::GetOutputFormat(kernel, i);
     auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
     auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type);
+    device_address->set_host_shape(trans::GetRuntimePaddingShape(kernel, i));
     MS_EXCEPTION_IF_NULL(device_address);
     auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]);
     if (!ret) {
@@ -506,7 +507,9 @@ void KernelRuntime::AssignNodeOutputMem(int flag, const AnfNodePtr &node, int in
     }
     std::string output_format = AnfAlgo::GetOutputFormat(node, i);
     auto output_type = AnfAlgo::GetOutputDeviceDataType(node, i);
-    AnfAlgo::SetOutputAddr(CreateDeviceAddress(ptr, output_sizes[i], output_format, output_type), i, node.get());
+    auto device_address = CreateDeviceAddress(ptr, output_sizes[i], output_format, output_type);
+    device_address->set_host_shape(trans::GetRuntimePaddingShape(node, i));
+    AnfAlgo::SetOutputAddr(device_address, i, node.get());
   }
 }
diff --git a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
index 981e2255f3..ff864401b1 100644
--- a/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
+++ b/mindspore/ccsrc/pre_activate/ascend/ascend_backend_optimization.cc
@@ -238,16 +238,11 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr
   ();
   auto ir_fusion_pm = std::make_shared("ir_fusion_pm");
-  if (context_ptr->execution_mode() == kPynativeMode) {
-    ir_fusion_pm->AddPass(std::make_shared());
-    ir_fusion_pm->AddPass(std::make_shared());
-  } else {
-    ir_fusion_pm->AddPass(std::make_shared());
-    ir_fusion_pm->AddPass(std::make_shared());
-    ir_fusion_pm->AddPass(std::make_shared());
-    ir_fusion_pm->AddPass(std::make_shared());
-    ir_fusion_pm->AddPass(std::make_shared());
-  }
+  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   if (context_ptr->ir_fusion_flag()) {
     AddAscendBackendOptionalIRFusion(ir_fusion_pm.get());
@@ -287,8 +282,11 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr
   ();
   auto ir_fusion_pm = std::make_shared("ir_fusion_pm");
-  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
+  ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
   ir_fusion_pm->AddPass(std::make_shared());
diff --git a/mindspore/nn/layer/normalization.py b/mindspore/nn/layer/normalization.py
index 4c7ea9d4d6..d6c920b620 100644
--- a/mindspore/nn/layer/normalization.py
+++ b/mindspore/nn/layer/normalization.py
@@ -84,14 +84,13 @@ class _BatchNorm(Cell):
         self.dtype = P.DType()
         self.reshape = P.Reshape()
         self.is_ascend = context.get_context("device_target") == "Ascend"
-        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
         self.momentum = 1.0 - momentum
         if context.get_context("enable_ge"):
             self.is_ge_backend = True
         else:
             self.is_ge_backend = False
-        if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
+        if self.is_ge_backend or self.is_ascend:
             self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
         else:
@@ -153,7 +152,7 @@
         if self.is_ge_backend and self.is_global:
             axes, re_shape = _shape_infer(F.shape(x), self.num_features)
             y = self._global_sync(x, axes, re_shape)
-        elif self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
+        elif self.is_ge_backend or self.is_ascend:
             if self.is_global:
                 axes, re_shape = _shape_infer(F.shape(x), self.num_features)
                 y = self._global_sync(x, axes, re_shape)
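The core change in device_address.h and kernel_runtime.cc is that a DeviceAddress now remembers the host-side (padded) shape of the tensor it backs, recorded at the moment the address is created. The sketch below is a minimal, self-contained illustration of that pattern, not the real MindSpore classes: DeviceAddress, MakeOutputAddress, and GetRuntimePaddingShape here are simplified stand-ins for device::DeviceAddress, the KernelRuntime call sites, and trans::GetRuntimePaddingShape in the diff, and the shape value returned is made up.

#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Simplified stand-in for mindspore::device::DeviceAddress: the diff adds a
// host_shape_ member plus a set_host_shape() accessor so the address carries
// its host-side shape instead of callers re-deriving it later.
class DeviceAddress {
 public:
  DeviceAddress(void *ptr, size_t size, std::string format, int type_id)
      : ptr_(ptr), size_(size), format_(std::move(format)), type_id_(type_id) {}
  size_t GetSize() const { return size_; }
  const std::string &format() const { return format_; }
  void set_host_shape(const std::vector<int> &shape) { host_shape_ = shape; }
  const std::vector<int> &host_shape() const { return host_shape_; }

 private:
  void *ptr_{nullptr};
  size_t size_{0};
  std::string format_{"DefaultFormat"};
  int type_id_{0};
  std::vector<int> host_shape_{};  // recorded at allocation time (new in this diff)
};

// Hypothetical stand-in for trans::GetRuntimePaddingShape(node, index);
// it just returns a fixed illustrative shape.
std::vector<int> GetRuntimePaddingShape(size_t /*output_index*/) { return {1, 16, 224, 224}; }

// Mirrors the call-site change in KernelRuntime::RunOpAssignOutputMemory and
// AssignNodeOutputMem: create the address, then immediately record the padded
// host shape before handing the address to the rest of the runtime.
std::shared_ptr<DeviceAddress> MakeOutputAddress(void *ptr, size_t size, const std::string &format,
                                                 int type_id, size_t output_index) {
  auto device_address = std::make_shared<DeviceAddress>(ptr, size, format, type_id);
  device_address->set_host_shape(GetRuntimePaddingShape(output_index));
  return device_address;
}

int main() {
  float buffer[16] = {};
  auto addr = MakeOutputAddress(buffer, sizeof(buffer), "NC1HWC0", /*type_id=*/0, /*output_index=*/0);
  std::cout << "host shape rank: " << addr->host_shape().size() << "\n";
  return 0;
}

Recording the shape where the address is created costs one extra vector per output address, but it means any later device-to-host sync or format conversion can read the shape straight off the address rather than walking back to the graph node.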
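The ascend_backend_optimization.cc hunks collapse the separate PyNative-mode and graph-mode IR-fusion pass lists into one list that is registered unconditionally. The concrete pass types inside the AddPass(std::make_shared<...>()) calls were lost when this diff was flattened, so the sketch below uses hypothetical PassA/PassB placeholders and a toy PassManager purely to show the shape of the refactor: the execution-mode check disappears from the registration code.

#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Toy pass interface; PassA/PassB are placeholders, not real MindSpore passes.
struct Pass {
  virtual ~Pass() = default;
  virtual void Run() const = 0;
};
struct PassA : Pass { void Run() const override { std::cout << "  PassA\n"; } };
struct PassB : Pass { void Run() const override { std::cout << "  PassB\n"; } };

// Minimal pass manager: registers passes in order and runs them.
class PassManager {
 public:
  explicit PassManager(std::string name) : name_(std::move(name)) {}
  void AddPass(const std::shared_ptr<Pass> &pass) { passes_.push_back(pass); }
  void RunAll() const {
    std::cout << name_ << ":\n";
    for (const auto &pass : passes_) pass->Run();
  }

 private:
  std::string name_;
  std::vector<std::shared_ptr<Pass>> passes_;
};

enum ExecutionMode { kGraphMode, kPynativeMode };

// Before the diff, graph mode and PyNative mode built different IR-fusion pass
// lists behind an execution_mode() check; after it, both modes register the
// same list, so the mode parameter is no longer consulted here at all.
std::shared_ptr<PassManager> BuildIrFusionPm(ExecutionMode /*mode*/) {
  auto ir_fusion_pm = std::make_shared<PassManager>("ir_fusion_pm");
  ir_fusion_pm->AddPass(std::make_shared<PassA>());
  ir_fusion_pm->AddPass(std::make_shared<PassB>());
  return ir_fusion_pm;
}

int main() {
  // Same pass list regardless of mode, matching the removal of the
  // kPynativeMode branch in AscendBackendIRFusionOptimization.
  BuildIrFusionPm(kPynativeMode)->RunAll();
  BuildIrFusionPm(kGraphMode)->RunAll();
  return 0;
}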