Merge pull request !937 from JoyLvliang/fix-set-output-addr-fail-in-gpu-pynative tags/v0.3.0-alpha
| @@ -198,17 +198,16 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { | |||||
| if (output_sizes.empty()) { | if (output_sizes.empty()) { | ||||
| return; | return; | ||||
| } | } | ||||
| if (AnfAlgo::GetCNodeName(kernel) == "ApplyMomentum") { | |||||
| auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0); | |||||
| AnfAlgo::SetOutputAddr(device_address, 0, kernel.get()); | |||||
| AnfAlgo::SetOutputAddr(device_address, 1, kernel.get()); | |||||
| return; | |||||
| } | |||||
| for (size_t i = 0; i < output_sizes.size(); ++i) { | for (size_t i = 0; i < output_sizes.size(); ++i) { | ||||
| if (AnfAlgo::OutputAddrExist(kernel, i)) { | if (AnfAlgo::OutputAddrExist(kernel, i)) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| if (AnfAlgo::GetCNodeName(kernel) == kApplyMomentumOpName) { | |||||
| auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i); | |||||
| AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); | |||||
| continue; | |||||
| } | |||||
| std::string output_format = AnfAlgo::GetOutputFormat(kernel, i); | std::string output_format = AnfAlgo::GetOutputFormat(kernel, i); | ||||
| auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); | ||||
| auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); | auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); | ||||
| @@ -195,8 +195,13 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap | |||||
| } | } | ||||
| auto optimizer = std::make_shared<GraphOptimizer>(); | auto optimizer = std::make_shared<GraphOptimizer>(); | ||||
| auto ir_fusion_pm = std::make_shared<PassManager>("ir_fusion_pm"); | auto ir_fusion_pm = std::make_shared<PassManager>("ir_fusion_pm"); | ||||
| ir_fusion_pm->AddPass(std::make_shared<BatchNormGradSplit>()); | |||||
| ir_fusion_pm->AddPass(std::make_shared<FusedBatchNormFusion>()); | |||||
| if (context_ptr->execution_mode() == kPynativeMode) { | |||||
| ir_fusion_pm->AddPass(std::make_shared<BnSplit>()); | |||||
| ir_fusion_pm->AddPass(std::make_shared<BnGradSplit>()); | |||||
| } else { | |||||
| ir_fusion_pm->AddPass(std::make_shared<BatchNormGradSplit>()); | |||||
| ir_fusion_pm->AddPass(std::make_shared<FusedBatchNormFusion>()); | |||||
| } | |||||
| ir_fusion_pm->AddPass(std::make_shared<AddMemcpyAsync>()); | ir_fusion_pm->AddPass(std::make_shared<AddMemcpyAsync>()); | ||||
| if (context_ptr->ir_fusion_flag()) { | if (context_ptr->ir_fusion_flag()) { | ||||
| AddAscendBackendOptionalIRFusion(ir_fusion_pm.get()); | AddAscendBackendOptionalIRFusion(ir_fusion_pm.get()); | ||||
| @@ -256,6 +256,8 @@ void ConvertPyObjectToTensor(const py::object &input_object, const PrimitivePtr | |||||
| tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::list>(input_object), nullptr); | tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::list>(input_object), nullptr); | ||||
| } else if (py::isinstance<py::array>(input_object)) { | } else if (py::isinstance<py::array>(input_object)) { | ||||
| tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::array>(input_object), nullptr); | tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::array>(input_object), nullptr); | ||||
| } else if (py::isinstance<py::none>(input_object)) { | |||||
| return; | |||||
| } else if (py::isinstance<py::tuple>(input_object)) { | } else if (py::isinstance<py::tuple>(input_object)) { | ||||
| auto tuple_inputs = py::cast<py::tuple>(input_object); | auto tuple_inputs = py::cast<py::tuple>(input_object); | ||||
| if (py::isinstance<tensor::Tensor>(tuple_inputs[0])) { | if (py::isinstance<tensor::Tensor>(tuple_inputs[0])) { | ||||
| @@ -77,10 +77,12 @@ class Tensor(Tensor_): | |||||
| def __eq__(self, other): | def __eq__(self, other): | ||||
| if not isinstance(other, Tensor): | if not isinstance(other, Tensor): | ||||
| return False | return False | ||||
| x = self.asnumpy() | |||||
| y = other.asnumpy() | |||||
| out = np.equal(x, y) | |||||
| return Tensor(np.array(out)) | |||||
| return Tensor(np.array(self.asnumpy() == other.asnumpy())) | |||||
| def __ne__(self, other): | |||||
| if not isinstance(other, Tensor): | |||||
| return True | |||||
| return Tensor(np.array(self.asnumpy() != other.asnumpy())) | |||||
| def __hash__(self): | def __hash__(self): | ||||
| return hash(id(self)) | return hash(id(self)) | ||||
| @@ -82,6 +82,7 @@ class _BatchNorm(Cell): | |||||
| self.dtype = P.DType() | self.dtype = P.DType() | ||||
| self.reshape = P.Reshape() | self.reshape = P.Reshape() | ||||
| self.is_ascend = context.get_context("device_target") == "Ascend" | self.is_ascend = context.get_context("device_target") == "Ascend" | ||||
| self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE | |||||
| if context.get_context("enable_ge"): | if context.get_context("enable_ge"): | ||||
| self.is_ge_backend = True | self.is_ge_backend = True | ||||
| @@ -89,7 +90,7 @@ class _BatchNorm(Cell): | |||||
| else: | else: | ||||
| self.is_ge_backend = False | self.is_ge_backend = False | ||||
| self.momentum = 1.0 - momentum | self.momentum = 1.0 - momentum | ||||
| if self.is_ge_backend or self.is_ascend: | |||||
| if self.is_graph_mode and (self.is_ge_backend or self.is_ascend): | |||||
| self.bn_train = P.BatchNorm(is_training=True, | self.bn_train = P.BatchNorm(is_training=True, | ||||
| epsilon=self.eps) | epsilon=self.eps) | ||||
| else: | else: | ||||
| @@ -147,7 +148,7 @@ class _BatchNorm(Cell): | |||||
| if self.is_ge_backend and self.is_global: | if self.is_ge_backend and self.is_global: | ||||
| axes, re_shape = _shape_infer(F.shape(x), self.num_features) | axes, re_shape = _shape_infer(F.shape(x), self.num_features) | ||||
| y = self._global_sync(x, axes, re_shape) | y = self._global_sync(x, axes, re_shape) | ||||
| elif self.is_ge_backend or self.is_ascend: | |||||
| elif self.is_graph_mode and (self.is_ge_backend or self.is_ascend): | |||||
| y, batch_mean, batch_var, _, _ = \ | y, batch_mean, batch_var, _, _ = \ | ||||
| self.bn_train(x, | self.bn_train(x, | ||||
| self.gamma, | self.gamma, | ||||