@@ -55,7 +55,7 @@ void AscendBucket::AllocateAllReduceAddr() {
   auto origin_size = device_address->GetSize();
   auto align_size = MemoryManager::GetCommonAlignSize(origin_size);
   origin_size_list.emplace_back(origin_size);
-  align_size_list_.emplace_back(align_size);
+  (void)align_size_list_.emplace_back(align_size);
   total_size += align_size;
   memcpy_input_addrs_.emplace_back(std::make_shared<kernel::Address>(
     static_cast<uint8_t *>(device_address->GetMutablePtr()), device_address->GetSize()));
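
Note on the `(void)` casts introduced throughout this patch: `std::vector::emplace_back` returns a reference to the inserted element (since C++17) and `std::set::insert` returns a `std::pair<iterator, bool>`; the cast explicitly discards that result so static-analysis rules about ignored return values are satisfied. A minimal standalone sketch (illustration only, with made-up values, not part of the patch):

    #include <set>
    #include <string>
    #include <vector>

    int main() {
      std::vector<size_t> align_sizes;
      std::set<std::string> kernel_names;
      (void)align_sizes.emplace_back(512);      // returned reference deliberately ignored
      (void)kernel_names.insert("trans_data");  // returned pair<iterator, bool> deliberately ignored
      return 0;
    }
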
@@ -213,12 +213,12 @@ nlohmann::json ConstructInputs(const std::vector<size_t> &input_shape, const std
   real_input[name] = src;
   real_input[ori_format] = kOpFormat_NCHW;
   for (auto shape : output_shape) {
-    real_input[ori_shape].push_back(shape);
+    (void)real_input[ori_shape].emplace_back(shape);
   }
   real_input[param_type] = param_type_required;
   // obtain inputs shape
   for (auto shape : input_shape) {
-    real_input[shape_str].push_back(shape);
+    (void)real_input[shape_str].emplace_back(shape);
   }
   real_input[valid] = true;
   input_json.push_back(real_input);
@@ -235,12 +235,12 @@ nlohmann::json ConstructOutputs(const std::vector<size_t> &output_shape, mindspo
   real_output[name] = dst;
   real_output[ori_format] = kOpFormat_NCHW;
   for (auto shape : output_shape) {
-    real_output[ori_shape].push_back(shape);
+    (void)real_output[ori_shape].emplace_back(shape);
   }
   real_output[param_type] = param_type_required;
   // obtain outputs shape
   for (auto shape : output_shape) {
-    real_output[shape_str].push_back(shape);
+    (void)real_output[shape_str].emplace_back(shape);
   }
   real_output[valid] = true;
   output_json.push_back(real_output);
@@ -342,7 +342,7 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size
   return sync_ok;
 }
 
-void AscendDeviceAddress::LaunchTransData(kernel::KernelModPtr kernel_mod_ptr, void *output_address_ptr,
+void AscendDeviceAddress::LaunchTransData(const kernel::KernelModPtr &kernel_mod_ptr, void *output_address_ptr,
                                           size_t output_size, const std::vector<size_t> &workspace_size_list) const {
   MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
   auto input_address = std::make_shared<kernel::Address>();
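
Taking `kernel_mod_ptr` as `const kernel::KernelModPtr &` instead of by value avoids copying the `shared_ptr` (and its atomic reference-count update) on every call while keeping the callee non-owning. A simplified sketch of the idea, with `KernelMod` as a stand-in type rather than the real class:

    #include <memory>

    struct KernelMod {};  // stand-in for kernel::KernelMod

    // By-value parameter: copies the shared_ptr, bumping the refcount atomically.
    void LaunchByValue(std::shared_ptr<KernelMod> mod) { (void)mod; }

    // Const-reference parameter: no copy, no refcount traffic; lifetime stays the caller's concern.
    void LaunchByConstRef(const std::shared_ptr<KernelMod> &mod) { (void)mod; }

    int main() {
      auto mod = std::make_shared<KernelMod>();
      LaunchByValue(mod);
      LaunchByConstRef(mod);
      return 0;
    }
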
@@ -383,7 +383,7 @@ void AscendDeviceAddress::LaunchTransData(kernel::KernelModPtr kernel_mod_ptr, v
 }
 
 kernel::KernelModPtr AscendDeviceAddress::CompileTransDataAndObtainKernelMod(const nlohmann::json &kernel_json) const {
-  static std::set<std::string> constructed_kernel;
+  static std::set<std::string> constructed_kernel = {};
   auto build_manager = std::make_shared<kernel::ParallelBuildManager>();
   MS_EXCEPTION_IF_NULL(build_manager);
   std::string processor = process_aicore;
@@ -394,7 +394,7 @@ kernel::KernelModPtr AscendDeviceAddress::CompileTransDataAndObtainKernelMod(con
   std::string json_name = kernel_json[op_info_str][kernel_name_str];
   // op build
   if (constructed_kernel.find(json_name) == constructed_kernel.end()) {
-    auto task_id = build_manager->StartCompileOp(kernel_json);
+    auto task_id = kernel::ParallelBuildManager::StartCompileOp(kernel_json);
     build_manager->SaveTaskInfo(task_id, nullptr, json_name, input_size_list, output_size_list);
   }
   while (!build_manager->IsAllTaskFinish()) {
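
Calling `StartCompileOp` through the class name rather than through `build_manager` makes it explicit that the call does not depend on instance state, which is the usual reason linters flag static members accessed via an object. That `StartCompileOp` is declared `static` in `ParallelBuildManager` is assumed here from the rewritten call; a minimal sketch of the pattern with a simplified stand-in class:

    #include <string>

    struct ParallelBuildManager {  // simplified stand-in, not the real class
      static int StartCompileOp(const std::string &kernel_json) { return static_cast<int>(kernel_json.size()); }
    };

    int main() {
      ParallelBuildManager build_manager;
      int via_instance = build_manager.StartCompileOp("{}");       // legal, but hides that the call is static
      int via_class = ParallelBuildManager::StartCompileOp("{}");  // same call, intent made explicit
      return via_instance == via_class ? 0 : 1;
    }
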
@@ -410,7 +410,7 @@ kernel::KernelModPtr AscendDeviceAddress::CompileTransDataAndObtainKernelMod(con
     }
     (void)build_manager->TaskFinishProcess(task_id, build_result, false);
   }
-  constructed_kernel.insert(json_name);
+  (void)constructed_kernel.insert(json_name);
   // search cache
   auto cached_kernel_pack = TbeUtils::SearchCache(json_name, processor);
   MS_EXCEPTION_IF_NULL(cached_kernel_pack);
@@ -58,7 +58,7 @@ class AscendDeviceAddress : public DeviceAddress {
                                         mindspore::TypeId type, void *host_ptr) const;
   void SyncStream() const;
-  void LaunchTransData(kernel::KernelModPtr kernel_mod_ptr, void *output_address_ptr, size_t output_size,
+  void LaunchTransData(const kernel::KernelModPtr &kernel_mod_ptr, void *output_address_ptr, size_t output_size,
                        const std::vector<size_t> &workspace_size_list) const;
   std::vector<size_t> GetDeviceShape(std::vector<size_t> *host_shape) const;
   std::vector<size_t> GetWorkspaceSizeList(const nlohmann::json &kernel_json) const;
@@ -85,8 +85,8 @@ std::shared_ptr<session::KernelGraph> AscendLaunchAtomicClean::ObtainAtomicClean
   if (dtype_size == 0) {
     MS_LOG(EXCEPTION) << "Divide by zero.";
   }
-  int64_t shape = total_size_ / dtype_size;
-  std::vector<std::vector<int64_t>> input_shapes = {{shape}};
+  auto shape = total_size_ / dtype_size;
+  std::vector<std::vector<int64_t>> input_shapes = {{static_cast<int64_t>(shape)}};
   std::vector<std::vector<size_t>> output_shapes = {};
   auto atomic_clean_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp(
     kAtomicAddrCleanOpName, input_dtypes, input_shapes, output_dtypes, output_shapes);
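
With `total_size_` and `dtype_size` presumably unsigned (the surrounding code treats them as byte sizes), `auto` keeps the division in the unsigned domain, and the `static_cast<int64_t>` confines the signed conversion to the one place a signed shape is required instead of relying on an implicit conversion at the declaration. A small self-contained sketch under that assumption, with illustrative constants:

    #include <cstdint>
    #include <vector>

    int main() {
      const uint64_t total_size = 4096;  // assumed size-like (unsigned) quantities
      const uint64_t dtype_size = 4;
      auto shape = total_size / dtype_size;  // stays uint64_t
      std::vector<std::vector<int64_t>> input_shapes = {{static_cast<int64_t>(shape)}};
      return input_shapes[0][0] == 1024 ? 0 : 1;
    }
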
@@ -47,6 +47,9 @@ void AscendLaunchKernel::KernelSelect(std::shared_ptr<session::KernelGraph> kern
 
 void AscendLaunchKernel::KernelBuild(std::shared_ptr<session::KernelGraph> kernel_graph) {
   MS_EXCEPTION_IF_NULL(kernel_graph);
-  device::ascend::KernelBuild(kernel_graph->execution_order());
+  auto ret = device::ascend::KernelBuild(kernel_graph->execution_order());
+  if (!ret) {
+    MS_LOG(ERROR) << "kernel build failed";
+  }
 }
 }  // namespace mindspore::device::ascend
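
The build result was previously discarded; capturing it and logging on failure at least surfaces the problem, although this `KernelBuild` wrapper still returns `void`, so callers cannot react to it. A stand-alone sketch of the check with stand-in names (`std::cerr` substitutes for `MS_LOG(ERROR)`):

    #include <iostream>

    // Stand-in for device::ascend::KernelBuild, assumed to return true on success.
    bool KernelBuild() { return true; }

    int main() {
      auto ret = KernelBuild();
      if (!ret) {
        std::cerr << "kernel build failed" << std::endl;  // MS_LOG(ERROR) in the patch
      }
      return ret ? 0 : 1;
    }
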
@@ -111,7 +111,7 @@ void Bucket::CalculateMean() {
 void Bucket::UpdateTensorOutputAddr(uint8_t *addr) {
   uint8_t *tensor_output = addr;
   for (size_t i = 0; i < bucket_size_; ++i) {
-    new_tensor_output_addrs_.emplace_back(tensor_output);
+    (void)new_tensor_output_addrs_.emplace_back(tensor_output);
     tensor_output += align_size_list_[i];
   }
 }
@@ -1016,7 +1016,7 @@ void KernelRuntime::ClearOutputAddress(const std::vector<AnfNodePtr> &inputs,
   }
 }
 
-bool KernelRuntime::LaunchTaskBasedOnSingleKernel(kernel::KernelModPtr kernel_mod_ptr,
+bool KernelRuntime::LaunchTaskBasedOnSingleKernel(const kernel::KernelModPtr &kernel_mod_ptr,
                                                   const AddressPtrList &kernel_inputs,
                                                   const AddressPtrList &kernel_outputs,
                                                   const AddressPtrList &kernel_workspaces) const {
@@ -63,7 +63,7 @@ class KernelRuntime {
   virtual bool GenDynamicKernel(const session::KernelGraph *graph) = 0;
   virtual bool RunDynamicKernelAsync(const session::KernelGraph *graph) = 0;
   bool LaunchKernel(const session::KernelGraph *graph);
-  bool LaunchTaskBasedOnSingleKernel(kernel::KernelModPtr kernel_mod_ptr, const AddressPtrList &kernel_inputs,
+  bool LaunchTaskBasedOnSingleKernel(const kernel::KernelModPtr &kernel_mod_ptr, const AddressPtrList &kernel_inputs,
                                      const AddressPtrList &kernel_outputs,
                                      const AddressPtrList &kernel_workspaces) const;
   virtual void AssignStaticMemoryInput(const session::KernelGraph *graph);