From: @hwjiaorui Reviewed-by: @zhoufeng54,@jjfeing Signed-off-by: @jjfeing pull/14865/MERGE
| @@ -17,6 +17,7 @@ | |||||
| #include "backend/kernel_compiler/kernel_fusion.h" | #include "backend/kernel_compiler/kernel_fusion.h" | ||||
| #include <map> | #include <map> | ||||
| #include <set> | |||||
| #include <string> | #include <string> | ||||
| #include <memory> | #include <memory> | ||||
| #include "backend/kernel_compiler/tbe/tbe_kernel_build.h" | #include "backend/kernel_compiler/tbe/tbe_kernel_build.h" | ||||
| @@ -18,6 +18,7 @@ | |||||
| #include <memory> | #include <memory> | ||||
| #include <vector> | #include <vector> | ||||
| #include <string> | #include <string> | ||||
| #include <utility> | |||||
| #include "backend/optimizer/common/helper.h" | #include "backend/optimizer/common/helper.h" | ||||
| #include "base/core_ops.h" | #include "base/core_ops.h" | ||||
| #include "utils/utils.h" | #include "utils/utils.h" | ||||
| @@ -21,6 +21,7 @@ | |||||
| #include <vector> | #include <vector> | ||||
| #include <string> | #include <string> | ||||
| #include <tuple> | #include <tuple> | ||||
| #include <queue> | |||||
| #include <utility> | #include <utility> | ||||
| #include <memory> | #include <memory> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| @@ -34,7 +34,6 @@ static Buffer ReadFile(const std::string &file) { | |||||
| #else | #else | ||||
| real_path_ret = realpath(common::SafeCStr(file), real_path_mem); | real_path_ret = realpath(common::SafeCStr(file), real_path_mem); | ||||
| #endif | #endif | ||||
| if (real_path_ret == nullptr) { | if (real_path_ret == nullptr) { | ||||
| MS_LOG(ERROR) << "File: " << file << " is not exist."; | MS_LOG(ERROR) << "File: " << file << " is not exist."; | ||||
| return buffer; | return buffer; | ||||
| @@ -82,6 +82,9 @@ std::shared_ptr<session::KernelGraph> AscendLaunchAtomicClean::ObtainAtomicClean | |||||
| std::vector<TypeId> output_dtypes = {}; | std::vector<TypeId> output_dtypes = {}; | ||||
| // obtain input & output shapes | // obtain input & output shapes | ||||
| size_t dtype_size = abstract::TypeIdSize(dtype_); | size_t dtype_size = abstract::TypeIdSize(dtype_); | ||||
| if (dtype_size == 0) { | |||||
| MS_LOG(EXCEPTION) << "Divide by zero."; | |||||
| } | |||||
| int64_t shape = total_size_ / dtype_size; | int64_t shape = total_size_ / dtype_size; | ||||
| std::vector<std::vector<int64_t>> input_shapes = {{shape}}; | std::vector<std::vector<int64_t>> input_shapes = {{shape}}; | ||||
| std::vector<std::vector<size_t>> output_shapes = {}; | std::vector<std::vector<size_t>> output_shapes = {}; | ||||
| @@ -64,8 +64,6 @@ void Bucket::Launch() { | |||||
| MS_LOG(INFO) << "Bucket launch cost:" << (GetTime() - start) * 1e6 << " us"; | MS_LOG(INFO) << "Bucket launch cost:" << (GetTime() - start) * 1e6 << " us"; | ||||
| } | } | ||||
| // TODO(caifubi): float16 grad cast to float32 grad | |||||
| void Bucket::UpdateTensorAddr() { | void Bucket::UpdateTensorAddr() { | ||||
| if (grad_tensor_list_.size() != bucket_size_ || new_tensor_output_addrs_.size() != bucket_size_) { | if (grad_tensor_list_.size() != bucket_size_ || new_tensor_output_addrs_.size() != bucket_size_) { | ||||
| MS_LOG(EXCEPTION) << "grad_tensor_list size:" << grad_tensor_list_.size() | MS_LOG(EXCEPTION) << "grad_tensor_list size:" << grad_tensor_list_.size() | ||||
| @@ -80,7 +78,6 @@ void Bucket::UpdateTensorAddr() { | |||||
| // release old addr and manage addr by this Bucket. | // release old addr and manage addr by this Bucket. | ||||
| MS_EXCEPTION_IF_NULL(device_address); | MS_EXCEPTION_IF_NULL(device_address); | ||||
| auto origin_dev_ptr = device_address->GetMutablePtr(); | auto origin_dev_ptr = device_address->GetMutablePtr(); | ||||
| // FreeDeviceMem(origin_dev_ptr); | |||||
| tensor_old_addr_list_.emplace_back(origin_dev_ptr); | tensor_old_addr_list_.emplace_back(origin_dev_ptr); | ||||
| device_address->from_mem_pool_ = false; | device_address->from_mem_pool_ = false; | ||||
| device_address->set_ptr(new_tensor_output_addrs_[i]); | device_address->set_ptr(new_tensor_output_addrs_[i]); | ||||
| @@ -29,6 +29,9 @@ std::shared_ptr<session::KernelGraph> LaunchMul::ObtainMulKernelGraph() { | |||||
| // obtain input & output shapes | // obtain input & output shapes | ||||
| size_t dtype_size = abstract::TypeIdSize(dtype_); | size_t dtype_size = abstract::TypeIdSize(dtype_); | ||||
| int64_t shape = total_size_ / dtype_size; | int64_t shape = total_size_ / dtype_size; | ||||
| if (dtype_size == 0) { | |||||
| MS_LOG(EXCEPTION) << "Divide by zero."; | |||||
| } | |||||
| std::vector<std::vector<int64_t>> input_shapes = {{shape}, {1}}; | std::vector<std::vector<int64_t>> input_shapes = {{shape}, {1}}; | ||||
| std::vector<std::vector<size_t>> output_shapes = {{static_cast<size_t>(shape)}}; | std::vector<std::vector<size_t>> output_shapes = {{static_cast<size_t>(shape)}}; | ||||
| auto mul_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp( | auto mul_graph = session::SingleKernelGraph::ConstructKernelGraphBasedOnSingleOp( | ||||