| @@ -68,7 +68,6 @@ ParameterPtr CPUSession::CreateNewParameterFromParameter(const AnfNodePtr &anf, | |||
| MS_LOG(EXCEPTION) << "anf[" << anf->DebugString() << "] is not a parameter"; | |||
| } | |||
| auto valid_inputs = graph->MutableValidInputs(); | |||
| MS_EXCEPTION_IF_NULL(valid_inputs); | |||
| auto graph_inputs = graph->MutableInputs(); | |||
| MS_EXCEPTION_IF_NULL(graph_inputs); | |||
| TraceManager::DebugTrace(std::make_shared<TraceCopy>(anf->debug_info())); | |||
| @@ -113,7 +113,10 @@ class RunOpsInGraphTask : public Task { | |||
| class RunOpTask : public Task { | |||
| public: | |||
| RunOpTask() { type_ = kRunOp; } | |||
| ~RunOpTask() override = default; | |||
| ~RunOpTask() override { | |||
| op_run_info_ = nullptr; | |||
| input_tensors_ = nullptr; | |||
| } | |||
| void Run() override; | |||
| OpRunInfo *op_run_info_{nullptr}; | |||
| GraphInfo graph_info_; | |||
| @@ -26,7 +26,7 @@ class Executor; | |||
| class ExecutorManager { | |||
| public: | |||
| static ExecutorManager &Instance() { | |||
| static ExecutorManager instance; | |||
| static ExecutorManager instance{}; | |||
| return instance; | |||
| } | |||
| std::shared_ptr<Executor> GetExecutor(const std::string &device_name, uint32_t device_id); | |||
| @@ -349,8 +349,8 @@ class KernelGraph : public FuncGraph { | |||
| bool HasPostGraph() const { return !post_graphs_.empty(); } | |||
| void IncPreGraphFinishedCount() { pre_graph_finished_count_++; } | |||
| void IncPostGraphFinishedCount() { post_graph_finished_count_++; } | |||
| void IncPreGraphFinishedCount() { ++pre_graph_finished_count_; } | |||
| void IncPostGraphFinishedCount() { ++post_graph_finished_count_; } | |||
| void ResetGraphRunningStatus() { | |||
| first_step_ = false; | |||
| post_graph_finished_count_ = 0; | |||
| @@ -232,13 +232,24 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> { | |||
| virtual void UnifyMindIR(const KernelGraphPtr &graph); | |||
| virtual void FinalOptimize(const KernelGraphPtr &graph) const; | |||
| virtual GraphId CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { return 0; } | |||
| virtual GraphId CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) { return kInvalidGraphId; } | |||
| virtual GraphId CompileGraphImpl(NotNull<FuncGraphPtr>) { return kInvalidGraphId; } | |||
| virtual void BuildGraphImpl(GraphId) {} | |||
| virtual void PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph, | |||
| const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) {} | |||
| const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| MS_EXCEPTION_IF_NULL(outputs); | |||
| MS_LOG(INFO) << "Call default PreExecuteGraph with input size: " << inputs.size(); | |||
| } | |||
| virtual void PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph, | |||
| const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) {} | |||
| virtual void ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) {} | |||
| const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| MS_EXCEPTION_IF_NULL(outputs); | |||
| MS_LOG(INFO) << "Call default PostExecuteGraph with input size: " << inputs.size(); | |||
| } | |||
| virtual void ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); } | |||
| void RunGraphImpl(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs); | |||
| virtual KernelGraphPtr BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info, | |||
| const std::vector<tensor::TensorPtr> &input_tensors, | |||
| @@ -275,7 +286,11 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> { | |||
| virtual void ExecuteAllTaskInQueue() {} | |||
| virtual void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph, | |||
| const std::vector<tensor::TensorPtr> &inputs_const) const {} | |||
| const std::vector<tensor::TensorPtr> &inputs_const) const { | |||
| MS_EXCEPTION_IF_NULL(kernel_graph); | |||
| MS_LOG(INFO) << "Call default LoadInputData with input size: " << inputs_const.size(); | |||
| } | |||
| void UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_graph, VectorRef *const outputs, | |||
| const std::vector<tensor::TensorPtr> &input_tensors, | |||
| std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) const; | |||
| @@ -33,7 +33,7 @@ | |||
| namespace mindspore { | |||
| namespace common { | |||
| enum Status { FAIL = -1, SUCCESS = 0 }; | |||
| using Task = std::function<int()>; | |||
| using Task = std::function<Status()>; | |||
| struct ThreadContext { | |||
| std::mutex mutex; | |||
| @@ -48,7 +48,7 @@ class ThreadPool { | |||
| ThreadPool &operator=(const ThreadPool &) = delete; | |||
| static ThreadPool &GetInstance(); | |||
| bool SyncRun(const std::vector<Task> &tasks); | |||
| size_t GetSyncRunThreadNum() { return max_thread_num_; } | |||
| size_t GetSyncRunThreadNum() const { return max_thread_num_; } | |||
| void ClearThreadPool(); | |||
| private: | |||
| @@ -172,7 +172,7 @@ struct Address { | |||
| }; | |||
| using AddressPtr = std::shared_ptr<Address>; | |||
| using AddressPtrList = std::vector<AddressPtr>; | |||
| using StreamType = void *; | |||
| // The memory info of kernel launch. | |||
| struct KernelLaunchInfo { | |||
| AddressPtrList inputs_; | |||
| @@ -215,8 +215,8 @@ class KernelMod { | |||
| const std::vector<AddressPtr> &GetInputsAddr() { return inputs_addr_; } | |||
| const std::vector<AddressPtr> &GetWorkSpacesAddr() { return workspaces_addr_; } | |||
| const std::vector<AddressPtr> &GetOutputsAddr() { return outputs_addr_; } | |||
| void SetStream(void *stream) { stream_ = stream; } | |||
| void *GetStream() const { return stream_; } | |||
| void set_stream(StreamType stream) { stream_ = stream; } | |||
| StreamType stream() const { return stream_; } | |||
| void SetAtomicCleanNodes(const std::vector<CNodePtr> &atomic_clean_node); | |||
| protected: | |||
| @@ -226,7 +226,7 @@ class KernelMod { | |||
| std::string unique_name_; | |||
| std::string fullname_; | |||
| bool is_monad_{false}; | |||
| void *stream_{nullptr}; | |||
| StreamType stream_{nullptr}; | |||
| AnfNodeWeakPtr anf_node_; | |||
| std::map<uint32_t, tensor::TensorPtr> depend_tensor_map_; | |||
| std::vector<CNodeWeakPtr> atomic_clean_nodes_; | |||
| @@ -765,9 +765,9 @@ void AscendKernelRuntime::SetKernelModStream(const std::vector<CNodePtr> &kernel | |||
| MS_LOG(EXCEPTION) << "create communication stream failed, ret:" << ret; | |||
| } | |||
| stream_id_map_[stream_id] = stream; | |||
| ascend_kernel_mod->SetStream(stream); | |||
| ascend_kernel_mod->set_stream(stream); | |||
| } else { | |||
| ascend_kernel_mod->SetStream(iter->second); | |||
| ascend_kernel_mod->set_stream(iter->second); | |||
| } | |||
| if (stream_id > 0) { | |||
| last_kernel[stream_id_map_[stream_id]] = i; | |||
| @@ -112,7 +112,7 @@ void ProfilingReporter::ReportStepPoint(const std::vector<std::shared_ptr<StepPo | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| // This function should report all tags; the data will be saved to ts_track.data.<device_id>.slice_<index> | |||
| // The first step index is set to 1, kept consistent with GE | |||
| rtProfilerTraceEx(1, graph_id_, point->tag(), kernel_mod->GetStream()); | |||
| rtProfilerTraceEx(1, graph_id_, point->tag(), kernel_mod->stream()); | |||
| MS_LOG(INFO) << "Report step point, graph id: " << graph_id_ << ", op name: " << point->op_name() | |||
| << ", stream id: " << GetStreamId(op_name) << ", task id: " << GetTaskId(op_name) | |||
| @@ -765,7 +765,7 @@ void *AscendDeviceContext::GetKernelStream(const CNodePtr &node) const { | |||
| if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) { | |||
| return compute_stream_; | |||
| } else { | |||
| auto stream = kernel_mod->GetStream(); | |||
| auto stream = kernel_mod->stream(); | |||
| if (stream == nullptr) { | |||
| stream = compute_stream_; | |||
| MS_LOG(INFO) << "Assign default compute stream for node " << node->fullname_with_scope(); | |||
| @@ -15,7 +15,6 @@ | |||
| */ | |||
| #include "plugin/device/cpu/hal/device/cpu_memory_manager.h" | |||
| #include <memory> | |||
| #include "backend/common/session/anf_runtime_algorithm.h" | |||
| #include "utils/ms_context.h" | |||
| #include "utils/convert_utils.h" | |||
| @@ -38,8 +38,8 @@ static constexpr float kEps = 1e-30; | |||
| void FusedAdaFactorCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel_node) { | |||
| NativeCpuKernelMod::InitInputOutputSize(kernel_node); | |||
| (void)workspace_size_list_.emplace_back(elem_num_ * kSizeFloat32); | |||
| (void)workspace_size_list_.emplace_back(elem_num_ / last_row_dim_size_ * kSizeFloat32); | |||
| (void)workspace_size_list_.emplace_back(elem_num_ / last_col_dim_size_ * kSizeFloat32); | |||
| (void)workspace_size_list_.emplace_back((elem_num_ / last_row_dim_size_) * kSizeFloat32); | |||
| (void)workspace_size_list_.emplace_back((elem_num_ / last_col_dim_size_) * kSizeFloat32); | |||
| } | |||
| void FusedAdaFactorCpuKernelMod::InitKernel(const CNodePtr &kernel_node) { | |||
| @@ -156,7 +156,7 @@ void FusedAdaFactorCpuKernelMod::FactorUpdate(float *update, const std::vector<A | |||
| task = [&](size_t start, size_t end) { | |||
| for (size_t i = start; i < end; ++i) { | |||
| float row_reduce = 0; | |||
| size_t reduce_start = i / row_dim_size * last_row_col_size + i % row_dim_size; | |||
| size_t reduce_start = (i / row_dim_size) * last_row_col_size + i % row_dim_size; | |||
| for (size_t j = 0; j < col_dim_size; ++j) { | |||
| row_reduce += update[reduce_start + j * row_dim_size]; | |||
| } | |||
| @@ -157,7 +157,7 @@ void ScatterUpdateCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inpu | |||
| size_t once_compute_size = (num_units_ + max_thread_num - 1) / max_thread_num; | |||
| while (start < num_units_) { | |||
| size_t end = (start + once_compute_size) > num_units_ ? num_units_ : (start + once_compute_size); | |||
| auto task = [¶ms, start, end]() -> int { | |||
| auto task = [¶ms, start, end]() { | |||
| Compute<T>(¶ms, start, end); | |||
| return common::SUCCESS; | |||
| }; | |||
| @@ -52,10 +52,10 @@ void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t | |||
| float y; | |||
| if (lr_power == -0.5) { | |||
| y = std::sqrt(accum_new); | |||
| linear[j] += summed_grad - (y - std::sqrt(accum[j])) / lr * var[j]; | |||
| linear[j] += (summed_grad - (y - std::sqrt(accum[j])) / lr) * var[j]; | |||
| } else { | |||
| y = std::pow(accum_new, -lr_power); | |||
| linear[j] += summed_grad - (y - std::pow(accum[j], -lr_power)) / lr * var[j]; | |||
| linear[j] += (summed_grad - (y - std::pow(accum[j], -lr_power)) / lr) * var[j]; | |||
| } | |||
| accum[j] = accum_new; | |||
| auto x = Sign(linear[j]) * l1 - linear[j]; | |||
| @@ -147,7 +147,8 @@ void StridedSliceCpuKernelMod::InitSliceParam(const CNodePtr &kernel_node, std:: | |||
| slice_param_.num_axes_ = DIMENSION_8D; | |||
| } | |||
| int StridedSliceCpuKernelMod::RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos) { | |||
| common::Status StridedSliceCpuKernelMod::RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, | |||
| int start_pos) { | |||
| int begin_index = slice_param_.begins_[split_axis_]; | |||
| int inner_size = inner_ * data_size_; | |||
| const uint8_t *cur_in_ptr = input_addr + (start_pos * input_shape_[split_axis_] + begin_index) * inner_size; | |||
| @@ -162,7 +163,8 @@ int StridedSliceCpuKernelMod::RunTaskOnOuter(const uint8_t *input_addr, uint8_t | |||
| return common::SUCCESS; | |||
| } | |||
| int StridedSliceCpuKernelMod::RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos) { | |||
| common::Status StridedSliceCpuKernelMod::RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, | |||
| int start_pos) { | |||
| int begin_index = slice_param_.begins_[split_axis_]; | |||
| int inner_size = inner_ * data_size_; | |||
| const uint8_t *cur_in_ptr = input_addr + (start_pos * slice_param_.strides_[split_axis_] + begin_index) * inner_size; | |||
| @@ -179,7 +181,7 @@ int StridedSliceCpuKernelMod::RunTaskOnSplitAxis(const uint8_t *input_addr, uint | |||
| void StridedSliceCpuKernelMod::ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread_num) { | |||
| int thread_index = 0; | |||
| std::vector<common::Task> tasks; | |||
| std::function<int(StridedSliceCpuKernelMod *, const uint8_t *, uint8_t *, int)> execute_func; | |||
| std::function<common::Status(StridedSliceCpuKernelMod *, const uint8_t *, uint8_t *, int)> execute_func; | |||
| if (parallel_strategy_ == kOnOuter) { | |||
| execute_func = &StridedSliceCpuKernelMod::RunTaskOnOuter; | |||
| } else if (parallel_strategy_ == kOnSplitAxis) { | |||
| @@ -42,8 +42,8 @@ class StridedSliceCpuKernelMod : public NativeCpuKernelMod { | |||
| bool MatchParallelPattern(); | |||
| void InitParallelParam(); | |||
| void ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread_num); | |||
| int RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos); | |||
| int RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos); | |||
| common::Status RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos); | |||
| common::Status RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos); | |||
| void ParseMasks(const CNodePtr &kernel_node); | |||
| TypeId dtype_; | |||
| @@ -99,7 +99,7 @@ void UniqueCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, con | |||
| params->workspace_idx_ = reinterpret_cast<IndexType *>(workspace[2]->addr); | |||
| params->output_ = reinterpret_cast<DataType *>(outputs[0]->addr); | |||
| params->inverse_idx_ = reinterpret_cast<IndexType *>(outputs[1]->addr); | |||
| params->input_size_ = static_cast<IndexType>(input_size_); | |||
| params->input_size_ = input_size_; | |||
| params->output_size_ = 0; | |||
| params->thread_num_ = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); | |||
| @@ -36,8 +36,8 @@ struct UniqueParam { | |||
| IndexType *inverse_idx_{nullptr}; | |||
| DataType *workspace_{nullptr}; | |||
| IndexType *workspace_idx_{nullptr}; | |||
| IndexType input_size_{0}; | |||
| IndexType output_size_{0}; | |||
| size_t input_size_{0}; | |||
| size_t output_size_{0}; | |||
| size_t thread_num_{0}; | |||
| bool need_sort_{true}; | |||
| }; | |||
| @@ -48,15 +48,14 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| ~UniqueCpuKernelMod() override = default; | |||
| void InitKernel(const CNodePtr &kernel_node) override; | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs) override; | |||
| protected: | |||
| void InitInputOutputSize(const CNodePtr &kernel_node) override; | |||
| template <typename DataType, typename IndexType> | |||
| void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace, | |||
| const std::vector<AddressPtr> &outputs); | |||
| protected: | |||
| size_t input_size_{0}; | |||
| TypeId dtype_{kTypeUnknown}; | |||
| size_t output_size_{0}; | |||
| @@ -64,16 +63,20 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| CNodeWeakPtr node_wpt_; | |||
| template <typename DataType> | |||
| static size_t BucketId(DataType data, size_t bucket_num) { | |||
| static size_t BucketId(DataType input, size_t bucket_num) { | |||
| if (input < 0) { | |||
| input = -input; | |||
| } | |||
| size_t data = static_cast<size_t>(input); | |||
| if (bucket_num < 1) { | |||
| return static_cast<size_t>(data); | |||
| return data; | |||
| } | |||
| return static_cast<size_t>(data) % bucket_num; | |||
| return data % bucket_num; | |||
| } | |||
| template <typename DataType, typename IndexType> | |||
| static void CalculateEachBucketSize(const std::shared_ptr<UniqueParam<DataType, IndexType>> ¶ms, | |||
| std::vector<IndexType> *each_bucket_size) { | |||
| std::vector<size_t> *each_bucket_size) { | |||
| MS_EXCEPTION_IF_NULL(params); | |||
| MS_EXCEPTION_IF_NULL(params->input_); | |||
| MS_EXCEPTION_IF_NULL(each_bucket_size); | |||
| @@ -81,17 +84,16 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| if (params->input_size_ < 1) { | |||
| return; | |||
| } | |||
| for (IndexType i = 0; i < params->input_size_; ++i) { | |||
| for (size_t i = 0; i < params->input_size_; ++i) { | |||
| auto bucket_id = BucketId(params->input_[i], bucket_num); | |||
| each_bucket_size->at(bucket_id)++; | |||
| } | |||
| } | |||
| template <typename DataType, typename IndexType> | |||
| static void SplitAndCalculateBucketSize( | |||
| const std::shared_ptr<UniqueParam<DataType, IndexType>> ¶ms, | |||
| std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *segments_ptr, | |||
| std::vector<std::shared_ptr<std::vector<IndexType>>> *segment_bucket_sizes_ptr) { | |||
| static void SplitAndCalculateBucketSize(const std::shared_ptr<UniqueParam<DataType, IndexType>> ¶ms, | |||
| std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *segments_ptr, | |||
| std::vector<std::shared_ptr<std::vector<size_t>>> *segment_bucket_sizes_ptr) { | |||
| MS_EXCEPTION_IF_NULL(params); | |||
| MS_EXCEPTION_IF_NULL(params->input_); | |||
| MS_EXCEPTION_IF_NULL(segments_ptr); | |||
| @@ -99,21 +101,21 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| auto &segments = *segments_ptr; | |||
| auto &segment_bucket_sizes = *segment_bucket_sizes_ptr; | |||
| IndexType input_size = params->input_size_; | |||
| size_t input_size = params->input_size_; | |||
| size_t thread_num = params->thread_num_; | |||
| if (thread_num < 1) { | |||
| MS_LOG(EXCEPTION) << "For 'Unique', thread num should be greater than 0, but got " << thread_num; | |||
| } | |||
| IndexType thread_data_size = input_size / thread_num; | |||
| size_t thread_data_size = input_size / thread_num; | |||
| size_t left_data_size = input_size % thread_num; | |||
| segments.reserve(thread_num); | |||
| segment_bucket_sizes.reserve(thread_num); | |||
| IndexType current_offset = 0; | |||
| size_t current_offset = 0; | |||
| std::vector<common::Task> tasks; | |||
| tasks.reserve(thread_num); | |||
| for (size_t i = 0; i < thread_num; ++i) { | |||
| (void)segment_bucket_sizes.emplace_back(std::make_shared<std::vector<IndexType>>(thread_num, 0)); | |||
| IndexType data_size = thread_data_size; | |||
| (void)segment_bucket_sizes.emplace_back(std::make_shared<std::vector<size_t>>(thread_num, 0)); | |||
| size_t data_size = thread_data_size; | |||
| if (i < left_data_size) { | |||
| data_size += 1; | |||
| } | |||
| @@ -132,18 +134,17 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| } | |||
| template <typename DataType, typename IndexType> | |||
| static void SegmentToBuckets(const std::shared_ptr<UniqueParam<DataType, IndexType>> &segment, | |||
| IndexType segment_offset, | |||
| static void SegmentToBuckets(const std::shared_ptr<UniqueParam<DataType, IndexType>> &segment, size_t segment_offset, | |||
| const std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> &buckets) { | |||
| MS_LOG(DEBUG) << "Start"; | |||
| MS_EXCEPTION_IF_NULL(segment); | |||
| MS_EXCEPTION_IF_NULL(segment->input_); | |||
| std::vector<IndexType> bucket_data_num(segment->thread_num_, 0); | |||
| std::vector<size_t> bucket_data_num(segment->thread_num_, 0); | |||
| auto bucket_size = buckets.size(); | |||
| if (segment->input_size_ < 1) { | |||
| return; | |||
| } | |||
| for (IndexType i = 0; i < segment->input_size_; ++i) { | |||
| for (size_t i = 0; i < segment->input_size_; ++i) { | |||
| DataType data = segment->input_[i]; | |||
| auto bucket_id = BucketId(data, segment->thread_num_); | |||
| auto bucket_index = bucket_data_num[bucket_id]; | |||
| @@ -160,7 +161,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| continue; | |||
| } | |||
| bucket->input_[bucket_index] = data; | |||
| bucket->workspace_idx_[bucket_index] = segment_offset + i; | |||
| bucket->workspace_idx_[bucket_index] = static_cast<IndexType>(segment_offset + i); | |||
| bucket_data_num[bucket_id]++; | |||
| } | |||
| MS_LOG(DEBUG) << "End"; | |||
| @@ -169,7 +170,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| template <typename DataType, typename IndexType> | |||
| static void GatherSegmentsToBuckets(const std::shared_ptr<UniqueParam<DataType, IndexType>> ¶ms, | |||
| std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *segments_ptr, | |||
| std::vector<std::shared_ptr<std::vector<IndexType>>> *segment_bucket_sizes_ptr, | |||
| std::vector<std::shared_ptr<std::vector<size_t>>> *segment_bucket_sizes_ptr, | |||
| std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *buckets_ptr) { | |||
| MS_LOG(DEBUG) << "Start"; | |||
| MS_EXCEPTION_IF_NULL(params); | |||
| @@ -186,14 +187,14 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| auto &buckets = *buckets_ptr; | |||
| auto thread_num = segments.size(); | |||
| buckets.reserve(thread_num); | |||
| std::vector<IndexType> bucket_data_size(thread_num, 0); | |||
| std::vector<size_t> bucket_data_size(thread_num, 0); | |||
| for (size_t i = 0; i < thread_num; ++i) { | |||
| for (size_t j = 0; j < thread_num; ++j) { | |||
| bucket_data_size[j] += segment_bucket_sizes[i]->at(j); | |||
| } | |||
| } | |||
| IndexType current_offset = 0; | |||
| size_t current_offset = 0; | |||
| for (size_t i = 0; i < thread_num; ++i) { | |||
| auto bucket = std::make_shared<UniqueParam<DataType, IndexType>>(); | |||
| bucket->input_ = params->output_ + current_offset; | |||
| @@ -205,7 +206,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| current_offset += bucket_data_size[i]; | |||
| (void)buckets.emplace_back(bucket); | |||
| } | |||
| std::vector<IndexType> tmp_bucket_data_size(thread_num, 0); | |||
| std::vector<size_t> tmp_bucket_data_size(thread_num, 0); | |||
| std::vector<std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>>> thread_buckets; | |||
| for (size_t i = 0; i < thread_num; ++i) { | |||
| std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> local_buckets; | |||
| @@ -251,14 +252,14 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| if (params->input_size_ < 1) { | |||
| return; | |||
| } | |||
| if (params->need_sort_) { | |||
| for (IndexType i = 0; i < params->input_size_; ++i) { | |||
| input_idx[i] = i; | |||
| if (params->need_sort_ && !std::is_same<DataType, float>::value) { | |||
| for (size_t i = 0; i < params->input_size_; ++i) { | |||
| input_idx[i] = static_cast<IndexType>(i); | |||
| } | |||
| std::sort(input_idx, input_idx + params->input_size_, | |||
| [&](IndexType left, IndexType right) { return input[left] < input[right]; }); | |||
| [&](size_t left, size_t right) { return input[left] < input[right]; }); | |||
| DataType last = input[0]; | |||
| for (IndexType i = 0; i < params->input_size_; ++i) { | |||
| for (size_t i = 0; i < params->input_size_; ++i) { | |||
| auto curr = input[input_idx[i]]; | |||
| if (i == 0 || curr != last) { | |||
| if (i != 0) { | |||
| @@ -271,11 +272,11 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| inverse_idx[input_idx[i]] = j; | |||
| } | |||
| } | |||
| params->output_size_ = j + 1; | |||
| params->output_size_ = static_cast<size_t>(j + 1); | |||
| } else { | |||
| std::unordered_map<DataType, IndexType> uniq; | |||
| uniq.reserve(params->input_size_); | |||
| for (IndexType i = 0; i < params->input_size_; ++i) { | |||
| for (size_t i = 0; i < params->input_size_; ++i) { | |||
| auto it = uniq.emplace(input[i], j); | |||
| inverse_idx[i] = it.first->second; | |||
| if (it.second) { | |||
| @@ -285,7 +286,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| for (const auto &it : uniq) { | |||
| output[it.second] = it.first; | |||
| } | |||
| params->output_size_ = j; | |||
| params->output_size_ = static_cast<size_t>(j); | |||
| } | |||
| MS_LOG(DEBUG) << "End"; | |||
| } | |||
| @@ -310,7 +311,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| template <typename DataType, typename IndexType> | |||
| static void TransformBucketReverseIndices(const std::shared_ptr<UniqueParam<DataType, IndexType>> &bucket, | |||
| const std::shared_ptr<UniqueParam<DataType, IndexType>> &result, | |||
| IndexType offset) { | |||
| size_t offset) { | |||
| MS_EXCEPTION_IF_NULL(bucket); | |||
| MS_EXCEPTION_IF_NULL(bucket->inverse_idx_); | |||
| MS_EXCEPTION_IF_NULL(bucket->workspace_idx_); | |||
| @@ -319,10 +320,14 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| if (bucket->input_size_ < 1) { | |||
| return; | |||
| } | |||
| for (IndexType i = 0; i < bucket->input_size_; ++i) { | |||
| for (size_t i = 0; i < bucket->input_size_; ++i) { | |||
| auto origin_idx = bucket->workspace_idx_[i]; | |||
| if (origin_idx >= 0 && origin_idx < result->input_size_) { | |||
| result->inverse_idx_[origin_idx] = bucket->inverse_idx_[i] + offset; | |||
| if (origin_idx < 0) { | |||
| continue; | |||
| } | |||
| size_t index = static_cast<size_t>(origin_idx); | |||
| if (index < result->input_size_) { | |||
| result->inverse_idx_[index] = bucket->inverse_idx_[i] + offset; | |||
| } | |||
| } | |||
| } | |||
| @@ -334,8 +339,8 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| MS_EXCEPTION_IF_NULL(result); | |||
| MS_EXCEPTION_IF_NULL(result->output_); | |||
| size_t thread_num = buckets.size(); | |||
| std::vector<IndexType> bucket_offsets(thread_num); | |||
| IndexType current_size = 0; | |||
| std::vector<size_t> bucket_offsets(thread_num); | |||
| size_t current_size = 0; | |||
| for (size_t i = 0; i < thread_num; ++i) { | |||
| auto bucket = buckets[i]; | |||
| MS_EXCEPTION_IF_NULL(bucket); | |||
| @@ -368,7 +373,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod { | |||
| MS_EXCEPTION_IF_NULL(params); | |||
| std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> segments; | |||
| std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> buckets; | |||
| std::vector<std::shared_ptr<std::vector<IndexType>>> segment_bucket_sizes; | |||
| std::vector<std::shared_ptr<std::vector<size_t>>> segment_bucket_sizes; | |||
| SplitAndCalculateBucketSize(params, &segments, &segment_bucket_sizes); | |||
| GatherSegmentsToBuckets(params, &segments, &segment_bucket_sizes, &buckets); | |||
| UniqueEachBucket(buckets); | |||
| @@ -1483,7 +1483,7 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph &graph, const AnfNod | |||
| auto kernel_mod = AnfAlgo::GetKernelMod(kernel); | |||
| MS_EXCEPTION_IF_NULL(kernel_mod); | |||
| KernelLaunchInfo kernel_launch_info; | |||
| auto stream = kernel_mod->GetStream(); | |||
| auto stream = kernel_mod->stream(); | |||
| if (stream == nullptr) { | |||
| if (AnfAlgo::IsCommunicationOp(kernel)) { | |||
| stream = communication_stream_; | |||
| @@ -27,6 +27,8 @@ namespace mindspore { | |||
| namespace device { | |||
| class MemHandler { | |||
| public: | |||
| MemHandler() = default; | |||
| virtual ~MemHandler() = default; | |||
| virtual size_t GetAvailableMemSize() = 0; | |||
| virtual void *MallocDevice(size_t mem_size) = 0; | |||
| virtual void FreeDevice(void *ptr) = 0; | |||
| @@ -18,12 +18,12 @@ | |||
| namespace mindspore { | |||
| MsException &MsException::Instance() { | |||
| static MsException instance; | |||
| static MsException instance{}; | |||
| return instance; | |||
| } | |||
| StaticAnalysisException &StaticAnalysisException::Instance() { | |||
| static StaticAnalysisException instance; | |||
| static StaticAnalysisException instance{}; | |||
| return instance; | |||
| } | |||
| } // namespace mindspore | |||