Browse Source

clean code

feature/build-system-rewrite
kswang 4 years ago
parent
commit
f26870d437
21 changed files with 104 additions and 79 deletions
  1. +0
    -1
      mindspore/ccsrc/backend/common/session/cpu_session.cc
  2. +4
    -1
      mindspore/ccsrc/backend/common/session/executor.h
  3. +1
    -1
      mindspore/ccsrc/backend/common/session/executor_manager.h
  4. +2
    -2
      mindspore/ccsrc/backend/common/session/kernel_graph.h
  5. +20
    -5
      mindspore/ccsrc/backend/common/session/session_basic.h
  6. +2
    -2
      mindspore/ccsrc/common/thread_pool.h
  7. +4
    -4
      mindspore/ccsrc/kernel/kernel.h
  8. +2
    -2
      mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc
  9. +1
    -1
      mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc
  10. +1
    -1
      mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.cc
  11. +0
    -1
      mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_memory_manager.cc
  12. +3
    -3
      mindspore/ccsrc/plugin/device/cpu/kernel/fused_ada_factor_cpu_kernel.cc
  13. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_update_cpu_kernel.cc
  14. +2
    -2
      mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc
  15. +5
    -3
      mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.cc
  16. +2
    -2
      mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.h
  17. +1
    -1
      mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.cc
  18. +48
    -43
      mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.h
  19. +1
    -1
      mindspore/ccsrc/runtime/device/kernel_runtime.cc
  20. +2
    -0
      mindspore/ccsrc/runtime/device/memory_scheduler.h
  21. +2
    -2
      mindspore/core/utils/ms_exception.cc

+ 0
- 1
mindspore/ccsrc/backend/common/session/cpu_session.cc View File

@@ -68,7 +68,6 @@ ParameterPtr CPUSession::CreateNewParameterFromParameter(const AnfNodePtr &anf,
MS_LOG(EXCEPTION) << "anf[" << anf->DebugString() << "] is not a parameter";
}
auto valid_inputs = graph->MutableValidInputs();
MS_EXCEPTION_IF_NULL(valid_inputs);
auto graph_inputs = graph->MutableInputs();
MS_EXCEPTION_IF_NULL(graph_inputs);
TraceManager::DebugTrace(std::make_shared<TraceCopy>(anf->debug_info()));


+ 4
- 1
mindspore/ccsrc/backend/common/session/executor.h View File

@@ -113,7 +113,10 @@ class RunOpsInGraphTask : public Task {
class RunOpTask : public Task {
public:
RunOpTask() { type_ = kRunOp; }
~RunOpTask() override = default;
~RunOpTask() override {
op_run_info_ = nullptr;
input_tensors_ = nullptr;
}
void Run() override;
OpRunInfo *op_run_info_{nullptr};
GraphInfo graph_info_;


+ 1
- 1
mindspore/ccsrc/backend/common/session/executor_manager.h View File

@@ -26,7 +26,7 @@ class Executor;
class ExecutorManager {
public:
static ExecutorManager &Instance() {
static ExecutorManager instance;
static ExecutorManager instance{};
return instance;
}
std::shared_ptr<Executor> GetExecutor(const std::string &device_name, uint32_t device_id);


+ 2
- 2
mindspore/ccsrc/backend/common/session/kernel_graph.h View File

@@ -349,8 +349,8 @@ class KernelGraph : public FuncGraph {

bool HasPostGraph() const { return !post_graphs_.empty(); }

void IncPreGraphFinishedCount() { pre_graph_finished_count_++; }
void IncPostGraphFinishedCount() { post_graph_finished_count_++; }
void IncPreGraphFinishedCount() { ++pre_graph_finished_count_; }
void IncPostGraphFinishedCount() { ++post_graph_finished_count_; }
void ResetGraphRunningStatus() {
first_step_ = false;
post_graph_finished_count_ = 0;


+ 20
- 5
mindspore/ccsrc/backend/common/session/session_basic.h View File

@@ -232,13 +232,24 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
virtual void UnifyMindIR(const KernelGraphPtr &graph);
virtual void FinalOptimize(const KernelGraphPtr &graph) const;
virtual GraphId CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) { return 0; }
virtual GraphId CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) { return kInvalidGraphId; }
virtual GraphId CompileGraphImpl(NotNull<FuncGraphPtr>) { return kInvalidGraphId; }
virtual void BuildGraphImpl(GraphId) {}
virtual void PreExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) {}
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) {
MS_EXCEPTION_IF_NULL(kernel_graph);
MS_EXCEPTION_IF_NULL(outputs);
MS_LOG(INFO) << "Call default PreExecuteGraph with input size: " << inputs.size();
}

virtual void PostExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) {}
virtual void ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) {}
const std::vector<tensor::TensorPtr> &inputs, VectorRef *const outputs) {
MS_EXCEPTION_IF_NULL(kernel_graph);
MS_EXCEPTION_IF_NULL(outputs);
MS_LOG(INFO) << "Call default PostExecuteGraph with input size: " << inputs.size();
}

virtual void ExecuteGraph(const std::shared_ptr<KernelGraph> &kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); }

void RunGraphImpl(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs);
virtual KernelGraphPtr BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
const std::vector<tensor::TensorPtr> &input_tensors,
@@ -275,7 +286,11 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
virtual void ExecuteAllTaskInQueue() {}

virtual void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) const {}
const std::vector<tensor::TensorPtr> &inputs_const) const {
MS_EXCEPTION_IF_NULL(kernel_graph);
MS_LOG(INFO) << "Call default LoadInputData with input size: " << inputs_const.size();
}

void UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_graph, VectorRef *const outputs,
const std::vector<tensor::TensorPtr> &input_tensors,
std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) const;


+ 2
- 2
mindspore/ccsrc/common/thread_pool.h View File

@@ -33,7 +33,7 @@
namespace mindspore {
namespace common {
enum Status { FAIL = -1, SUCCESS = 0 };
using Task = std::function<int()>;
using Task = std::function<Status()>;

struct ThreadContext {
std::mutex mutex;
@@ -48,7 +48,7 @@ class ThreadPool {
ThreadPool &operator=(const ThreadPool &) = delete;
static ThreadPool &GetInstance();
bool SyncRun(const std::vector<Task> &tasks);
size_t GetSyncRunThreadNum() { return max_thread_num_; }
size_t GetSyncRunThreadNum() const { return max_thread_num_; }
void ClearThreadPool();

private:


+ 4
- 4
mindspore/ccsrc/kernel/kernel.h View File

@@ -172,7 +172,7 @@ struct Address {
};
using AddressPtr = std::shared_ptr<Address>;
using AddressPtrList = std::vector<AddressPtr>;
using StreamType = void *;
// The memory info of kernel launch.
struct KernelLaunchInfo {
AddressPtrList inputs_;
@@ -215,8 +215,8 @@ class KernelMod {
const std::vector<AddressPtr> &GetInputsAddr() { return inputs_addr_; }
const std::vector<AddressPtr> &GetWorkSpacesAddr() { return workspaces_addr_; }
const std::vector<AddressPtr> &GetOutputsAddr() { return outputs_addr_; }
void SetStream(void *stream) { stream_ = stream; }
void *GetStream() const { return stream_; }
void set_stream(StreamType stream) { stream_ = stream; }
StreamType stream() const { return stream_; }
void SetAtomicCleanNodes(const std::vector<CNodePtr> &atomic_clean_node);

protected:
@@ -226,7 +226,7 @@ class KernelMod {
std::string unique_name_;
std::string fullname_;
bool is_monad_{false};
void *stream_{nullptr};
StreamType stream_{nullptr};
AnfNodeWeakPtr anf_node_;
std::map<uint32_t, tensor::TensorPtr> depend_tensor_map_;
std::vector<CNodeWeakPtr> atomic_clean_nodes_;


+ 2
- 2
mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_kernel_runtime.cc View File

@@ -765,9 +765,9 @@ void AscendKernelRuntime::SetKernelModStream(const std::vector<CNodePtr> &kernel
MS_LOG(EXCEPTION) << "create communication stream failed, ret:" << ret;
}
stream_id_map_[stream_id] = stream;
ascend_kernel_mod->SetStream(stream);
ascend_kernel_mod->set_stream(stream);
} else {
ascend_kernel_mod->SetStream(iter->second);
ascend_kernel_mod->set_stream(iter->second);
}
if (stream_id > 0) {
last_kernel[stream_id_map_[stream_id]] = i;


+ 1
- 1
mindspore/ccsrc/plugin/device/ascend/hal/device/profiling/profiling_reporter.cc View File

@@ -112,7 +112,7 @@ void ProfilingReporter::ReportStepPoint(const std::vector<std::shared_ptr<StepPo
MS_EXCEPTION_IF_NULL(kernel_mod);
// This function should report all tags; they will be saved to ts_track.data.<device_id>.slice_<index>.
// The first step index is set to 1 here to stay consistent with ge.
rtProfilerTraceEx(1, graph_id_, point->tag(), kernel_mod->GetStream());
rtProfilerTraceEx(1, graph_id_, point->tag(), kernel_mod->stream());

MS_LOG(INFO) << "Report step point, graph id: " << graph_id_ << ", op name: " << point->op_name()
<< ", stream id: " << GetStreamId(op_name) << ", task id: " << GetTaskId(op_name)


+ 1
- 1
mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_device_context.cc View File

@@ -765,7 +765,7 @@ void *AscendDeviceContext::GetKernelStream(const CNodePtr &node) const {
if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
return compute_stream_;
} else {
auto stream = kernel_mod->GetStream();
auto stream = kernel_mod->stream();
if (stream == nullptr) {
stream = compute_stream_;
MS_LOG(INFO) << "Assign default compute stream for node " << node->fullname_with_scope();


+ 0
- 1
mindspore/ccsrc/plugin/device/cpu/hal/device/cpu_memory_manager.cc View File

@@ -15,7 +15,6 @@
*/

#include "plugin/device/cpu/hal/device/cpu_memory_manager.h"
#include <memory>
#include "backend/common/session/anf_runtime_algorithm.h"
#include "utils/ms_context.h"
#include "utils/convert_utils.h"


+ 3
- 3
mindspore/ccsrc/plugin/device/cpu/kernel/fused_ada_factor_cpu_kernel.cc View File

@@ -38,8 +38,8 @@ static constexpr float kEps = 1e-30;
void FusedAdaFactorCpuKernelMod::InitInputOutputSize(const CNodePtr &kernel_node) {
NativeCpuKernelMod::InitInputOutputSize(kernel_node);
(void)workspace_size_list_.emplace_back(elem_num_ * kSizeFloat32);
(void)workspace_size_list_.emplace_back(elem_num_ / last_row_dim_size_ * kSizeFloat32);
(void)workspace_size_list_.emplace_back(elem_num_ / last_col_dim_size_ * kSizeFloat32);
(void)workspace_size_list_.emplace_back((elem_num_ / last_row_dim_size_) * kSizeFloat32);
(void)workspace_size_list_.emplace_back((elem_num_ / last_col_dim_size_) * kSizeFloat32);
}

void FusedAdaFactorCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
@@ -156,7 +156,7 @@ void FusedAdaFactorCpuKernelMod::FactorUpdate(float *update, const std::vector<A
task = [&](size_t start, size_t end) {
for (size_t i = start; i < end; ++i) {
float row_reduce = 0;
size_t reduce_start = i / row_dim_size * last_row_col_size + i % row_dim_size;
size_t reduce_start = (i / row_dim_size) * last_row_col_size + i % row_dim_size;
for (size_t j = 0; j < col_dim_size; ++j) {
row_reduce += update[reduce_start + j * row_dim_size];
}


+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/scatter_nd_update_cpu_kernel.cc View File

@@ -157,7 +157,7 @@ void ScatterUpdateCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inpu
size_t once_compute_size = (num_units_ + max_thread_num - 1) / max_thread_num;
while (start < num_units_) {
size_t end = (start + once_compute_size) > num_units_ ? num_units_ : (start + once_compute_size);
auto task = [&params, start, end]() -> int {
auto task = [&params, start, end]() {
Compute<T>(&params, start, end);
return common::SUCCESS;
};


+ 2
- 2
mindspore/ccsrc/plugin/device/cpu/kernel/sparse_apply_ftrl_cpu_kernel.cc View File

@@ -52,10 +52,10 @@ void ComputeFtrl(MultiThreadComputeParams<T> *input_params, size_t start, size_t
float y;
if (lr_power == -0.5) {
y = std::sqrt(accum_new);
linear[j] += summed_grad - (y - std::sqrt(accum[j])) / lr * var[j];
linear[j] += (summed_grad - (y - std::sqrt(accum[j])) / lr) * var[j];
} else {
y = std::pow(accum_new, -lr_power);
linear[j] += summed_grad - (y - std::pow(accum[j], -lr_power)) / lr * var[j];
linear[j] += (summed_grad - (y - std::pow(accum[j], -lr_power)) / lr) * var[j];
}
accum[j] = accum_new;
auto x = Sign(linear[j]) * l1 - linear[j];


+ 5
- 3
mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.cc View File

@@ -147,7 +147,8 @@ void StridedSliceCpuKernelMod::InitSliceParam(const CNodePtr &kernel_node, std::
slice_param_.num_axes_ = DIMENSION_8D;
}

int StridedSliceCpuKernelMod::RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos) {
common::Status StridedSliceCpuKernelMod::RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr,
int start_pos) {
int begin_index = slice_param_.begins_[split_axis_];
int inner_size = inner_ * data_size_;
const uint8_t *cur_in_ptr = input_addr + (start_pos * input_shape_[split_axis_] + begin_index) * inner_size;
@@ -162,7 +163,8 @@ int StridedSliceCpuKernelMod::RunTaskOnOuter(const uint8_t *input_addr, uint8_t
return common::SUCCESS;
}

int StridedSliceCpuKernelMod::RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos) {
common::Status StridedSliceCpuKernelMod::RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr,
int start_pos) {
int begin_index = slice_param_.begins_[split_axis_];
int inner_size = inner_ * data_size_;
const uint8_t *cur_in_ptr = input_addr + (start_pos * slice_param_.strides_[split_axis_] + begin_index) * inner_size;
@@ -179,7 +181,7 @@ int StridedSliceCpuKernelMod::RunTaskOnSplitAxis(const uint8_t *input_addr, uint
void StridedSliceCpuKernelMod::ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread_num) {
int thread_index = 0;
std::vector<common::Task> tasks;
std::function<int(StridedSliceCpuKernelMod *, const uint8_t *, uint8_t *, int)> execute_func;
std::function<common::Status(StridedSliceCpuKernelMod *, const uint8_t *, uint8_t *, int)> execute_func;
if (parallel_strategy_ == kOnOuter) {
execute_func = &StridedSliceCpuKernelMod::RunTaskOnOuter;
} else if (parallel_strategy_ == kOnSplitAxis) {


+ 2
- 2
mindspore/ccsrc/plugin/device/cpu/kernel/stridedslice_cpu_kernel.h View File

@@ -42,8 +42,8 @@ class StridedSliceCpuKernelMod : public NativeCpuKernelMod {
bool MatchParallelPattern();
void InitParallelParam();
void ParallelRun(const uint8_t *input_addr, uint8_t *output_addr, int thread_num);
int RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos);
int RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos);
common::Status RunTaskOnOuter(const uint8_t *input_addr, uint8_t *output_addr, int start_pos);
common::Status RunTaskOnSplitAxis(const uint8_t *input_addr, uint8_t *output_addr, int start_pos);
void ParseMasks(const CNodePtr &kernel_node);

TypeId dtype_;


+ 1
- 1
mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.cc View File

@@ -99,7 +99,7 @@ void UniqueCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, con
params->workspace_idx_ = reinterpret_cast<IndexType *>(workspace[2]->addr);
params->output_ = reinterpret_cast<DataType *>(outputs[0]->addr);
params->inverse_idx_ = reinterpret_cast<IndexType *>(outputs[1]->addr);
params->input_size_ = static_cast<IndexType>(input_size_);
params->input_size_ = input_size_;
params->output_size_ = 0;

params->thread_num_ = common::ThreadPool::GetInstance().GetSyncRunThreadNum();


+ 48
- 43
mindspore/ccsrc/plugin/device/cpu/kernel/unique_cpu_kernel.h View File

@@ -36,8 +36,8 @@ struct UniqueParam {
IndexType *inverse_idx_{nullptr};
DataType *workspace_{nullptr};
IndexType *workspace_idx_{nullptr};
IndexType input_size_{0};
IndexType output_size_{0};
size_t input_size_{0};
size_t output_size_{0};
size_t thread_num_{0};
bool need_sort_{true};
};
@@ -48,15 +48,14 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
~UniqueCpuKernelMod() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
void InitInputOutputSize(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;

protected:
void InitInputOutputSize(const CNodePtr &kernel_node) override;
template <typename DataType, typename IndexType>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

protected:
size_t input_size_{0};
TypeId dtype_{kTypeUnknown};
size_t output_size_{0};
@@ -64,16 +63,20 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
CNodeWeakPtr node_wpt_;

template <typename DataType>
static size_t BucketId(DataType data, size_t bucket_num) {
static size_t BucketId(DataType input, size_t bucket_num) {
if (input < 0) {
input = -input;
}
size_t data = static_cast<size_t>(input);
if (bucket_num < 1) {
return static_cast<size_t>(data);
return data;
}
return static_cast<size_t>(data) % bucket_num;
return data % bucket_num;
}

template <typename DataType, typename IndexType>
static void CalculateEachBucketSize(const std::shared_ptr<UniqueParam<DataType, IndexType>> &params,
std::vector<IndexType> *each_bucket_size) {
std::vector<size_t> *each_bucket_size) {
MS_EXCEPTION_IF_NULL(params);
MS_EXCEPTION_IF_NULL(params->input_);
MS_EXCEPTION_IF_NULL(each_bucket_size);
@@ -81,17 +84,16 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
if (params->input_size_ < 1) {
return;
}
for (IndexType i = 0; i < params->input_size_; ++i) {
for (size_t i = 0; i < params->input_size_; ++i) {
auto bucket_id = BucketId(params->input_[i], bucket_num);
each_bucket_size->at(bucket_id)++;
}
}

template <typename DataType, typename IndexType>
static void SplitAndCalculateBucketSize(
const std::shared_ptr<UniqueParam<DataType, IndexType>> &params,
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *segments_ptr,
std::vector<std::shared_ptr<std::vector<IndexType>>> *segment_bucket_sizes_ptr) {
static void SplitAndCalculateBucketSize(const std::shared_ptr<UniqueParam<DataType, IndexType>> &params,
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *segments_ptr,
std::vector<std::shared_ptr<std::vector<size_t>>> *segment_bucket_sizes_ptr) {
MS_EXCEPTION_IF_NULL(params);
MS_EXCEPTION_IF_NULL(params->input_);
MS_EXCEPTION_IF_NULL(segments_ptr);
@@ -99,21 +101,21 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
auto &segments = *segments_ptr;
auto &segment_bucket_sizes = *segment_bucket_sizes_ptr;

IndexType input_size = params->input_size_;
size_t input_size = params->input_size_;
size_t thread_num = params->thread_num_;
if (thread_num < 1) {
MS_LOG(EXCEPTION) << "For 'Unique', thread num should be greater than 0, but got " << thread_num;
}
IndexType thread_data_size = input_size / thread_num;
size_t thread_data_size = input_size / thread_num;
size_t left_data_size = input_size % thread_num;
segments.reserve(thread_num);
segment_bucket_sizes.reserve(thread_num);
IndexType current_offset = 0;
size_t current_offset = 0;
std::vector<common::Task> tasks;
tasks.reserve(thread_num);
for (size_t i = 0; i < thread_num; ++i) {
(void)segment_bucket_sizes.emplace_back(std::make_shared<std::vector<IndexType>>(thread_num, 0));
IndexType data_size = thread_data_size;
(void)segment_bucket_sizes.emplace_back(std::make_shared<std::vector<size_t>>(thread_num, 0));
size_t data_size = thread_data_size;
if (i < left_data_size) {
data_size += 1;
}
@@ -132,18 +134,17 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
}

template <typename DataType, typename IndexType>
static void SegmentToBuckets(const std::shared_ptr<UniqueParam<DataType, IndexType>> &segment,
IndexType segment_offset,
static void SegmentToBuckets(const std::shared_ptr<UniqueParam<DataType, IndexType>> &segment, size_t segment_offset,
const std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> &buckets) {
MS_LOG(DEBUG) << "Start";
MS_EXCEPTION_IF_NULL(segment);
MS_EXCEPTION_IF_NULL(segment->input_);
std::vector<IndexType> bucket_data_num(segment->thread_num_, 0);
std::vector<size_t> bucket_data_num(segment->thread_num_, 0);
auto bucket_size = buckets.size();
if (segment->input_size_ < 1) {
return;
}
for (IndexType i = 0; i < segment->input_size_; ++i) {
for (size_t i = 0; i < segment->input_size_; ++i) {
DataType data = segment->input_[i];
auto bucket_id = BucketId(data, segment->thread_num_);
auto bucket_index = bucket_data_num[bucket_id];
@@ -160,7 +161,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
continue;
}
bucket->input_[bucket_index] = data;
bucket->workspace_idx_[bucket_index] = segment_offset + i;
bucket->workspace_idx_[bucket_index] = static_cast<IndexType>(segment_offset + i);
bucket_data_num[bucket_id]++;
}
MS_LOG(DEBUG) << "End";
@@ -169,7 +170,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
template <typename DataType, typename IndexType>
static void GatherSegmentsToBuckets(const std::shared_ptr<UniqueParam<DataType, IndexType>> &params,
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *segments_ptr,
std::vector<std::shared_ptr<std::vector<IndexType>>> *segment_bucket_sizes_ptr,
std::vector<std::shared_ptr<std::vector<size_t>>> *segment_bucket_sizes_ptr,
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *buckets_ptr) {
MS_LOG(DEBUG) << "Start";
MS_EXCEPTION_IF_NULL(params);
@@ -186,14 +187,14 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
auto &buckets = *buckets_ptr;
auto thread_num = segments.size();
buckets.reserve(thread_num);
std::vector<IndexType> bucket_data_size(thread_num, 0);
std::vector<size_t> bucket_data_size(thread_num, 0);
for (size_t i = 0; i < thread_num; ++i) {
for (size_t j = 0; j < thread_num; ++j) {
bucket_data_size[j] += segment_bucket_sizes[i]->at(j);
}
}

IndexType current_offset = 0;
size_t current_offset = 0;
for (size_t i = 0; i < thread_num; ++i) {
auto bucket = std::make_shared<UniqueParam<DataType, IndexType>>();
bucket->input_ = params->output_ + current_offset;
@@ -205,7 +206,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
current_offset += bucket_data_size[i];
(void)buckets.emplace_back(bucket);
}
std::vector<IndexType> tmp_bucket_data_size(thread_num, 0);
std::vector<size_t> tmp_bucket_data_size(thread_num, 0);
std::vector<std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>>> thread_buckets;
for (size_t i = 0; i < thread_num; ++i) {
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> local_buckets;
@@ -251,14 +252,14 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
if (params->input_size_ < 1) {
return;
}
if (params->need_sort_) {
for (IndexType i = 0; i < params->input_size_; ++i) {
input_idx[i] = i;
if (params->need_sort_ && !std::is_same<DataType, float>::value) {
for (size_t i = 0; i < params->input_size_; ++i) {
input_idx[i] = static_cast<IndexType>(i);
}
std::sort(input_idx, input_idx + params->input_size_,
[&](IndexType left, IndexType right) { return input[left] < input[right]; });
[&](size_t left, size_t right) { return input[left] < input[right]; });
DataType last = input[0];
for (IndexType i = 0; i < params->input_size_; ++i) {
for (size_t i = 0; i < params->input_size_; ++i) {
auto curr = input[input_idx[i]];
if (i == 0 || curr != last) {
if (i != 0) {
@@ -271,11 +272,11 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
inverse_idx[input_idx[i]] = j;
}
}
params->output_size_ = j + 1;
params->output_size_ = static_cast<size_t>(j + 1);
} else {
std::unordered_map<DataType, IndexType> uniq;
uniq.reserve(params->input_size_);
for (IndexType i = 0; i < params->input_size_; ++i) {
for (size_t i = 0; i < params->input_size_; ++i) {
auto it = uniq.emplace(input[i], j);
inverse_idx[i] = it.first->second;
if (it.second) {
@@ -285,7 +286,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
for (const auto &it : uniq) {
output[it.second] = it.first;
}
params->output_size_ = j;
params->output_size_ = static_cast<size_t>(j);
}
MS_LOG(DEBUG) << "End";
}
@@ -310,7 +311,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
template <typename DataType, typename IndexType>
static void TransformBucketReverseIndices(const std::shared_ptr<UniqueParam<DataType, IndexType>> &bucket,
const std::shared_ptr<UniqueParam<DataType, IndexType>> &result,
IndexType offset) {
size_t offset) {
MS_EXCEPTION_IF_NULL(bucket);
MS_EXCEPTION_IF_NULL(bucket->inverse_idx_);
MS_EXCEPTION_IF_NULL(bucket->workspace_idx_);
@@ -319,10 +320,14 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
if (bucket->input_size_ < 1) {
return;
}
for (IndexType i = 0; i < bucket->input_size_; ++i) {
for (size_t i = 0; i < bucket->input_size_; ++i) {
auto origin_idx = bucket->workspace_idx_[i];
if (origin_idx >= 0 && origin_idx < result->input_size_) {
result->inverse_idx_[origin_idx] = bucket->inverse_idx_[i] + offset;
if (origin_idx < 0) {
continue;
}
size_t index = static_cast<size_t>(origin_idx);
if (index < result->input_size_) {
result->inverse_idx_[index] = bucket->inverse_idx_[i] + offset;
}
}
}
@@ -334,8 +339,8 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
MS_EXCEPTION_IF_NULL(result);
MS_EXCEPTION_IF_NULL(result->output_);
size_t thread_num = buckets.size();
std::vector<IndexType> bucket_offsets(thread_num);
IndexType current_size = 0;
std::vector<size_t> bucket_offsets(thread_num);
size_t current_size = 0;
for (size_t i = 0; i < thread_num; ++i) {
auto bucket = buckets[i];
MS_EXCEPTION_IF_NULL(bucket);
@@ -368,7 +373,7 @@ class UniqueCpuKernelMod : public NativeCpuKernelMod {
MS_EXCEPTION_IF_NULL(params);
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> segments;
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> buckets;
std::vector<std::shared_ptr<std::vector<IndexType>>> segment_bucket_sizes;
std::vector<std::shared_ptr<std::vector<size_t>>> segment_bucket_sizes;
SplitAndCalculateBucketSize(params, &segments, &segment_bucket_sizes);
GatherSegmentsToBuckets(params, &segments, &segment_bucket_sizes, &buckets);
UniqueEachBucket(buckets);


+ 1
- 1
mindspore/ccsrc/runtime/device/kernel_runtime.cc View File

@@ -1483,7 +1483,7 @@ bool KernelRuntime::LaunchKernel(const session::KernelGraph &graph, const AnfNod
auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
MS_EXCEPTION_IF_NULL(kernel_mod);
KernelLaunchInfo kernel_launch_info;
auto stream = kernel_mod->GetStream();
auto stream = kernel_mod->stream();
if (stream == nullptr) {
if (AnfAlgo::IsCommunicationOp(kernel)) {
stream = communication_stream_;


+ 2
- 0
mindspore/ccsrc/runtime/device/memory_scheduler.h View File

@@ -27,6 +27,8 @@ namespace mindspore {
namespace device {
class MemHandler {
public:
MemHandler() = default;
virtual ~MemHandler() = default;
virtual size_t GetAvailableMemSize() = 0;
virtual void *MallocDevice(size_t mem_size) = 0;
virtual void FreeDevice(void *ptr) = 0;


+ 2
- 2
mindspore/core/utils/ms_exception.cc View File

@@ -18,12 +18,12 @@

namespace mindspore {
MsException &MsException::Instance() {
static MsException instance;
static MsException instance{};
return instance;
}

StaticAnalysisException &StaticAnalysisException::Instance() {
static StaticAnalysisException instance;
static StaticAnalysisException instance{};
return instance;
}
} // namespace mindspore

Loading…
Cancel
Save