Browse Source

Support output-to-input reference mapping for GPU and CPU parameter updates

feature/build-system-rewrite
ZPaC 4 years ago
parent
commit
04ee8f4dd9
15 changed files with 113 additions and 13 deletions
  1. +18
    -5
      mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc
  2. +3
    -0
      mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.h
  3. +4
    -0
      mindspore/ccsrc/include/common/utils/utils.h
  4. +8
    -0
      mindspore/ccsrc/plugin/device/cpu/hal/device/kernel_select_cpu.h
  5. +9
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel_factory.cc
  6. +4
    -0
      mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel_factory.h
  7. +7
    -3
      mindspore/ccsrc/plugin/device/cpu/kernel/rpc/rpc_recv_kernel.cc
  8. +9
    -0
      mindspore/ccsrc/plugin/device/gpu/hal/device/kernel_info_setter.h
  9. +9
    -0
      mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel_factory.cc
  10. +4
    -0
      mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel_factory.h
  11. +8
    -4
      mindspore/ccsrc/plugin/device/gpu/kernel/nn/momentum_gpu_kernel.cc
  12. +1
    -1
      mindspore/ccsrc/plugin/device/gpu/kernel/nn/momentum_gpu_kernel.h
  13. +5
    -0
      mindspore/ccsrc/runtime/device/kernel_info.h
  14. +21
    -0
      mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc
  15. +3
    -0
      mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.h

+ 18
- 5
mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.cc View File

@@ -300,6 +300,8 @@ CNodePtr GraphSplitter::GenerateRecvNode(const AnfNodePtr &input, const AnfNodeP
MS_EXCEPTION_IF_NULL(peer);

std::vector<AnfNodePtr> recv_inputs = {NewValueNode(std::make_shared<Primitive>(kRpcRecvOpName))};
CNodePtr recv_node = nullptr;
AbstractBasePtr recv_node_abs = nullptr;
if (IsPrimitiveCNode(input, prim::kPrimUpdateState)) {
ValuePtr monad_value = nullptr;
if (HasAbstractUMonad(input)) {
@@ -312,14 +314,25 @@ CNodePtr GraphSplitter::GenerateRecvNode(const AnfNodePtr &input, const AnfNodeP
auto monad_input = NewValueNode(monad_value);
monad_input->set_abstract(monad_value->ToAbstract());
recv_inputs.push_back(monad_input);
recv_node_abs = input->abstract();
} else {
auto mock_value = GenerateMockValueNode(true, input);
MS_EXCEPTION_IF_NULL(mock_value);
recv_inputs.push_back(mock_value);
if (input->isa<CNode>() && common::AnfAlgo::HasNodeAttr(kAttrUpdateParameter, input->cast<CNodePtr>()) &&
common::AnfAlgo::HasNodeAttr(kAttrParameterInputIndex, input->cast<CNodePtr>())) {
int64_t parameter_index = common::AnfAlgo::GetNodeAttr<int64_t>(input, kAttrParameterInputIndex);
auto kernel_with_index = common::AnfAlgo::VisitKernel(input, LongToUlong(parameter_index));
auto param_node = kernel_with_index.first;
recv_inputs.push_back(param_node);
recv_node_abs = param_node->abstract();
} else {
auto mock_value = GenerateMockValueNode(true, input);
MS_EXCEPTION_IF_NULL(mock_value);
recv_inputs.push_back(mock_value);
recv_node_abs = input->abstract();
}
}
CNodePtr recv_node = func_graph_->NewCNode(recv_inputs);
recv_node = func_graph_->NewCNode(recv_inputs);
MS_EXCEPTION_IF_NULL(recv_node);
recv_node->set_abstract(input->abstract());
recv_node->set_abstract(recv_node_abs);

// The label should be the same as the node which Receives the 'input'.
node_labels_[recv_node] = node_labels_[peer];


+ 3
- 0
mindspore/ccsrc/frontend/parallel/graph_util/graph_splitter.h View File

@@ -81,6 +81,9 @@ struct InterProcessOpEdge {
using InterProcessOpPair = std::tuple<CNodePtr, CNodePtr, CNodePtr, int>;
using InterProcessOpEdgesInfo = std::map<InterProcessOpEdge, InterProcessOpPair>;

constexpr char kAttrUpdateParameter[] = "update_parameter";
constexpr char kAttrParameterInputIndex[] = "parameter_input_index";

// The class is used as an action in pipeline. It will process the graph and split the nodes to each process in the
// cluster.
class GraphSplitter {


+ 4
- 0
mindspore/ccsrc/include/common/utils/utils.h View File

@@ -784,6 +784,10 @@ const std::set<std::string> DynamicShapeConstInputToAttrGPU = {
kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceSumOpName,
kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName};

// The map between kernel's output and input ref relationship.
// Key is the output index while the value is input index which will be used as the reference of output.
using OutputInputRefMap = std::map<size_t, size_t>;

static inline void ChangeFileMode(const std::string &file_name, mode_t mode) {
if (access(file_name.c_str(), F_OK) == -1) {
return;


+ 8
- 0
mindspore/ccsrc/plugin/device/cpu/hal/device/kernel_select_cpu.h View File

@@ -54,6 +54,11 @@ class KernelAttr {
return *this;
}

KernelAttr &AddOutInRef(size_t output_index, size_t input_index) {
out_in_ref_map_[output_index] = input_index;
return *this;
}

const DataType &GetInputAttr(const size_t index) const { return input_type_[index]; }
const DataType &GetOutputAttr(const size_t index) const { return output_type_[index]; }
bool GetAllSame() const { return all_same_; }
@@ -63,10 +68,13 @@ class KernelAttr {

size_t GetInputSize() const { return input_type_.size(); }
size_t GetOutputSize() const { return output_type_.size(); }
const OutputInputRefMap &GetOutInRefMap() const { return out_in_ref_map_; }

private:
std::vector<DataType> input_type_;
std::vector<DataType> output_type_;
// The map between kernel's output and input ref relationship.
OutputInputRefMap out_in_ref_map_;
bool all_same_;
};
} // namespace cpu


+ 9
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel_factory.cc View File

@@ -48,6 +48,7 @@ std::shared_ptr<NativeCpuKernelMod> NativeCpuKernelModFactory::Create(const std:
MS_EXCEPTION_IF_NULL(kernel_build_Info);
std::pair<bool, size_t> ret_pair = CPUKernelAttrCheck(kernel_name, *kernel_build_Info);
if (ret_pair.first) {
SetRefMapToKernelInfo(kernel_name, ret_pair.second, kernel_info);
return (name_to_attr_creator_.find(kernel_name)->second)[ret_pair.second].second();
}
return nullptr;
@@ -163,6 +164,14 @@ bool NativeCpuKernelModFactory::CPUKernelSingleAttrCheck(const KernelAttr &kerne
return true;
}

void NativeCpuKernelModFactory::SetRefMapToKernelInfo(const std::string &kernel_name, size_t index,
device::KernelInfo *kernel_info) {
const auto &kernel_attr = (name_to_attr_creator_.find(kernel_name)->second)[index].first;
if (!kernel_attr.GetOutInRefMap().empty()) {
kernel_info->set_ref_map(kernel_attr.GetOutInRefMap());
}
}

std::vector<KernelAttr> NativeCpuKernelModFactory::GetSupportedKernelAttrList(const std::string &kernel_name) {
std::vector<KernelAttr> result;
auto iter = name_to_attr_creator_.find(kernel_name);


+ 4
- 0
mindspore/ccsrc/plugin/device/cpu/kernel/cpu_kernel_factory.h View File

@@ -51,6 +51,10 @@ class NativeCpuKernelModFactory {
DISABLE_COPY_AND_ASSIGN(NativeCpuKernelModFactory)
std::pair<bool, size_t> CPUKernelAttrCheck(const std::string &kernel_name, const KernelBuildInfo &kernel_info);
bool CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) const;

// Set output and input ref map to kernel info which will be used by graph compiler.
void SetRefMapToKernelInfo(const std::string &kernel_name, size_t index, device::KernelInfo *kernel_info);

std::map<std::string, std::vector<std::pair<KernelAttr, NativeCpuKernelModCreator>>> name_to_attr_creator_;
};



+ 7
- 3
mindspore/ccsrc/plugin/device/cpu/kernel/rpc/rpc_recv_kernel.cc View File

@@ -18,8 +18,12 @@

namespace mindspore {
namespace kernel {
MS_REG_CPU_KERNEL_T(
RpcRecv, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32).SetAllSameAttr(true),
RpcRecvKernelMod, float);
MS_REG_CPU_KERNEL_T(RpcRecv,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.SetAllSameAttr(true)
.AddOutInRef(0, 0),
RpcRecvKernelMod, float);
} // namespace kernel
} // namespace mindspore

+ 9
- 0
mindspore/ccsrc/plugin/device/gpu/hal/device/kernel_info_setter.h View File

@@ -109,16 +109,25 @@ class KernelAttr {
return *this;
}

KernelAttr &AddOutInRef(size_t output_index, size_t input_index) {
out_in_ref_map_[output_index] = input_index;
return *this;
}

const DataType &GetInputAttr(const size_t index) const { return input_type_[index]; }
const DataType &GetOutputAttr(const size_t index) const { return output_type_[index]; }
const bool &GetAllSame() const { return all_same_; }

size_t GetInputSize() const { return input_type_.size(); }
size_t GetOutputSize() const { return output_type_.size(); }
const OutputInputRefMap &GetOutInRefMap() const { return out_in_ref_map_; }

private:
std::vector<DataType> input_type_;
std::vector<DataType> output_type_;

// The map between kernel's output and input ref relationship.
OutputInputRefMap out_in_ref_map_;
bool all_same_;
};
} // namespace gpu


+ 9
- 0
mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel_factory.cc View File

@@ -120,6 +120,14 @@ void NativeGpuKernelModFactory::CheckSM(const KernelBuildInfo *kernel_info, cons
}
}

void NativeGpuKernelModFactory::SetRefMapToKernelInfo(const std::string &kernel_name, size_t index,
device::KernelInfo *kernel_info) {
const auto &kernel_attr = (map_kernel_name_to_creater_.find(kernel_name)->second)[index].first;
if (!kernel_attr.GetOutInRefMap().empty()) {
kernel_info->set_ref_map(kernel_attr.GetOutInRefMap());
}
}

std::pair<bool, size_t> NativeGpuKernelModFactory::GpuKernelAttrCheck(const std::string &kernel_name,
const KernelBuildInfo *kernel_info) {
auto iter = map_kernel_name_to_creater_.find(kernel_name);
@@ -181,6 +189,7 @@ NativeGpuKernelMod *NativeGpuKernelModFactory::Create(const std::string &kernel_
MS_EXCEPTION_IF_NULL(kernel_build_Info);
std::pair<bool, size_t> ret_pair = GpuKernelAttrCheck(kernel_name, kernel_build_Info);
if (ret_pair.first) {
SetRefMapToKernelInfo(kernel_name, ret_pair.second, kernel_info);
return (map_kernel_name_to_creater_.find(kernel_name)->second)[ret_pair.second].second();
}
return nullptr;


+ 4
- 0
mindspore/ccsrc/plugin/device/gpu/kernel/gpu_kernel_factory.h View File

@@ -60,6 +60,10 @@ class NativeGpuKernelModFactory {
void CheckSM(const KernelBuildInfo *kernel_info, const size_t &input_index);
bool CheckIOParam(const std::string &kernel_name, const KernelBuildInfo *kernel_info,
std::vector<std::pair<KernelAttr, NativeGpuKernelModCreater>> *iter_second, size_t attr_index);

// Set output and input ref map to kernel info which will be used by graph compiler.
void SetRefMapToKernelInfo(const std::string &kernel_name, size_t index, device::KernelInfo *kernel_info);

// map to maintain kernel and creator, KernelAttr object and creator must be registered as a pair.
std::map<std::string, std::vector<std::pair<KernelAttr, NativeGpuKernelModCreater>>> map_kernel_name_to_creater_;
};


+ 8
- 4
mindspore/ccsrc/plugin/device/gpu/kernel/nn/momentum_gpu_kernel.cc View File

@@ -25,7 +25,8 @@ MS_REG_GPU_KERNEL_THREE(ApplyMomentum,
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
.AddOutputAttr(kNumberTypeFloat32)
.AddOutInRef(0, 0),
MomentumGpuKernelMod, float, float, float)
MS_REG_GPU_KERNEL_THREE(ApplyMomentum,
KernelAttr()
@@ -34,7 +35,8 @@ MS_REG_GPU_KERNEL_THREE(ApplyMomentum,
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
.AddOutputAttr(kNumberTypeFloat16)
.AddOutInRef(0, 0),
MomentumGpuKernelMod, half, half, half)
MS_REG_GPU_KERNEL_THREE(ApplyMomentum,
KernelAttr()
@@ -43,7 +45,8 @@ MS_REG_GPU_KERNEL_THREE(ApplyMomentum,
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat16),
.AddOutputAttr(kNumberTypeFloat16)
.AddOutInRef(0, 0),
MomentumGpuKernelMod, half, float, half)
MS_REG_GPU_KERNEL_THREE(ApplyMomentum,
KernelAttr()
@@ -52,7 +55,8 @@ MS_REG_GPU_KERNEL_THREE(ApplyMomentum,
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
.AddOutputAttr(kNumberTypeFloat32)
.AddOutInRef(0, 0),
MomentumGpuKernelMod, float, float, half)
} // namespace kernel
} // namespace mindspore

+ 1
- 1
mindspore/ccsrc/plugin/device/gpu/kernel/nn/momentum_gpu_kernel.h View File

@@ -99,7 +99,7 @@ class MomentumGpuKernelMod : public NativeGpuKernelMod {
input_size_list_.push_back(learning_rate_size_);
input_size_list_.push_back(gradient_size_);
input_size_list_.push_back(momentum_size_);
output_size_list_.push_back(0);
output_size_list_.push_back(variable_size_);
}

private:


+ 5
- 0
mindspore/ccsrc/runtime/device/kernel_info.h View File

@@ -73,6 +73,9 @@ class KernelInfo : public KernelInfoDevice {
const std::vector<std::shared_ptr<DeviceAddress>> &output_address_list() const { return output_address_list_; }
const std::vector<std::shared_ptr<DeviceAddress>> &workspace_address_list() const { return workspace_address_list_; }

void set_ref_map(const OutputInputRefMap &ref_map) { out_in_ref_map_ = ref_map; }
const OutputInputRefMap &out_in_ref_map() const { return out_in_ref_map_; }

private:
bool is_feature_map_;
kernel::KernelBuildInfoPtr select_kernel_build_info_;
@@ -85,6 +88,8 @@ class KernelInfo : public KernelInfoDevice {
uint32_t stream_distinction_label_;
// record which graph the node belong to
uint32_t graph_id_;
// The map between kernel's output and input ref relationship.
OutputInputRefMap out_in_ref_map_;
};
} // namespace device
} // namespace mindspore


+ 21
- 0
mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.cc View File

@@ -469,6 +469,8 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
// Execute optimization pass.
device_context->OptimizeGraph(graph);

AddOutInRefToGraph(graph);

// Generate 'KernelMod' for all kernels and set 'KernelMod' into kernel,
// 'KernelMod' is real executive object of kernel.
device_context->CreateKernel(graph->execution_order());
@@ -586,6 +588,25 @@ KernelGraphPtr GraphCompiler::Fetch(const GraphInfo &graph_info) const {
return iter->second;
}

void GraphCompiler::AddOutInRefToGraph(const KernelGraphPtr &graph) const {
MS_EXCEPTION_IF_NULL(graph);
for (const auto &cnode : graph->execution_order()) {
MS_EXCEPTION_IF_NULL(cnode);
auto kernel_info = dynamic_cast<device::KernelInfo *>(cnode->kernel_info());
MS_EXCEPTION_IF_NULL(kernel_info);
for (const auto &ref : kernel_info->out_in_ref_map()) {
size_t output_index = ref.first;
size_t input_index = ref.second;
auto final_pair = std::make_pair(cnode, output_index);
auto origin_pair = common::AnfAlgo::VisitKernel(common::AnfAlgo::GetInputNode(cnode, input_index), 0);
MS_LOG(INFO) << "The reference relation output " << final_pair.first->fullname_with_scope()
<< ", output index: " << final_pair.second << " to input "
<< origin_pair.first->fullname_with_scope() << ", output index: " << origin_pair.second;
graph->AddRefCorrespondPairs(final_pair, origin_pair);
}
}
}

void GraphCompiler::CreateDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *device_context,
bool is_gradient_out) const {
MS_LOG(INFO) << "Status record: start create device address. graph id: " << graph->graph_id();


+ 3
- 0
mindspore/ccsrc/runtime/graph_scheduler/graph_compiler.h View File

@@ -189,6 +189,9 @@ class GraphCompiler {
// setting operator info, creating kernel and transforming kernel graph to ActorSet.
GraphId CompileGraphImpl(const KernelGraphPtr &graph, const DeviceContext *device_context) const;

// Add operators' output and input reference map to the graph.
void AddOutInRefToGraph(const KernelGraphPtr &graph) const;

// Create device address for all anf nodes of graph.
void CreateDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *device_context,
bool is_gradient_out) const;


Loading…
Cancel
Save