| @@ -300,6 +300,8 @@ CNodePtr GraphSplitter::GenerateRecvNode(const AnfNodePtr &input, const AnfNodeP | |||
| MS_EXCEPTION_IF_NULL(peer); | |||
| std::vector<AnfNodePtr> recv_inputs = {NewValueNode(std::make_shared<Primitive>(kRpcRecvOpName))}; | |||
| CNodePtr recv_node = nullptr; | |||
| AbstractBasePtr recv_node_abs = nullptr; | |||
| if (IsPrimitiveCNode(input, prim::kPrimUpdateState)) { | |||
| ValuePtr monad_value = nullptr; | |||
| if (HasAbstractUMonad(input)) { | |||
| @@ -312,14 +314,25 @@ CNodePtr GraphSplitter::GenerateRecvNode(const AnfNodePtr &input, const AnfNodeP | |||
| auto monad_input = NewValueNode(monad_value); | |||
| monad_input->set_abstract(monad_value->ToAbstract()); | |||
| recv_inputs.push_back(monad_input); | |||
| recv_node_abs = input->abstract(); | |||
| } else { | |||
| auto mock_value = GenerateMockValueNode(true, input); | |||
| MS_EXCEPTION_IF_NULL(mock_value); | |||
| recv_inputs.push_back(mock_value); | |||
| if (input->isa<CNode>() && common::AnfAlgo::HasNodeAttr(kAttrUpdateParameter, input->cast<CNodePtr>()) && | |||
| common::AnfAlgo::HasNodeAttr(kAttrParameterInputIndex, input->cast<CNodePtr>())) { | |||
| int64_t parameter_index = common::AnfAlgo::GetNodeAttr<int64_t>(input, kAttrParameterInputIndex); | |||
| auto kernel_with_index = common::AnfAlgo::VisitKernel(input, LongToUlong(parameter_index)); | |||
| auto param_node = kernel_with_index.first; | |||
| recv_inputs.push_back(param_node); | |||
| recv_node_abs = param_node->abstract(); | |||
| } else { | |||
| auto mock_value = GenerateMockValueNode(true, input); | |||
| MS_EXCEPTION_IF_NULL(mock_value); | |||
| recv_inputs.push_back(mock_value); | |||
| recv_node_abs = input->abstract(); | |||
| } | |||
| } | |||
| CNodePtr recv_node = func_graph_->NewCNode(recv_inputs); | |||
| recv_node = func_graph_->NewCNode(recv_inputs); | |||
| MS_EXCEPTION_IF_NULL(recv_node); | |||
| recv_node->set_abstract(input->abstract()); | |||
| recv_node->set_abstract(recv_node_abs); | |||
| // The label should be the same as the node which Receives the 'input'. | |||
| node_labels_[recv_node] = node_labels_[peer]; | |||
| @@ -81,6 +81,9 @@ struct InterProcessOpEdge { | |||
| using InterProcessOpPair = std::tuple<CNodePtr, CNodePtr, CNodePtr, int>; | |||
| using InterProcessOpEdgesInfo = std::map<InterProcessOpEdge, InterProcessOpPair>; | |||
| constexpr char kAttrUpdateParameter[] = "update_parameter"; | |||
| constexpr char kAttrParameterInputIndex[] = "parameter_input_index"; | |||
| // This class is used as an action in the pipeline. It processes the graph and splits its nodes across the processes in | |||
| // the cluster. | |||
| class GraphSplitter { | |||
| @@ -784,6 +784,10 @@ const std::set<std::string> DynamicShapeConstInputToAttrGPU = { | |||
| kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceSumOpName, | |||
| kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName}; | |||
| // Map describing the reference relationship between a kernel's outputs and inputs. | |||
| // The key is an output index; the value is the index of the input that is used as the reference of that output. | |||
| using OutputInputRefMap = std::map<size_t, size_t>; | |||
| static inline void ChangeFileMode(const std::string &file_name, mode_t mode) { | |||
| if (access(file_name.c_str(), F_OK) == -1) { | |||
| return; | |||
| @@ -54,6 +54,11 @@ class KernelAttr { | |||
| return *this; | |||
| } | |||
| KernelAttr &AddOutInRef(size_t output_index, size_t input_index) { | |||
| out_in_ref_map_[output_index] = input_index; | |||
| return *this; | |||
| } | |||
| const DataType &GetInputAttr(const size_t index) const { return input_type_[index]; } | |||
| const DataType &GetOutputAttr(const size_t index) const { return output_type_[index]; } | |||
| bool GetAllSame() const { return all_same_; } | |||
| @@ -63,10 +68,13 @@ class KernelAttr { | |||
| size_t GetInputSize() const { return input_type_.size(); } | |||
| size_t GetOutputSize() const { return output_type_.size(); } | |||
| const OutputInputRefMap &GetOutInRefMap() const { return out_in_ref_map_; } | |||
| private: | |||
| std::vector<DataType> input_type_; | |||
| std::vector<DataType> output_type_; | |||
| // The map between kernel's output and input ref relationship. | |||
| OutputInputRefMap out_in_ref_map_; | |||
| bool all_same_; | |||
| }; | |||
| } // namespace cpu | |||
| @@ -48,6 +48,7 @@ std::shared_ptr<NativeCpuKernelMod> NativeCpuKernelModFactory::Create(const std: | |||
| MS_EXCEPTION_IF_NULL(kernel_build_Info); | |||
| std::pair<bool, size_t> ret_pair = CPUKernelAttrCheck(kernel_name, *kernel_build_Info); | |||
| if (ret_pair.first) { | |||
| SetRefMapToKernelInfo(kernel_name, ret_pair.second, kernel_info); | |||
| return (name_to_attr_creator_.find(kernel_name)->second)[ret_pair.second].second(); | |||
| } | |||
| return nullptr; | |||
| @@ -163,6 +164,14 @@ bool NativeCpuKernelModFactory::CPUKernelSingleAttrCheck(const KernelAttr &kerne | |||
| return true; | |||
| } | |||
| void NativeCpuKernelModFactory::SetRefMapToKernelInfo(const std::string &kernel_name, size_t index, | |||
| device::KernelInfo *kernel_info) { | |||
| const auto &kernel_attr = (name_to_attr_creator_.find(kernel_name)->second)[index].first; | |||
| if (!kernel_attr.GetOutInRefMap().empty()) { | |||
| kernel_info->set_ref_map(kernel_attr.GetOutInRefMap()); | |||
| } | |||
| } | |||
| std::vector<KernelAttr> NativeCpuKernelModFactory::GetSupportedKernelAttrList(const std::string &kernel_name) { | |||
| std::vector<KernelAttr> result; | |||
| auto iter = name_to_attr_creator_.find(kernel_name); | |||
| @@ -51,6 +51,10 @@ class NativeCpuKernelModFactory { | |||
| DISABLE_COPY_AND_ASSIGN(NativeCpuKernelModFactory) | |||
| std::pair<bool, size_t> CPUKernelAttrCheck(const std::string &kernel_name, const KernelBuildInfo &kernel_info); | |||
| bool CPUKernelSingleAttrCheck(const KernelAttr &kernel_attr, const KernelBuildInfo &kernel_info) const; | |||
| // Set output and input ref map to kernel info which will be used by graph compiler. | |||
| void SetRefMapToKernelInfo(const std::string &kernel_name, size_t index, device::KernelInfo *kernel_info); | |||
| std::map<std::string, std::vector<std::pair<KernelAttr, NativeCpuKernelModCreator>>> name_to_attr_creator_; | |||
| }; | |||
| @@ -18,8 +18,12 @@ | |||
| namespace mindspore { | |||
| namespace kernel { | |||
| MS_REG_CPU_KERNEL_T( | |||
| RpcRecv, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32).SetAllSameAttr(true), | |||
| RpcRecvKernelMod, float); | |||
| MS_REG_CPU_KERNEL_T(RpcRecv, | |||
| KernelAttr() | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .SetAllSameAttr(true) | |||
| .AddOutInRef(0, 0), | |||
| RpcRecvKernelMod, float); | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -109,16 +109,25 @@ class KernelAttr { | |||
| return *this; | |||
| } | |||
| KernelAttr &AddOutInRef(size_t output_index, size_t input_index) { | |||
| out_in_ref_map_[output_index] = input_index; | |||
| return *this; | |||
| } | |||
| const DataType &GetInputAttr(const size_t index) const { return input_type_[index]; } | |||
| const DataType &GetOutputAttr(const size_t index) const { return output_type_[index]; } | |||
| const bool &GetAllSame() const { return all_same_; } | |||
| size_t GetInputSize() const { return input_type_.size(); } | |||
| size_t GetOutputSize() const { return output_type_.size(); } | |||
| const OutputInputRefMap &GetOutInRefMap() const { return out_in_ref_map_; } | |||
| private: | |||
| std::vector<DataType> input_type_; | |||
| std::vector<DataType> output_type_; | |||
| // The map between kernel's output and input ref relationship. | |||
| OutputInputRefMap out_in_ref_map_; | |||
| bool all_same_; | |||
| }; | |||
| } // namespace gpu | |||
| @@ -120,6 +120,14 @@ void NativeGpuKernelModFactory::CheckSM(const KernelBuildInfo *kernel_info, cons | |||
| } | |||
| } | |||
| void NativeGpuKernelModFactory::SetRefMapToKernelInfo(const std::string &kernel_name, size_t index, | |||
| device::KernelInfo *kernel_info) { | |||
| const auto &kernel_attr = (map_kernel_name_to_creater_.find(kernel_name)->second)[index].first; | |||
| if (!kernel_attr.GetOutInRefMap().empty()) { | |||
| kernel_info->set_ref_map(kernel_attr.GetOutInRefMap()); | |||
| } | |||
| } | |||
| std::pair<bool, size_t> NativeGpuKernelModFactory::GpuKernelAttrCheck(const std::string &kernel_name, | |||
| const KernelBuildInfo *kernel_info) { | |||
| auto iter = map_kernel_name_to_creater_.find(kernel_name); | |||
| @@ -181,6 +189,7 @@ NativeGpuKernelMod *NativeGpuKernelModFactory::Create(const std::string &kernel_ | |||
| MS_EXCEPTION_IF_NULL(kernel_build_Info); | |||
| std::pair<bool, size_t> ret_pair = GpuKernelAttrCheck(kernel_name, kernel_build_Info); | |||
| if (ret_pair.first) { | |||
| SetRefMapToKernelInfo(kernel_name, ret_pair.second, kernel_info); | |||
| return (map_kernel_name_to_creater_.find(kernel_name)->second)[ret_pair.second].second(); | |||
| } | |||
| return nullptr; | |||
| @@ -60,6 +60,10 @@ class NativeGpuKernelModFactory { | |||
| void CheckSM(const KernelBuildInfo *kernel_info, const size_t &input_index); | |||
| bool CheckIOParam(const std::string &kernel_name, const KernelBuildInfo *kernel_info, | |||
| std::vector<std::pair<KernelAttr, NativeGpuKernelModCreater>> *iter_second, size_t attr_index); | |||
| // Set output and input ref map to kernel info which will be used by graph compiler. | |||
| void SetRefMapToKernelInfo(const std::string &kernel_name, size_t index, device::KernelInfo *kernel_info); | |||
| // map to maintain kernel and creator, KernelAttr object and creator must be registered as a pair. | |||
| std::map<std::string, std::vector<std::pair<KernelAttr, NativeGpuKernelModCreater>>> map_kernel_name_to_creater_; | |||
| }; | |||
| @@ -25,7 +25,8 @@ MS_REG_GPU_KERNEL_THREE(ApplyMomentum, | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutInRef(0, 0), | |||
| MomentumGpuKernelMod, float, float, float) | |||
| MS_REG_GPU_KERNEL_THREE(ApplyMomentum, | |||
| KernelAttr() | |||
| @@ -34,7 +35,8 @@ MS_REG_GPU_KERNEL_THREE(ApplyMomentum, | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddOutputAttr(kNumberTypeFloat16), | |||
| .AddOutputAttr(kNumberTypeFloat16) | |||
| .AddOutInRef(0, 0), | |||
| MomentumGpuKernelMod, half, half, half) | |||
| MS_REG_GPU_KERNEL_THREE(ApplyMomentum, | |||
| KernelAttr() | |||
| @@ -43,7 +45,8 @@ MS_REG_GPU_KERNEL_THREE(ApplyMomentum, | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat16), | |||
| .AddOutputAttr(kNumberTypeFloat16) | |||
| .AddOutInRef(0, 0), | |||
| MomentumGpuKernelMod, half, float, half) | |||
| MS_REG_GPU_KERNEL_THREE(ApplyMomentum, | |||
| KernelAttr() | |||
| @@ -52,7 +55,8 @@ MS_REG_GPU_KERNEL_THREE(ApplyMomentum, | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddInputAttr(kNumberTypeFloat16) | |||
| .AddInputAttr(kNumberTypeFloat32) | |||
| .AddOutputAttr(kNumberTypeFloat32), | |||
| .AddOutputAttr(kNumberTypeFloat32) | |||
| .AddOutInRef(0, 0), | |||
| MomentumGpuKernelMod, float, float, half) | |||
| } // namespace kernel | |||
| } // namespace mindspore | |||
| @@ -99,7 +99,7 @@ class MomentumGpuKernelMod : public NativeGpuKernelMod { | |||
| input_size_list_.push_back(learning_rate_size_); | |||
| input_size_list_.push_back(gradient_size_); | |||
| input_size_list_.push_back(momentum_size_); | |||
| output_size_list_.push_back(0); | |||
| output_size_list_.push_back(variable_size_); | |||
| } | |||
| private: | |||
| @@ -73,6 +73,9 @@ class KernelInfo : public KernelInfoDevice { | |||
| const std::vector<std::shared_ptr<DeviceAddress>> &output_address_list() const { return output_address_list_; } | |||
| const std::vector<std::shared_ptr<DeviceAddress>> &workspace_address_list() const { return workspace_address_list_; } | |||
| void set_ref_map(const OutputInputRefMap &ref_map) { out_in_ref_map_ = ref_map; } | |||
| const OutputInputRefMap &out_in_ref_map() const { return out_in_ref_map_; } | |||
| private: | |||
| bool is_feature_map_; | |||
| kernel::KernelBuildInfoPtr select_kernel_build_info_; | |||
| @@ -85,6 +88,8 @@ class KernelInfo : public KernelInfoDevice { | |||
| uint32_t stream_distinction_label_; | |||
| // record which graph the node belong to | |||
| uint32_t graph_id_; | |||
| // The map between kernel's output and input ref relationship. | |||
| OutputInputRefMap out_in_ref_map_; | |||
| }; | |||
| } // namespace device | |||
| } // namespace mindspore | |||
| @@ -469,6 +469,8 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic | |||
| // Execute optimization pass. | |||
| device_context->OptimizeGraph(graph); | |||
| AddOutInRefToGraph(graph); | |||
| // Generate 'KernelMod' for all kernels and set 'KernelMod' into kernel, | |||
| // 'KernelMod' is real executive object of kernel. | |||
| device_context->CreateKernel(graph->execution_order()); | |||
| @@ -586,6 +588,25 @@ KernelGraphPtr GraphCompiler::Fetch(const GraphInfo &graph_info) const { | |||
| return iter->second; | |||
| } | |||
| void GraphCompiler::AddOutInRefToGraph(const KernelGraphPtr &graph) const { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| for (const auto &cnode : graph->execution_order()) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| auto kernel_info = dynamic_cast<device::KernelInfo *>(cnode->kernel_info()); | |||
| MS_EXCEPTION_IF_NULL(kernel_info); | |||
| for (const auto &ref : kernel_info->out_in_ref_map()) { | |||
| size_t output_index = ref.first; | |||
| size_t input_index = ref.second; | |||
| auto final_pair = std::make_pair(cnode, output_index); | |||
| auto origin_pair = common::AnfAlgo::VisitKernel(common::AnfAlgo::GetInputNode(cnode, input_index), 0); | |||
| MS_LOG(INFO) << "The reference relation output " << final_pair.first->fullname_with_scope() | |||
| << ", output index: " << final_pair.second << " to input " | |||
| << origin_pair.first->fullname_with_scope() << ", output index: " << origin_pair.second; | |||
| graph->AddRefCorrespondPairs(final_pair, origin_pair); | |||
| } | |||
| } | |||
| } | |||
| void GraphCompiler::CreateDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *device_context, | |||
| bool is_gradient_out) const { | |||
| MS_LOG(INFO) << "Status record: start create device address. graph id: " << graph->graph_id(); | |||
| @@ -189,6 +189,9 @@ class GraphCompiler { | |||
| // setting operator info, creating kernel and transforming kernel graph to ActorSet. | |||
| GraphId CompileGraphImpl(const KernelGraphPtr &graph, const DeviceContext *device_context) const; | |||
| // Add operators' output and input reference map to the graph. | |||
| void AddOutInRefToGraph(const KernelGraphPtr &graph) const; | |||
| // Create device address for all anf nodes of graph. | |||
| void CreateDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *device_context, | |||
| bool is_gradient_out) const; | |||