| @@ -19,15 +19,214 @@ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <vector> | |||
| #include <unordered_map> | |||
| #include "ir/func_graph.h" | |||
| #include "frontend/parallel/ops_info/operator_info.h" | |||
| #include "frontend/parallel/graph_util/graph_info.h" | |||
| #include "frontend/parallel/strategy.h" | |||
| #include "frontend/parallel/tensor_layout/tensor_layout.h" | |||
| #include "frontend/parallel/ops_info/ops_utils.h" | |||
| namespace mindspore { | |||
| namespace parallel { | |||
| namespace { | |||
| constexpr char INPUTS[] = "inputs"; | |||
| constexpr char ATTRS[] = "attrs"; | |||
| using FuncGraphNameMap = const std::unordered_map<FuncGraphPtr, std::string>; | |||
| static std::unordered_map<std::string, size_t> op_count; | |||
| static std::unordered_map<CNodePtr, std::string> name_map; | |||
| // Extract the op name and the topology number of the same node in the graph | |||
| // e.g, Default/Mul-op32 -> Mul-op0, Default/Mul-op35 -> Mul-op1 | |||
| std::string GetNodeNameWithCount(const CNodePtr &cnode) { | |||
| if (name_map.find(cnode) != name_map.end()) { | |||
| return name_map[cnode]; | |||
| } | |||
| std::string node_name; | |||
| auto is_call_fullname_with_scope = [](const CNodePtr &cnode) { | |||
| auto value_ptr = cnode->input(0)->cast<ValueNodePtr>(); | |||
| ValuePtr input_value = nullptr; | |||
| if (value_ptr != nullptr) { | |||
| input_value = value_ptr->value(); | |||
| } | |||
| if (input_value != nullptr && input_value->cast<PrimitivePtr>() == nullptr && | |||
| input_value->cast<FuncGraphPtr>() == nullptr) { | |||
| return false; | |||
| } | |||
| return true; | |||
| }; | |||
| if (is_call_fullname_with_scope(cnode)) { | |||
| auto node_name_with_scope = cnode->fullname_with_scope(); | |||
| size_t left = node_name_with_scope.rfind('/'); | |||
| size_t right = node_name_with_scope.find("-op"); | |||
| node_name = node_name_with_scope.substr(left + 1, right - left - 1); | |||
| } else { | |||
| node_name = cnode->ToString(); | |||
| } | |||
| std::ostringstream oss; | |||
| oss << node_name << '-' << op_count[node_name]; | |||
| name_map[cnode] = oss.str(); | |||
| ++op_count[node_name]; | |||
| return name_map[cnode]; | |||
| } | |||
| // Renames sub-graphs according to the topology order, e.g, @5_construct.395 -> @graph_0 | |||
| FuncGraphNameMap GetAllFuncGraphNameMap(const FuncGraphPtr &graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| auto anf_nodes = TopoSort(graph->get_return(), SuccDeeperSimple, AlwaysInclude); | |||
| std::unordered_map<FuncGraphPtr, std::string> graph_name_map; | |||
| size_t graph_count = 0; | |||
| for (const auto &anf_node : anf_nodes) { | |||
| auto belong_graph = anf_node->func_graph(); | |||
| if (belong_graph == nullptr) { | |||
| continue; | |||
| } | |||
| if (graph_name_map.find(belong_graph) == graph_name_map.end()) { | |||
| std::ostringstream oss; | |||
| oss << "@graph_" << graph_count++; | |||
| graph_name_map[belong_graph] = oss.str(); | |||
| oss.clear(); | |||
| } | |||
| } | |||
| return graph_name_map; | |||
| } | |||
| // Extract operator name from cnode | |||
| std::string GetCNodeOperatorNameWithCount(const CNodePtr &cnode, const FuncGraphNameMap &func_name_map) { | |||
| AnfNodePtr op = cnode->input(0); | |||
| MS_EXCEPTION_IF_NULL(op); | |||
| std::string op_name; | |||
| if (IsValueNode<FuncGraph>(op)) { | |||
| const FuncGraphPtr fg = GetValueNode<FuncGraphPtr>(op); | |||
| op_name = "call " + func_name_map.at(fg); | |||
| } else { | |||
| op_name = GetNodeNameWithCount(cnode); | |||
| name_map[cnode] = op_name; | |||
| } | |||
| return op_name; | |||
| } | |||
// Convert an IntegerImm value into a Python int, dispatching on the exact
// type id so the original bit width and signedness are read back correctly.
// Raises (MS_LOG(EXCEPTION)) if the value is not an IntegerImm or its type id
// is not an integer type.
py::int_ GetPyIntValueFromIntegerImm(const ValuePtr &value_node) {
  MS_EXCEPTION_IF_NULL(value_node);
  if (!value_node->isa<IntegerImm>()) {
    MS_LOG(EXCEPTION) << "value_node is not IntegerImm";
  }
  TypePtr data_type = value_node->type();
  MS_EXCEPTION_IF_NULL(data_type);
  TypeId type_id = data_type->type_id();
  // GetValue<T> must be instantiated with the exact stored type, hence the
  // per-width switch instead of a single GetValue<int64_t>.
  switch (type_id) {
    case kNumberTypeInt8:
      return py::int_(GetValue<int8_t>(value_node));
    case kNumberTypeInt16:
      return py::int_(GetValue<int16_t>(value_node));
    case kNumberTypeInt32:
      return py::int_(GetValue<int32_t>(value_node));
    case kNumberTypeInt64:
      return py::int_(GetValue<int64_t>(value_node));
    case kNumberTypeUInt8:
      return py::int_(GetValue<uint8_t>(value_node));
    case kNumberTypeUInt16:
      return py::int_(GetValue<uint16_t>(value_node));
    case kNumberTypeUInt32:
      return py::int_(GetValue<uint32_t>(value_node));
    case kNumberTypeUInt64:
      return py::int_(GetValue<uint64_t>(value_node));
    default:
      MS_LOG(EXCEPTION) << "The data type: " << data_type << " is invalid.";
  }
}
| // Extract the list of operand names from cnode | |||
| py::list GetCNodeOperandNameList(const CNodePtr &cnode, const FuncGraphNameMap &func_name_map) { | |||
| MS_EXCEPTION_IF_NULL(cnode); | |||
| py::list cnode_inputs_name_list; | |||
| auto cnode_inputs = cnode->inputs(); | |||
| // Skip cnode_inputs[0] which is Primitive value node | |||
| for (size_t i = 1; i < cnode_inputs.size(); ++i) { | |||
| const AnfNodePtr &input = cnode_inputs[i]; | |||
| MS_EXCEPTION_IF_NULL(input); | |||
| if (input->isa<Parameter>()) { | |||
| cnode_inputs_name_list.append(py::str(std::static_pointer_cast<Parameter>(input)->name())); | |||
| } else if (IsValueNode<FuncGraph>(input)) { | |||
| FuncGraphPtr fg = GetValueNode<FuncGraphPtr>(input); | |||
| cnode_inputs_name_list.append(func_name_map.at(fg)); | |||
| } else if (input->isa<CNode>()) { | |||
| cnode_inputs_name_list.append(py::str(GetNodeNameWithCount(input->cast<CNodePtr>()))); | |||
| } else if (input->isa<ValueNode>()) { | |||
| auto value_node = GetValueNode(input); | |||
| if (value_node->isa<IntegerImm>()) { | |||
| cnode_inputs_name_list.append(GetPyIntValueFromIntegerImm(value_node)); | |||
| } else if (value_node->isa<FP32Imm>()) { | |||
| cnode_inputs_name_list.append(GetValue<float>(value_node)); | |||
| } else if (value_node->isa<FP64Imm>()) { | |||
| cnode_inputs_name_list.append(GetValue<double>(value_node)); | |||
| } else if (value_node->isa<BoolImm>()) { | |||
| cnode_inputs_name_list.append(GetValue<bool>(value_node)); | |||
| } else if (value_node->isa<StringImm>()) { | |||
| cnode_inputs_name_list.append(py::str(GetValue<std::string>(value_node))); | |||
| } else { | |||
| cnode_inputs_name_list.append(py::str(input->ToString())); | |||
| } | |||
| } else { | |||
| cnode_inputs_name_list.append(py::str(input->ToString())); | |||
| } | |||
| } | |||
| return cnode_inputs_name_list; | |||
| } | |||
| py::dict GetCNodeAttrs(const CNodePtr &cnode) { | |||
| AnfNodePtr op = cnode->input(0); | |||
| if (op == nullptr || !IsValueNode<Primitive>(op)) { | |||
| return py::dict(); | |||
| } | |||
| PrimitivePtr primitive = GetValueNode<PrimitivePtr>(op); | |||
| auto attrs = primitive->attrs(); | |||
| py::dict cnode_attrs_dict; | |||
| for (const auto &attr : attrs) { | |||
| auto key = attr.first; | |||
| auto value = attr.second; | |||
| if (value->isa<BoolImm>()) { | |||
| cnode_attrs_dict[py::str(key)] = GetValue<bool>(value); | |||
| } else if (value->isa<IntegerImm>()) { | |||
| cnode_attrs_dict[py::str(key)] = GetPyIntValueFromIntegerImm(value); | |||
| } else if (value->isa<FP32Imm>()) { | |||
| cnode_attrs_dict[py::str(key)] = GetValue<float>(value); | |||
| } else if (value->isa<FP64Imm>()) { | |||
| cnode_attrs_dict[py::str(key)] = GetValue<double>(value); | |||
| } else { | |||
| cnode_attrs_dict[py::str(attr.first)] = py::str(attr.second->ToString()); | |||
| } | |||
| } | |||
| return cnode_attrs_dict; | |||
| } | |||
// Get cnode info dict in subgraph.
// Returns {op_name_with_count: {"inputs": [...], "attrs": {...}}} for every
// cnode of sub_graph, in its ordered-cnode order.
py::dict GetParallelCNodeInfoFromSubGraph(const FuncGraphPtr &sub_graph, const FuncGraphNameMap &func_name_map) {
  MS_EXCEPTION_IF_NULL(sub_graph);
  // Reset the file-static naming caches so the per-name counters
  // (e.g. Mul-0, Mul-1) restart from zero within each sub-graph.
  // NOTE(review): relies on mutable file-static state, so this is presumably
  // not safe for concurrent callers — confirm if that ever becomes a use case.
  op_count.clear();
  name_map.clear();
  py::dict cnode_info_dict;
  auto cnodes = sub_graph->GetOrderedCnodes();
  for (const auto &cnode : cnodes) {
    std::string op_name_with_count = GetCNodeOperatorNameWithCount(cnode, func_name_map);
    py::dict cnode_info;
    cnode_info[INPUTS] = GetCNodeOperandNameList(cnode, func_name_map);
    cnode_info[ATTRS] = GetCNodeAttrs(cnode);
    cnode_info_dict[py::str(op_name_with_count)] = cnode_info;
  }
  return cnode_info_dict;
}
| } // namespace | |||
| py::dict GetParameterLayoutFromGraph(const FuncGraphPtr &graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| py::dict dict; | |||
| @@ -124,5 +323,21 @@ py::list GetParallelParameterNameListFromResource(const pipeline::ResourcePtr &r | |||
| } | |||
| return parallel_parameter_name_list; | |||
| } | |||
| py::dict GetParallelCNodeInfoFromGraph(const FuncGraphPtr &graph) { | |||
| MS_EXCEPTION_IF_NULL(graph); | |||
| // Search and mapping all subgraph names | |||
| auto func_name_map = GetAllFuncGraphNameMap(graph); | |||
| py::dict parallel_cnode_info_dict; | |||
| // Get cnode info dict in each subgraph in turn | |||
| for (const auto &kv : func_name_map) { | |||
| auto sub_graph_cnode_info_dict = GetParallelCNodeInfoFromSubGraph(kv.first, func_name_map); | |||
| parallel_cnode_info_dict[py::str(kv.second)] = sub_graph_cnode_info_dict; | |||
| } | |||
| op_count.clear(); | |||
| name_map.clear(); | |||
| return parallel_cnode_info_dict; | |||
| } | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| @@ -30,6 +30,7 @@ py::dict GetParameterLayoutFromResource(const pipeline::ResourcePtr &resource); | |||
| py::dict GetAllreduceFusion(const FuncGraphPtr &graph); | |||
| py::list GetParallelParameterNameListFromGraph(const FuncGraphPtr &graph); | |||
| py::list GetParallelParameterNameListFromResource(const pipeline::ResourcePtr &resource); | |||
| py::dict GetParallelCNodeInfoFromGraph(const FuncGraphPtr &graph); | |||
| } // namespace parallel | |||
| } // namespace mindspore | |||
| @@ -86,6 +86,8 @@ PYBIND11_MODULE(_c_expression, m) { | |||
| py::arg("params"), "Fetch the inputs of Conv or Matmul for quant export.") | |||
| .def("get_parameter_layout", &GraphExecutorPy::GetParameterLayout, py::arg("phase") = py::str("train"), | |||
| "Get Parameter Tensor Layout Dictionary.") | |||
| .def("get_parallel_graph_info", &GraphExecutorPy::GetParallelGraphInfo, py::arg("phase") = py::str("train"), | |||
| "Get graph info in step_parallel stage.") | |||
| .def("get_parallel_parameter_name_list", &GraphExecutorPy::GetParallelParameterNameList, | |||
| py::arg("phase") = py::str("train"), "Get Parallel Parameter Name List.") | |||
| .def("get_strategy", &GraphExecutorPy::GetCNodeStrategy, py::arg("phase") = py::str("train"), | |||
| @@ -426,6 +426,17 @@ py::bytes GraphExecutorPy::GetOptimizeGraphProto(const std::string &phase) { | |||
// Store the jit config parsed from the Python-side dict for later compiles.
void GraphExecutorPy::SetJitConfig(const py::dict &jit_config) { jit_config_ = GenerateJitConfigMap(jit_config); }
// Return the cnode info dict (names, inputs, attrs per sub-graph) of the
// step-parallel graph compiled for `phase`. Raises if that graph is missing,
// e.g. when compile was not run in auto-parallel mode.
py::dict GraphExecutorPy::GetParallelGraphInfo(const std::string &phase) {
  MS_LOG(DEBUG) << "GetParallelGraphInfo!";
  // The step_parallel pass registers its graph under "<phase><kStepParallelGraph>".
  std::string parallel_phase = phase + kStepParallelGraph;
  auto graph = GetFuncGraph(parallel_phase);
  if (graph == nullptr) {
    MS_LOG(EXCEPTION) << "Can not access FuncGraph according to phase: " << parallel_phase;
  }
  return mindspore::parallel::GetParallelCNodeInfoFromGraph(graph);
}
| py::dict GraphExecutorPy::GetParameterLayout(const std::string &phase) { | |||
| MS_LOG(DEBUG) << "GetParameterLayout!"; | |||
| std::string layout_graph = phase + kStepParallelGraph; | |||
| @@ -104,6 +104,8 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> { | |||
| void PyExePath(const py::object &py_exe_path); | |||
| void KernelBuildServerDir(const py::object &kernel_build_server_dir); | |||
| py::dict GetParameterLayout(const std::string &phase); | |||
| // Get CNode name, input node name and attribute from each graph | |||
| py::dict GetParallelGraphInfo(const std::string &phase); | |||
| py::dict GetCNodeStrategy(const std::string &phase); | |||
| py::list GetParallelParameterNameList(const std::string &phase); | |||
| void SetCNodeStrategy(const std::string &name, const parallel::Strategys &strategy); | |||
| @@ -275,7 +275,7 @@ tensor::TensorPtr ScalarToTensor(const ScalarPtr &scalar) { | |||
| case kNumberTypeFloat64: | |||
| return std::make_shared<tensor::Tensor>(GetValue<double>(scalar), data_type); | |||
| default: | |||
| MS_LOG(EXCEPTION) << "When convert scalar to tensor, the scalar type: " << data_type << "is valid."; | |||
| MS_LOG(EXCEPTION) << "When convert scalar to tensor, the scalar type: " << data_type << " is invalid."; | |||
| } | |||
| } | |||
| @@ -20,6 +20,8 @@ from mindspore.common.api import _cell_graph_executor | |||
| from mindspore.nn import Cell | |||
| from mindspore.ops import operations as P | |||
| from parallel.utils.utils import ParallelValidator | |||
| POOLED_HEIGHT = 2 | |||
| POOLED_WIDTH = 2 | |||
| SPATIAL_SCALE = 0.5 | |||
| @@ -47,8 +49,9 @@ class Net(Cell): | |||
def compile_net(net: Cell, *inputs):
    """Compile `net` in auto-parallel mode and return the compiled phase name."""
    net.set_auto_parallel()
    net.set_train()
    phase, _ = _cell_graph_executor.compile(net, *inputs, auto_parallel_mode=True)
    # Reset the parallel context so subsequent test cases start clean.
    context.reset_auto_parallel_context()
    return phase
| def test_roi_align_auto_parallel(): | |||
| @@ -86,3 +89,36 @@ def test_roi_align_strategy_error(): | |||
| with pytest.raises(RuntimeError): | |||
| compile_net(net, _features, _rois) | |||
| context.reset_auto_parallel_context() | |||
def test_roi_align_layout():
    """
    Features: ROIAlignInfo
    Description: validate layout and structure
    Expectation: No raise RuntimeError
    """
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
    in_strategy = ((4, 1, 1, 1), (2, 1))
    net = Net(POOLED_HEIGHT, POOLED_WIDTH, SPATIAL_SCALE, in_strategy)
    compiled_phase = compile_net(net, _features, _rois)
    checker = ParallelValidator(net, compiled_phase)
    # Layout of the sliced `features` parameter:
    # (device_arrangement, tensor_map, slice_shape, field_size, uniform_split, opt_shard_group)
    expected_layout = ([4, 2], [1, -1, -1, -1], [8, 3, 256, 256], 0, True, '')
    assert checker.check_parameter_layout('features', expected_layout)
    # Attributes carried by the ROIAlign primitive.
    expected_attrs = {'pooled_height': POOLED_HEIGHT, 'pooled_width': POOLED_WIDTH, 'spatial_scale': SPATIAL_SCALE}
    assert checker.check_node_attrs('ROIAlign-0', expected_attrs)
    # Operands feeding the ROIAlign node.
    expected_inputs = ['features', 'TensorScatterUpdate-0']
    assert checker.check_node_inputs('ROIAlign-0', expected_inputs)
    # Wiring of the surrounding sub-graph.
    expected_structure = {
        'ROIAlign-0': ['features', 'TensorScatterUpdate-0'],
        'MaskedFill-0': ['ROIAlign-0', 'ExpandDims-2', 0.0],
        'AllReduce-0': ['MaskedFill-0']
    }
    assert checker.check_graph_structure(expected_structure)
| @@ -0,0 +1,117 @@ | |||
| # Copyright 2022 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| from mindspore.common.api import _cell_graph_executor | |||
class ParallelValidator:
    """
    Validator for distribute operator.

    Args:
        net (Cell): `auto_parallel_mode` = True for networks where compile has been executed.
        phase (str): the phase string returned by `_cell_graph_executor.compile`.

    Examples:
        >>> from mindspore.common.api import _cell_graph_executor
        >>> from parallel.utils.utils import ParallelValidator
        >>> net = Net()
        >>> net.set_auto_parallel()
        >>> net.set_train()
        >>> phase, _ = _cell_graph_executor.compile(net, *inputs, auto_parallel_mode=True)
        >>> validator = ParallelValidator(net, phase)  # Init validator by net and phase
        >>> assert validator.check_parameter_shape("x", [8, 3, 256, 256])  # Check parameter slice shape
        >>> # expect_layout: (device_arrangement, tensor_map, slice_shape, field_size, uniform_split, opt_shard_group)
        >>> expect_layout = ([4, 2], [1, -1, -1, -1], [8, 3, 256, 256], 0, True, '')
        >>> assert validator.check_parameter_layout("x", expect_layout)
        >>> # check attrs for "ROIAlign-0" from graph_1
        >>> expect_attrs = {'pooled_height': POOLED_HEIGHT, 'pooled_width': POOLED_WIDTH}
        >>> assert validator.check_node_attrs("ROIAlign-0", expect_attrs, graph_id=1)
        >>> # check node inputs for "ROIAlign-0" from graph_0 (default graph_id)
        >>> expect_inputs = ['features', 'TensorScatterUpdate-0']
        >>> assert validator.check_node_inputs('ROIAlign-0', expect_inputs)
        >>> # check sub graph structure from graph_1
        >>> sub_graph = {
        ...     'ROIAlign-0': ['features', 'TensorScatterUpdate-0'],
        ...     'MaskedFill-0': ['ROIAlign-0', 'ExpandDims-2', 0.0],
        ...     'AllReduce-0': ['MaskedFill-0']
        ... }
        >>> assert validator.check_graph_structure(sub_graph, graph_id=1)
    """

    def __init__(self, net, phase):
        # Layout dict keyed by parameter name; graph info dict keyed by
        # "@graph_N" as produced by get_parallel_graph_info.
        self._parameter_layout_dict = net.parameter_layout_dict
        self._graph_info_dict = _cell_graph_executor._graph_executor.get_parallel_graph_info(phase)

    @property
    def parameter_layout_dict(self):
        return self._parameter_layout_dict

    @property
    def graph_info_dict(self):
        return self._graph_info_dict

    def check_parameter_layout(self, param_name: str, layout: [tuple, list]) -> bool:
        """Verify that `param_name`'s whole layout tuple equals `layout`."""
        if not isinstance(layout, (tuple, list)):
            # Fixed message: the original wrongly referred to 'expect_inputs'.
            raise TypeError("Type of layout must be list or tuple, but got {}".format(type(layout)))
        if param_name not in self._parameter_layout_dict.keys():
            return False
        return self._parameter_layout_dict[param_name] == layout

    def check_parameter_shape(self, param_name: str, shape: [tuple, list]) -> bool:
        """Verify the parameter slice shape (index 2 of the layout tuple)."""
        if not isinstance(shape, (tuple, list)):
            # Fixed message: the original wrongly referred to 'expect_inputs'.
            raise TypeError("Type of shape must be list or tuple, but got {}".format(type(shape)))
        if param_name not in self._parameter_layout_dict.keys():
            return False
        return self._parameter_layout_dict[param_name][2] == shape

    def check_node_attrs(self, node_name: str, expect_attrs: dict, graph_id=0) -> bool:
        """Verify that node `node_name` in `graph_id` carries every attr in `expect_attrs`."""
        if not isinstance(expect_attrs, dict):
            raise TypeError("Type of expect_attrs must be dict, but got {}".format(type(expect_attrs)))
        cnode_info_dict = self._get_graph_cnode_info(graph_id)
        if node_name not in cnode_info_dict.keys():
            return False
        attrs = cnode_info_dict[node_name]['attrs']
        for attr_name in expect_attrs.keys():
            if attr_name not in attrs.keys() or attrs[attr_name] != expect_attrs[attr_name]:
                return False
        return True

    def check_node_inputs(self, node_name: str, expect_inputs: [tuple, list], graph_id=0) -> bool:
        """Verify that node `node_name` in `graph_id` has exactly `expect_inputs` as operands."""
        if not isinstance(expect_inputs, (tuple, list)):
            raise TypeError("Type of expect_inputs must be list or tuple, but got {}".format(type(expect_inputs)))
        cnode_info_dict = self._get_graph_cnode_info(graph_id)
        expect_inputs = list(expect_inputs)
        if node_name not in cnode_info_dict.keys():
            return False
        inputs = cnode_info_dict[node_name]['inputs']
        return inputs == expect_inputs

    def check_graph_structure(self, nodes_dict: dict, graph_id=0) -> bool:
        """Verify node-to-inputs wiring for every node in `nodes_dict`."""
        if not isinstance(nodes_dict, dict):
            raise TypeError("Type of nodes_dict must be dict, but got {}".format(type(nodes_dict)))
        for name, inputs in nodes_dict.items():
            if not self.check_node_inputs(name, inputs, graph_id):
                return False
        return True

    def _get_graph_cnode_info(self, graph_id):
        """Return the cnode info dict for sub-graph `graph_id`; raise if absent."""
        graph_name = "@graph_" + str(graph_id)
        if graph_name not in self._graph_info_dict.keys():
            # Fixed grammar: was "{} is not exist".
            raise ValueError("{} does not exist".format(graph_name))
        return self._graph_info_dict[graph_name]