Browse Source

Add validate function for parallel ut

r1.7
liuluobin 4 years ago
parent
commit
b797a410cc
8 changed files with 386 additions and 2 deletions
  1. +215
    -0
      mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc
  2. +1
    -0
      mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.h
  3. +2
    -0
      mindspore/ccsrc/pipeline/jit/init.cc
  4. +11
    -0
      mindspore/ccsrc/pipeline/jit/pipeline.cc
  5. +2
    -0
      mindspore/ccsrc/pipeline/jit/pipeline.h
  6. +1
    -1
      mindspore/ccsrc/utils/convert_utils.cc
  7. +37
    -1
      tests/ut/python/parallel/test_roi_align.py
  8. +117
    -0
      tests/ut/python/parallel/utils/utils.py

+ 215
- 0
mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.cc View File

@@ -19,15 +19,214 @@
#include <memory>
#include <string>
#include <vector>
#include <unordered_map>

#include "ir/func_graph.h"
#include "frontend/parallel/ops_info/operator_info.h"
#include "frontend/parallel/graph_util/graph_info.h"
#include "frontend/parallel/strategy.h"
#include "frontend/parallel/tensor_layout/tensor_layout.h"
#include "frontend/parallel/ops_info/ops_utils.h"

namespace mindspore {
namespace parallel {
namespace {
constexpr char INPUTS[] = "inputs";
constexpr char ATTRS[] = "attrs";
using FuncGraphNameMap = const std::unordered_map<FuncGraphPtr, std::string>;
static std::unordered_map<std::string, size_t> op_count;
static std::unordered_map<CNodePtr, std::string> name_map;

// Extract the op name and a per-name topology counter for a node in the graph,
// e.g. Default/Mul-op32 -> Mul-0, Default/Mul-op35 -> Mul-1.
// Results are cached in the file-local name_map so a node always keeps its number.
std::string GetNodeNameWithCount(const CNodePtr &cnode) {
  auto cached = name_map.find(cnode);
  if (cached != name_map.end()) {
    return cached->second;
  }

  // fullname_with_scope() is only meaningful when input(0) is a Primitive or a
  // FuncGraph value node (i.e. a direct op/graph call); otherwise fall back to
  // ToString().
  auto is_call_fullname_with_scope = [](const CNodePtr &node) {
    auto value_ptr = node->input(0)->cast<ValueNodePtr>();
    ValuePtr input_value = nullptr;
    if (value_ptr != nullptr) {
      input_value = value_ptr->value();
    }
    if (input_value != nullptr && input_value->cast<PrimitivePtr>() == nullptr &&
        input_value->cast<FuncGraphPtr>() == nullptr) {
      return false;
    }
    return true;
  };

  std::string node_name;
  if (is_call_fullname_with_scope(cnode)) {
    // e.g. "Default/Mul-op32": drop the scope prefix and the "-opNN" suffix.
    auto node_name_with_scope = cnode->fullname_with_scope();
    size_t left = node_name_with_scope.rfind('/');
    size_t start = (left == std::string::npos) ? 0 : left + 1;
    // Search "-op" only after the last '/': an earlier "-op" inside the scope
    // path would otherwise make the substring length (right - left - 1) underflow.
    size_t right = node_name_with_scope.find("-op", start);
    size_t len = (right == std::string::npos) ? std::string::npos : right - start;
    node_name = node_name_with_scope.substr(start, len);
  } else {
    node_name = cnode->ToString();
  }

  std::ostringstream oss;
  oss << node_name << '-' << op_count[node_name];
  name_map[cnode] = oss.str();
  ++op_count[node_name];
  return name_map[cnode];
}

// Renames sub-graphs according to the topology order, e.g. @5_construct.395 -> @graph_0.
// The first node encountered that belongs to a graph fixes that graph's index.
FuncGraphNameMap GetAllFuncGraphNameMap(const FuncGraphPtr &graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto anf_nodes = TopoSort(graph->get_return(), SuccDeeperSimple, AlwaysInclude);
  std::unordered_map<FuncGraphPtr, std::string> graph_name_map;
  size_t graph_count = 0;
  for (const auto &anf_node : anf_nodes) {
    auto belong_graph = anf_node->func_graph();
    if (belong_graph == nullptr) {
      continue;
    }
    if (graph_name_map.find(belong_graph) == graph_name_map.end()) {
      // NOTE: the old ostringstream version called oss.clear(), which resets
      // stream *error flags*, not the buffer; std::to_string avoids the trap.
      graph_name_map[belong_graph] = "@graph_" + std::to_string(graph_count++);
    }
  }
  return graph_name_map;
}

// Extract the operator name (or callee sub-graph name) from a cnode.
std::string GetCNodeOperatorNameWithCount(const CNodePtr &cnode, const FuncGraphNameMap &func_name_map) {
  AnfNodePtr op = cnode->input(0);
  MS_EXCEPTION_IF_NULL(op);
  std::string op_name;
  if (IsValueNode<FuncGraph>(op)) {
    // The cnode invokes a sub-graph: render it as a call to the renamed graph.
    const FuncGraphPtr fg = GetValueNode<FuncGraphPtr>(op);
    op_name = "call " + func_name_map.at(fg);
  } else {
    // GetNodeNameWithCount() already caches the result in name_map,
    // so the old extra `name_map[cnode] = op_name;` store was redundant.
    op_name = GetNodeNameWithCount(cnode);
  }
  return op_name;
}

// Convert an IntegerImm scalar to a python int, honouring its exact width and
// signedness so no precision is lost on the way into python.
py::int_ GetPyIntValueFromIntegerImm(const ValuePtr &value_node) {
  MS_EXCEPTION_IF_NULL(value_node);
  if (!value_node->isa<IntegerImm>()) {
    MS_LOG(EXCEPTION) << "value_node is not IntegerImm";
  }

  TypePtr data_type = value_node->type();
  MS_EXCEPTION_IF_NULL(data_type);
  // Dispatch on the concrete integer type id.
  switch (data_type->type_id()) {
    case kNumberTypeInt8:
      return py::int_(GetValue<int8_t>(value_node));
    case kNumberTypeInt16:
      return py::int_(GetValue<int16_t>(value_node));
    case kNumberTypeInt32:
      return py::int_(GetValue<int32_t>(value_node));
    case kNumberTypeInt64:
      return py::int_(GetValue<int64_t>(value_node));
    case kNumberTypeUInt8:
      return py::int_(GetValue<uint8_t>(value_node));
    case kNumberTypeUInt16:
      return py::int_(GetValue<uint16_t>(value_node));
    case kNumberTypeUInt32:
      return py::int_(GetValue<uint32_t>(value_node));
    case kNumberTypeUInt64:
      return py::int_(GetValue<uint64_t>(value_node));
    default:
      MS_LOG(EXCEPTION) << "The data type: " << data_type << " is invalid.";
  }
}

// Extract the list of operand names from a cnode. Parameters are reported by
// name, sub-graphs by their renamed graph name, cnodes by their counted name,
// and scalar value nodes by their native python value.
py::list GetCNodeOperandNameList(const CNodePtr &cnode, const FuncGraphNameMap &func_name_map) {
  MS_EXCEPTION_IF_NULL(cnode);

  py::list operand_names;
  const auto &operands = cnode->inputs();

  // operands[0] is the operator itself (primitive or sub-graph value node);
  // real operands start at index 1.
  for (size_t i = 1; i < operands.size(); ++i) {
    const AnfNodePtr &operand = operands[i];
    MS_EXCEPTION_IF_NULL(operand);

    if (operand->isa<Parameter>()) {
      operand_names.append(py::str(std::static_pointer_cast<Parameter>(operand)->name()));
      continue;
    }
    if (IsValueNode<FuncGraph>(operand)) {
      operand_names.append(func_name_map.at(GetValueNode<FuncGraphPtr>(operand)));
      continue;
    }
    if (operand->isa<CNode>()) {
      operand_names.append(py::str(GetNodeNameWithCount(operand->cast<CNodePtr>())));
      continue;
    }
    if (operand->isa<ValueNode>()) {
      auto value = GetValueNode(operand);
      // Scalars become native python values so tests can compare them directly.
      if (value->isa<IntegerImm>()) {
        operand_names.append(GetPyIntValueFromIntegerImm(value));
      } else if (value->isa<FP32Imm>()) {
        operand_names.append(GetValue<float>(value));
      } else if (value->isa<FP64Imm>()) {
        operand_names.append(GetValue<double>(value));
      } else if (value->isa<BoolImm>()) {
        operand_names.append(GetValue<bool>(value));
      } else if (value->isa<StringImm>()) {
        operand_names.append(py::str(GetValue<std::string>(value)));
      } else {
        operand_names.append(py::str(operand->ToString()));
      }
      continue;
    }
    // Anything else (e.g. unnamed nodes): fall back to the textual form.
    operand_names.append(py::str(operand->ToString()));
  }
  return operand_names;
}

// Collect the attributes of the cnode's primitive into a python dict,
// converting scalar attribute values to native python types where possible.
// Non-primitive cnodes (e.g. sub-graph calls) have no attributes.
py::dict GetCNodeAttrs(const CNodePtr &cnode) {
  AnfNodePtr op = cnode->input(0);
  if (op == nullptr || !IsValueNode<Primitive>(op)) {
    return py::dict();
  }

  PrimitivePtr primitive = GetValueNode<PrimitivePtr>(op);
  auto attrs = primitive->attrs();
  py::dict cnode_attrs_dict;
  for (const auto &attr : attrs) {
    const auto &key = attr.first;
    const auto &value = attr.second;
    if (value->isa<BoolImm>()) {
      cnode_attrs_dict[py::str(key)] = GetValue<bool>(value);
    } else if (value->isa<IntegerImm>()) {
      cnode_attrs_dict[py::str(key)] = GetPyIntValueFromIntegerImm(value);
    } else if (value->isa<FP32Imm>()) {
      cnode_attrs_dict[py::str(key)] = GetValue<float>(value);
    } else if (value->isa<FP64Imm>()) {
      cnode_attrs_dict[py::str(key)] = GetValue<double>(value);
    } else {
      // Consistency fix: use the bound key/value instead of attr.first/attr.second.
      cnode_attrs_dict[py::str(key)] = py::str(value->ToString());
    }
  }
  return cnode_attrs_dict;
}

// Build the {node_name: {"inputs": [...], "attrs": {...}}} dict for one subgraph.
py::dict GetParallelCNodeInfoFromSubGraph(const FuncGraphPtr &sub_graph, const FuncGraphNameMap &func_name_map) {
  MS_EXCEPTION_IF_NULL(sub_graph);
  // Counters restart per subgraph so node names are numbered within each graph.
  op_count.clear();
  name_map.clear();

  py::dict cnode_info_dict;
  for (const auto &cnode : sub_graph->GetOrderedCnodes()) {
    // The node's own name must be resolved before its operands: both calls
    // consume the shared counters, and this order fixes the numbering.
    const std::string node_key = GetCNodeOperatorNameWithCount(cnode, func_name_map);
    py::dict cnode_info;
    cnode_info[INPUTS] = GetCNodeOperandNameList(cnode, func_name_map);
    cnode_info[ATTRS] = GetCNodeAttrs(cnode);
    cnode_info_dict[py::str(node_key)] = cnode_info;
  }
  return cnode_info_dict;
}
} // namespace

py::dict GetParameterLayoutFromGraph(const FuncGraphPtr &graph) {
MS_EXCEPTION_IF_NULL(graph);
py::dict dict;
@@ -124,5 +323,21 @@ py::list GetParallelParameterNameListFromResource(const pipeline::ResourcePtr &r
}
return parallel_parameter_name_list;
}

// Entry point: return {"@graph_N": {node info dict}} for every sub-graph
// reachable from `graph`.
py::dict GetParallelCNodeInfoFromGraph(const FuncGraphPtr &graph) {
  MS_EXCEPTION_IF_NULL(graph);
  // First give every reachable sub-graph a stable name (@graph_0, @graph_1, ...).
  const auto func_name_map = GetAllFuncGraphNameMap(graph);

  py::dict parallel_cnode_info_dict;
  for (const auto &entry : func_name_map) {
    parallel_cnode_info_dict[py::str(entry.second)] = GetParallelCNodeInfoFromSubGraph(entry.first, func_name_map);
  }

  // Drop the file-local caches so the next query starts from a clean state.
  op_count.clear();
  name_map.clear();
  return parallel_cnode_info_dict;
}
} // namespace parallel
} // namespace mindspore

+ 1
- 0
mindspore/ccsrc/frontend/parallel/graph_util/get_parallel_info.h View File

@@ -30,6 +30,7 @@ py::dict GetParameterLayoutFromResource(const pipeline::ResourcePtr &resource);
py::dict GetAllreduceFusion(const FuncGraphPtr &graph);
py::list GetParallelParameterNameListFromGraph(const FuncGraphPtr &graph);
py::list GetParallelParameterNameListFromResource(const pipeline::ResourcePtr &resource);
py::dict GetParallelCNodeInfoFromGraph(const FuncGraphPtr &graph);
} // namespace parallel
} // namespace mindspore



+ 2
- 0
mindspore/ccsrc/pipeline/jit/init.cc View File

@@ -86,6 +86,8 @@ PYBIND11_MODULE(_c_expression, m) {
py::arg("params"), "Fetch the inputs of Conv or Matmul for quant export.")
.def("get_parameter_layout", &GraphExecutorPy::GetParameterLayout, py::arg("phase") = py::str("train"),
"Get Parameter Tensor Layout Dictionary.")
.def("get_parallel_graph_info", &GraphExecutorPy::GetParallelGraphInfo, py::arg("phase") = py::str("train"),
"Get graph info in step_parallel stage.")
.def("get_parallel_parameter_name_list", &GraphExecutorPy::GetParallelParameterNameList,
py::arg("phase") = py::str("train"), "Get Parallel Parameter Name List.")
.def("get_strategy", &GraphExecutorPy::GetCNodeStrategy, py::arg("phase") = py::str("train"),


+ 11
- 0
mindspore/ccsrc/pipeline/jit/pipeline.cc View File

@@ -426,6 +426,17 @@ py::bytes GraphExecutorPy::GetOptimizeGraphProto(const std::string &phase) {

void GraphExecutorPy::SetJitConfig(const py::dict &jit_config) { jit_config_ = GenerateJitConfigMap(jit_config); }

// Return the per-node info (inputs/attrs) of the step_parallel graph compiled
// for `phase`; raises if that phase has no step_parallel graph.
py::dict GraphExecutorPy::GetParallelGraphInfo(const std::string &phase) {
  MS_LOG(DEBUG) << "GetParallelGraphInfo!";
  // The step_parallel stage stores its graph under the suffixed phase key.
  const std::string parallel_phase = phase + kStepParallelGraph;
  const auto graph = GetFuncGraph(parallel_phase);
  if (graph == nullptr) {
    MS_LOG(EXCEPTION) << "Can not access FuncGraph according to phase: " << parallel_phase;
  }
  return mindspore::parallel::GetParallelCNodeInfoFromGraph(graph);
}

py::dict GraphExecutorPy::GetParameterLayout(const std::string &phase) {
MS_LOG(DEBUG) << "GetParameterLayout!";
std::string layout_graph = phase + kStepParallelGraph;


+ 2
- 0
mindspore/ccsrc/pipeline/jit/pipeline.h View File

@@ -104,6 +104,8 @@ class GraphExecutorPy : public std::enable_shared_from_this<GraphExecutorPy> {
void PyExePath(const py::object &py_exe_path);
void KernelBuildServerDir(const py::object &kernel_build_server_dir);
py::dict GetParameterLayout(const std::string &phase);
// Get CNode name, input node name and attribute from each graph
py::dict GetParallelGraphInfo(const std::string &phase);
py::dict GetCNodeStrategy(const std::string &phase);
py::list GetParallelParameterNameList(const std::string &phase);
void SetCNodeStrategy(const std::string &name, const parallel::Strategys &strategy);


+ 1
- 1
mindspore/ccsrc/utils/convert_utils.cc View File

@@ -275,7 +275,7 @@ tensor::TensorPtr ScalarToTensor(const ScalarPtr &scalar) {
case kNumberTypeFloat64:
return std::make_shared<tensor::Tensor>(GetValue<double>(scalar), data_type);
default:
MS_LOG(EXCEPTION) << "When convert scalar to tensor, the scalar type: " << data_type << "is valid.";
MS_LOG(EXCEPTION) << "When convert scalar to tensor, the scalar type: " << data_type << " is invalid.";
}
}



+ 37
- 1
tests/ut/python/parallel/test_roi_align.py View File

@@ -20,6 +20,8 @@ from mindspore.common.api import _cell_graph_executor
from mindspore.nn import Cell
from mindspore.ops import operations as P

from parallel.utils.utils import ParallelValidator

POOLED_HEIGHT = 2
POOLED_WIDTH = 2
SPATIAL_SCALE = 0.5
@@ -47,8 +49,9 @@ class Net(Cell):
def compile_net(net: Cell, *inputs):
    """Compile `net` in auto-parallel mode and return the compile phase.

    The returned phase is what ParallelValidator needs to query graph info.
    """
    net.set_auto_parallel()
    net.set_train()
    # The scraped diff left both the old and new compile() calls in place,
    # which would compile the net twice; only the auto-parallel call is kept.
    # With auto_parallel_mode=True, compile() returns (phase, output).
    phase, _ = _cell_graph_executor.compile(net, *inputs, auto_parallel_mode=True)
    context.reset_auto_parallel_context()
    return phase


def test_roi_align_auto_parallel():
@@ -86,3 +89,36 @@ def test_roi_align_strategy_error():
with pytest.raises(RuntimeError):
compile_net(net, _features, _rois)
context.reset_auto_parallel_context()


def test_roi_align_layout():
    """
    Features: ROIAlignInfo
    Description: validate layout and structure
    Expectation: No raise RuntimeError
    """
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
    net = Net(POOLED_HEIGHT, POOLED_WIDTH, SPATIAL_SCALE, ((4, 1, 1, 1), (2, 1)))
    phase = compile_net(net, _features, _rois)
    validator = ParallelValidator(net, phase)

    # layout: (device_arrangement, tensor_map, slice_shape, field_size, uniform_split, opt_shard_group)
    assert validator.check_parameter_layout('features', ([4, 2], [1, -1, -1, -1], [8, 3, 256, 256], 0, True, ''))

    # attributes of the sliced ROIAlign op
    assert validator.check_node_attrs(
        'ROIAlign-0',
        {'pooled_height': POOLED_HEIGHT, 'pooled_width': POOLED_WIDTH, 'spatial_scale': SPATIAL_SCALE})

    # inputs of the sliced ROIAlign op
    assert validator.check_node_inputs('ROIAlign-0', ['features', 'TensorScatterUpdate-0'])

    # surrounding sub-graph structure around the ROIAlign node
    assert validator.check_graph_structure({
        'ROIAlign-0': ['features', 'TensorScatterUpdate-0'],
        'MaskedFill-0': ['ROIAlign-0', 'ExpandDims-2', 0.0],
        'AllReduce-0': ['MaskedFill-0'],
    })

+ 117
- 0
tests/ut/python/parallel/utils/utils.py View File

@@ -0,0 +1,117 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from mindspore.common.api import _cell_graph_executor


class ParallelValidator:
    """
    Validator for distribute operator.

    Args:
        net (Cell): network for which compile has been executed with
            `auto_parallel_mode=True`.
        phase (str): the compile phase returned by `_cell_graph_executor.compile`.

    Examples:
        >>> from mindspore.common.api import _cell_graph_executor
        >>> from parallel.utils.utils import ParallelValidator
        >>> net = Net()
        >>> net.set_auto_parallel()
        >>> net.set_train()
        >>> phase, _ = _cell_graph_executor.compile(net, *inputs, auto_parallel_mode=True)
        >>> validator = ParallelValidator(net, phase)  # Init validator by net and phase
        >>> assert validator.check_parameter_shape("x", [8, 3, 256, 256])  # Check parameter slice shape
        >>> # expect_layout: (device_arrangement, tensor_map, slice_shape, field_size, uniform_split, opt_shard_group)
        >>> expect_layout = ([4, 2], [1, -1, -1, -1], [8, 3, 256, 256], 0, True, '')
        >>> assert validator.check_parameter_layout("x", expect_layout)
        >>> # check attrs for "ROIAlign-0" from graph_1
        >>> expect_attrs = {'pooled_height': POOLED_HEIGHT, 'pooled_width': POOLED_WIDTH}
        >>> assert validator.check_node_attrs("ROIAlign-0", expect_attrs, graph_id=1)
        >>> # check node inputs for "ROIAlign-0" from graph_0 (default graph_id)
        >>> expect_inputs = ['features', 'TensorScatterUpdate-0']
        >>> assert validator.check_node_inputs('ROIAlign-0', expect_inputs)
        >>> # check sub graph structure from graph_1
        >>> sub_graph = {
        ...     'ROIAlign-0': ['features', 'TensorScatterUpdate-0'],
        ...     'MaskedFill-0': ['ROIAlign-0', 'ExpandDims-2', 0.0],
        ...     'AllReduce-0': ['MaskedFill-0']
        ... }
        >>> assert validator.check_graph_structure(sub_graph, graph_id=1)

    """
    def __init__(self, net, phase):
        # Layout dict: param_name -> layout tuple/list as produced by the compiler.
        self._parameter_layout_dict = net.parameter_layout_dict
        # Graph info dict: "@graph_N" -> {node_name: {'inputs': [...], 'attrs': {...}}}.
        self._graph_info_dict = _cell_graph_executor._graph_executor.get_parallel_graph_info(phase)

    @property
    def parameter_layout_dict(self):
        return self._parameter_layout_dict

    @property
    def graph_info_dict(self):
        return self._graph_info_dict

    def check_parameter_layout(self, param_name: str, layout: [tuple, list]) -> bool:
        """Verify the full layout of a parameter. Returns False if the parameter is unknown."""
        if not isinstance(layout, (tuple, list)):
            raise TypeError("Type of layout must be list or tuple, but got {}".format(type(layout)))

        if param_name not in self._parameter_layout_dict.keys():
            return False
        # NOTE(review): this is an exact == comparison, so the expected layout
        # must use the same tuple/list nesting the compiler produces.
        return self._parameter_layout_dict[param_name] == layout

    def check_parameter_shape(self, param_name: str, shape: [tuple, list]) -> bool:
        """Verify only the slice shape (layout element 2) of a parameter."""
        if not isinstance(shape, (tuple, list)):
            raise TypeError("Type of shape must be list or tuple, but got {}".format(type(shape)))

        if param_name not in self._parameter_layout_dict.keys():
            return False
        return self._parameter_layout_dict[param_name][2] == shape

    def check_node_attrs(self, node_name: str, expect_attrs: dict, graph_id=0) -> bool:
        """Verify that every entry of expect_attrs matches the node's attrs (extra attrs are ignored)."""
        if not isinstance(expect_attrs, dict):
            raise TypeError("Type of expect_attrs must be dict, but got {}".format(type(expect_attrs)))

        cnode_info_dict = self._get_graph_cnode_info(graph_id)
        if node_name not in cnode_info_dict.keys():
            return False
        attrs = cnode_info_dict[node_name]['attrs']
        for attr_name in expect_attrs.keys():
            if attr_name not in attrs.keys() or attrs[attr_name] != expect_attrs[attr_name]:
                return False
        return True

    def check_node_inputs(self, node_name: str, expect_inputs: [tuple, list], graph_id=0) -> bool:
        """Verify the node's exact operand list (names and literal values, in order)."""
        if not isinstance(expect_inputs, (tuple, list)):
            raise TypeError("Type of expect_inputs must be list or tuple, but got {}".format(type(expect_inputs)))

        cnode_info_dict = self._get_graph_cnode_info(graph_id)
        expect_inputs = list(expect_inputs)
        if node_name not in cnode_info_dict.keys():
            return False
        inputs = cnode_info_dict[node_name]['inputs']
        return inputs == expect_inputs

    def check_graph_structure(self, nodes_dict: dict, graph_id=0) -> bool:
        """Verify a set of node -> inputs relations; True only if all of them hold."""
        if not isinstance(nodes_dict, dict):
            raise TypeError("Type of nodes_dict must be dict, but got {}".format(type(nodes_dict)))
        for name, inputs in nodes_dict.items():
            if not self.check_node_inputs(name, inputs, graph_id):
                return False
        return True

    def _get_graph_cnode_info(self, graph_id):
        """Fetch the cnode info dict of sub-graph `graph_id`; raises ValueError if absent."""
        graph_name = "@graph_" + str(graph_id)
        if graph_name not in self._graph_info_dict.keys():
            raise ValueError("{} does not exist".format(graph_name))
        return self._graph_info_dict[graph_name]

Loading…
Cancel
Save