Browse Source

Adjust CPU optimize graph pass

tags/v1.3.0
lizhenyu 4 years ago
parent
commit
efcf910b80
4 changed files with 36 additions and 7 deletions
  1. +4
    -0
      mindspore/ccsrc/runtime/framework/graph_compiler.cc
  2. +24
    -7
      mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc
  3. +3
    -0
      mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h
  4. +5
    -0
      mindspore/ccsrc/runtime/hardware/device_context.h

+ 4
- 0
mindspore/ccsrc/runtime/framework/graph_compiler.cc View File

@@ -319,6 +319,8 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
// 'KernelMod' is real executive object of kernel.
device_context->CreateKernel(graph->execution_order());

device_context->PreprocessBeforeRunGraph(graph);

if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
// Create device address for all anf nodes of graph.
CreateDeviceAddress(graph, device_context);
@@ -376,6 +378,8 @@ GraphId GraphCompiler::CompileGraph(const session::OpRunInfo &op_run_info, const
// Generate 'KernelMod' for kernel in graph.
device_context->CreateKernel(graph->execution_order());

device_context->PreprocessBeforeRunSingleOpGraph(graph);

// Create device address for all anf nodes of graph.
CreateDeviceAddress(graph, device_context);



+ 24
- 7
mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc View File

@@ -88,11 +88,6 @@ void CPUDeviceContext::OptimizeGraph(const KernelGraphPtr &graph) const {

// Run final optimization.
opt::CommonFinalOptimization(graph);

// Remove reorder after PS feature finish adapting push/pull in auto_monad.
auto execution_order = graph->execution_order();
AnfAlgo::ReorderPosteriorExecList(NOT_NULL(&execution_order));
graph->set_execution_order(execution_order);
}

void CPUDeviceContext::OptimizeSingleOpGraph(const KernelGraphPtr &graph) const {
@@ -104,9 +99,7 @@ void CPUDeviceContext::OptimizeSingleOpGraph(const KernelGraphPtr &graph) const
void CPUDeviceContext::OptimizeGraphImpl(const KernelGraphPtr &graph) const {
auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>();
pm->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast_cpu"));
pm->AddPass(std::make_shared<opt::InsertFormatTransformOpCPU>("insert_format_transform_op_cpu"));
pm->AddPass(std::make_shared<opt::EraseVisitAttr>());
optimizer->AddPassManager(pm);
(void)optimizer->Optimize(graph);
graph->SetExecOrderByDefault();
@@ -142,6 +135,30 @@ void CPUDeviceContext::CreateKernel(const std::vector<CNodePtr> &nodes) const {
}
}

namespace {
// Builds a one-off optimizer holding a single pass manager with the InsertCastCPU and
// EraseVisitAttr passes, runs it on `graph`, and then calls SetExecOrderByDefault() on the graph.
// The optimizer's return value is intentionally discarded.
void ProcessCast(const KernelGraphPtr &graph) {
  auto cast_optimizer = std::make_shared<opt::GraphOptimizer>();
  auto cast_passes = std::make_shared<opt::PassManager>();

  // Register the passes in the order they are expected to be applied.
  cast_passes->AddPass(std::make_shared<opt::InsertCastCPU>("insert_cast_cpu"));
  MS_LOG(INFO) << "Insert cast pass";
  cast_passes->AddPass(std::make_shared<opt::EraseVisitAttr>());

  cast_optimizer->AddPassManager(cast_passes);
  (void)cast_optimizer->Optimize(graph);

  // The passes may have changed the graph, so rebuild its execution order.
  graph->SetExecOrderByDefault();
}
} // namespace

// CPU override of the pre-run graph hook: applies ProcessCast to `graph`, then passes a copy of
// the execution order through AnfAlgo::ReorderPosteriorExecList and stores the result back.
void CPUDeviceContext::PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const {
  ProcessCast(graph);

  // Remove reorder after PS feature finish adapting push/pull in auto_monad.
  // Work on a local copy so the graph only sees the order after reordering is done.
  auto reordered_kernels = graph->execution_order();
  AnfAlgo::ReorderPosteriorExecList(NOT_NULL(&reordered_kernels));
  graph->set_execution_order(reordered_kernels);
}

// CPU override of the pre-run hook for single-op graphs: only ProcessCast is applied;
// unlike PreprocessBeforeRunGraph, the execution order is not reordered afterwards.
void CPUDeviceContext::PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const {
  ProcessCast(graph);
}

bool CPUDeviceContext::LaunchKernel(const CNodePtr &kernel, const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace, const std::vector<AddressPtr> &outputs,
bool) const {


+ 3
- 0
mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h View File

@@ -48,6 +48,9 @@ class CPUDeviceContext : public DeviceContext {
void SetOperatorInfo(const std::vector<CNodePtr> &nodes) const override;
void CreateKernel(const std::vector<CNodePtr> &nodes) const override;

// CPU-specific graph adjustments performed before a compiled graph is run.
void PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const override;
// CPU-specific graph adjustments performed before a single-op graph is run.
void PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const override;

bool LaunchKernel(const CNodePtr &kernel, const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace, const std::vector<AddressPtr> &outputs,
bool is_dynamic_shape = false) const override;


+ 5
- 0
mindspore/ccsrc/runtime/hardware/device_context.h View File

@@ -86,6 +86,11 @@ class DeviceContext {
// 'KernelMod' is real executive object of kernel.
virtual void CreateKernel(const std::vector<CNodePtr> &nodes) const = 0;

// Adjust the kernel graph before running it; used in Graph Mode.
// The default implementation is a no-op; a device context overrides it when it needs such adjustments.
virtual void PreprocessBeforeRunGraph(const KernelGraphPtr &graph) const {}
// Adjust a single-op kernel graph before running it; used in PyNative Mode.
// The default implementation is a no-op; a device context overrides it when it needs such adjustments.
virtual void PreprocessBeforeRunSingleOpGraph(const KernelGraphPtr &graph) const {}

// Infer kernel shape and update abstract info for dynamic shape kernel.
// The default implementation forwards `kernel` to AnfAlgo::InferShape; being virtual, it can be overridden.
virtual void UpdateDynamicShape(const CNodePtr &kernel) const { AnfAlgo::InferShape(kernel); }



Loading…
Cancel
Save