ascend control use vm

4 years ago · 5b1639ad56
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -381,7 +381,7 @@ void AscendSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_gra
        MS_LOG(EXCEPTION) << "SyncHostToDevice failed.";
      }
      if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode ||
          AnfAlgo::IsParameterWeight(input_param)) {
          AnfAlgo::IsParameterWeight(input_param) || kernel_graph->IsUpdatedParameter(input_param)) {
        tensor->set_device_address(device_address);
      }
      if (kernel_graph->IsUpdatedParameter(input_param)) {
@@ -523,30 +523,14 @@ void AscendSession::BuildGraphImpl(GraphId graph_id) {
  InitRuntimeResource();
  // multiple graph handle
  if (graph_id == final_graph_id_) {
    if (!graph->executable()) {
      return;
    }
    SetFinalGraphSummaryFlag(graph);
    // OptChildGraphs
    auto graph_order = GetGraphOrder(final_graph_id_);
    auto &graph_type = GetGraphOrderType(final_graph_id_);
    for (size_t i = 0; i < graph_order.size(); i++) {
      if (!(graph_type[i] == BRANCH_END || graph_type[i] == BRANCH_START)) {
        auto child_graph = GetGraph(graph_order[i]);
        CompileChildGraph(child_graph);
      }
    }
    SetSummaryNodes(graph.get());
    // merge child graph
    MergeGraphExecOrder();
  } else {
    auto single_graph = GetGraph(graph_id);
    MS_EXCEPTION_IF_NULL(single_graph);
    CompileChildGraph(single_graph);
    // set the distinction label of single graph
    single_graph->set_stream_distinction_label(graph_id);
    single_graph->UpdateExecuteKernelStreamLabel();
  }
    MS_LOG(EXCEPTION) << "Unexpected graph id:" << graph_id << ", final_graph_id_:" << final_graph_id_;
  }
  auto single_graph = GetGraph(graph_id);
  MS_EXCEPTION_IF_NULL(single_graph);
  CompileChildGraph(single_graph);
  // set the distinction label of single graph
  single_graph->set_stream_distinction_label(graph_id);
  single_graph->UpdateExecuteKernelStreamLabel();
  // adjust execution order because  merge child graph and other special operations
  AdjustKernel(graph);
 #if ENABLE_CPU && ENABLE_D
--- a/mindspore/ccsrc/backend/session/kernel_graph.cc
+++ b/mindspore/ccsrc/backend/session/kernel_graph.cc
@@ -1362,7 +1362,9 @@ void KernelGraph::SetOptimizerFlag() {
        continue;
      }
      auto param = real_node->cast<ParameterPtr>();
      if (AnfAlgo::IsParameterWeight(param)) {
      auto abstract = param->abstract();
      MS_EXCEPTION_IF_NULL(abstract);
      if (abstract->isa<abstract::AbstractRef>()) {
        has_optimizer_ = true;
        (void)updated_parameters_.insert(param);
      }
--- a/mindspore/ccsrc/backend/session/session_basic.cc
+++ b/mindspore/ccsrc/backend/session/session_basic.cc
@@ -447,8 +447,6 @@ void UpdateGraphAquireGilAttr(const NotNull<KernelGraphPtr> &root_graph) {
  return;
 }

 // 1. Convert the node to make_tuple if the node is a ValueNode<ValueTuple> and it's the input of 'return' node.
 // 2. Set the return of graph if node is "Return" node.
 void SetReturnNode(const AnfNodePtr &node, KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(node);
@@ -468,6 +466,25 @@ void SetReturnNode(const AnfNodePtr &node, KernelGraph *graph) {
    }
  }
 }

 bool NoPartialInPartialGraph(const AnfNodePtr &partial_node) {
  MS_EXCEPTION_IF_NULL(partial_node);
  auto partial_cnode = partial_node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(partial_cnode);
  auto partial_graph = GetValueNode<FuncGraphPtr>(partial_cnode->input(kFirstDataInputIndex));
  MS_EXCEPTION_IF_NULL(partial_graph);
  auto graph_nodes = TopoSort(partial_graph->get_return());
  for (auto &node : graph_nodes) {
    if (AnfAlgo::CheckPrimitiveType(node, prim::kPrimPartial) || AnfAlgo::CheckPrimitiveType(node, prim::kPrimCall) ||
        AnfAlgo::CheckPrimitiveType(node, prim::kPrimSwitch)) {
      return false;
    }
    if (node->isa<CNode>() && IsValueNode<FuncGraph>(node->cast<CNodePtr>()->input(kAnfPrimitiveIndex))) {
      return false;
    }
  }
  return true;
 }
 }  // namespace

 GraphId SessionBasic::graph_sum_ = 0;
@@ -1977,7 +1994,8 @@ void SessionBasic::HandleInternalOutput(const AnfNodePtr &input_front_node, cons
  if (internal_output) {
    auto users = ExtendNodeUsers(front_func_graph_manager, front_node);
    for (auto &user : users) {
      if (AnfAlgo::CheckPrimitiveType(user, prim::kPrimPartial) && kernel_target != kGPUDevice) {
      if (AnfAlgo::CheckPrimitiveType(user, prim::kPrimPartial) && kernel_target != kGPUDevice &&
          NoPartialInPartialGraph(user)) {
        auto partial_target = AddPartialParametersMap(front_func_graph_manager, user);
        if (partial_target != kNoTarget && partial_target != kernel_target) {
          unique_target = false;
--- a/mindspore/ccsrc/pipeline/jit/action.cc
+++ b/mindspore/ccsrc/pipeline/jit/action.cc
@@ -593,9 +593,18 @@ bool TaskEmitAction(const ResourcePtr &res) {
    context_ptr->set_param<bool>(MS_CTX_ENABLE_LOOP_SINK, false);
  } else if (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode) {
    std::string device_target = context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET);
    if (device_target == kAscendDevice && backend != kMsVm) {
    auto manager = func_graph->manager();
    auto graphs = manager->func_graphs();
    bool exist_while =
      std::any_of(graphs.cbegin(), graphs.cend(), [](const FuncGraphPtr &fg) { return fg->recursive(); });
    if (device_target == kAscendDevice && backend != kMsVm && !exist_while) {
      MS_LOG(INFO) << "Run graph mode with multigraph sink.";
      bc_ptr->set_is_multi_graph_sink(true);
      context_ptr->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, true);
    } else {
      MS_LOG(INFO) << "Run graph mode with vm.";
      bc_ptr->set_is_multi_graph_sink(false);
      context_ptr->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, false);
    }
  }

--- a/mindspore/ccsrc/pipeline/jit/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc
@@ -142,20 +142,21 @@ std::string GetCompileExceptionInfo() {
  return oss.str();
 }

 void SetGpuLoopSink(const ResourcePtr &resource) {
 void SetLoopCount(const ResourcePtr &resource) {
  MS_EXCEPTION_IF_NULL(resource);
  auto func_graph = resource->func_graph();
  if (func_graph != nullptr && func_graph->manager() != nullptr) {
    auto manager = func_graph->manager();
    size_t graph_nums = manager->func_graphs().size();
    int64_t sinksize = ConfigManager::GetInstance().iter_num();
    if (graph_nums == 1 || MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      resource->set_gpu_loopsink(true, sinksize);
    } else {
      resource->set_gpu_loopsink(false, sinksize);
    int64_t loop_size = ConfigManager::GetInstance().iter_num();
    const auto context_ptr = MsContext::GetInstance();
    if (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice) {
      resource->set_vm_loop(!context_ptr->get_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK), loop_size);
    } else if (context_ptr->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice) {
      bool run_with_mind_rt = graph_nums == 1 || context_ptr->get_param<bool>(MS_CTX_ENABLE_MINDRT);
      resource->set_vm_loop(!run_with_mind_rt, loop_size);
    }
    MS_LOG(INFO) << "Change gpu_loopsink_flag_ to " << resource->gpu_loopsink_flag() << ", set loopsink size to "
                 << sinksize;
    MS_LOG(INFO) << "Change vm_loop_flag to " << resource->vm_loop_flag() << ", set loop_size to " << loop_size;
  }
 }

@@ -826,7 +827,7 @@ void Pipeline::Run(const std::string &phase_s) {
        MS_LOG(DEBUG) << "Action " << action.first << " end.";
      };
      if (action.first == "task_emit") {
        SetGpuLoopSink(resource_);
        SetLoopCount(resource_);
      } else if (action.first == "validate") {
        CacheValidateFuncGraph(phase_s, resource_);
      }
@@ -1002,13 +1003,17 @@ py::object ExecutorPy::Run(const py::tuple &args, const py::object &phase) {
    MS_LOG(EXCEPTION) << "Can't find run graph func for " << phase_s;
  }
  // Set loopsink size for each phase.
  bool is_loopsink = info_[phase_s]->resource->gpu_loopsink_flag();
  int64_t sinksize = info_[phase_s]->resource->gpu_loopsink_size();
  ConfigManager::GetInstance().set_gpu_loopsink_size(is_loopsink ? sinksize : 1);
  // If target is not gpu or is loopsink, keep vmloop 1.
  bool g = (MsContext::GetInstance()->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice);
  int64_t vm_loop = (!g || is_loopsink) ? 1 : sinksize;
  MS_LOG(INFO) << "VM loop size " << vm_loop << ", loopsink size " << (is_loopsink ? sinksize : 1);
  bool vm_loop_flag = info_[phase_s]->resource->vm_loop_flag();
  int64_t loop_size = info_[phase_s]->resource->loop_size();
  int64_t vm_loop = 1;
  if (vm_loop_flag) {
    vm_loop = loop_size;
  } else {
    // Set the loop size in config if graphs nums is 1(is_loop_sin=True), then there will be a loop embrace
    // 'Execute(graph)' in GPUSession.
    ConfigManager::GetInstance().set_gpu_loopsink_size(loop_size);
  }
  MS_LOG(INFO) << "VM loop size " << vm_loop << ", loopsink size " << vm_loop;
  py::object ret;
  MS_LOG(DEBUG) << "Eval run" << backend;
  for (int64_t i = 0; i < vm_loop; i++) {
@@ -1158,9 +1163,6 @@ bool InitExecDatasetVm(const std::string &queue_name, int64_t size, int64_t batc
  // Convert CNodeList to LinConvertResult.
  auto segment = std::make_shared<GraphSegment>(std::vector<AnfNodePtr>{app_init}, false);
  auto runner = convert_fn(segment, "");
  if (MsContext::GetInstance()->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode) {
    backend->Link(runner.graph_id);
  }
  ConfigManager::GetInstance().set_iter_num(size);
  // PS cache does not support loop sink.
 #if ((defined ENABLE_CPU) && (!defined _WIN32))
--- a/mindspore/ccsrc/pipeline/jit/resource.h
+++ b/mindspore/ccsrc/pipeline/jit/resource.h
@@ -75,12 +75,12 @@ class Resource : public ResourceBase {
  const abstract::AbstractBasePtrList &args_spec() const { return args_spec_; }
  void set_args_spec(const abstract::AbstractBasePtrList &args_spec) { args_spec_ = args_spec; }

  void set_gpu_loopsink(const bool &flag, const int64_t size) {
    gpu_loopsink_flag_ = flag;
    gpu_loopsink_size_ = size;
  void set_vm_loop(const bool &flag, const int64_t size) {
    vm_loop_flag_ = flag;
    loop_size_ = size;
  }
  bool gpu_loopsink_flag() { return gpu_loopsink_flag_; }
  int64_t gpu_loopsink_size() { return gpu_loopsink_size_; }
  bool vm_loop_flag() { return vm_loop_flag_; }
  int64_t loop_size() { return loop_size_; }
  // Reclaim resource and clear the cache.
  // ExecutorPy::Compile() can be called multiple times, so cache
  // should be cleared.
@@ -92,8 +92,8 @@ class Resource : public ResourceBase {
  abstract::AbstractBasePtrList args_spec_;
  py::object input_;
  bool is_cleaned_;
  bool gpu_loopsink_flag_{false};
  int64_t gpu_loopsink_size_{1};
  bool vm_loop_flag_{false};
  int64_t loop_size_{1};
 };

 using ResourcePtr = std::shared_ptr<pipeline::Resource>;
--- a/mindspore/ccsrc/vm/backend.cc
+++ b/mindspore/ccsrc/vm/backend.cc
@@ -289,14 +289,6 @@ VectorRef MsBackend::MsRunGraph(const GraphId &g, const VectorRef &args, const s
  return outputs;
 }

 void MsBackend::Link(GraphId graph_id) {
  MS_EXCEPTION_IF_NULL(target_sess_);
  if (graph_id == kInvalidGraphId) {
    graph_id = target_sess_->GetFinalRunGraph();
  }
  target_sess_->BuildGraph(graph_id);
 }

 MsBackend::MsBackend(const std::string &name, const std::string &target, uint32_t device_id) : Backend(name) {
  convert_fn_ = std::bind(&MsBackend::MsConvert, this, std::placeholders::_1, std::placeholders::_2);
  target_sess_ = session::SessionFactory::Get().Create(target);
--- a/mindspore/ccsrc/vm/backend.h
+++ b/mindspore/ccsrc/vm/backend.h
@@ -61,7 +61,6 @@ class Backend {
  virtual bool GetCond(const BaseRef &c, bool *value);
  virtual bool GetIndex(const BaseRef &c, int64_t *value);
  virtual GraphId CompileGraph(NotNull<FuncGraphPtr> fg) { return kInvalidGraphId; }
  virtual void Link(GraphId) {}
  virtual void SetDebugger() {}

  bool is_multi_graph_sink() const { return is_multi_graph_sink_; }
@@ -82,7 +81,6 @@ class MsBackend : public Backend {
  VectorRef MsRunGraph(const GraphId &g, const VectorRef &args, const std::string &target = "");

  VectorRef MsSimuRunGraph(const GraphId &g);
  void Link(GraphId) override;
  GraphId CompileGraph(NotNull<FuncGraphPtr> fg) override;
  VectorRef RunGraph(GraphId graph_id, const VectorRef &args);
  void ClearSessionGraphs();
--- a/mindspore/ccsrc/vm/transform.cc
+++ b/mindspore/ccsrc/vm/transform.cc
@@ -580,9 +580,6 @@ BackendPtr CreateBackend() {
      if (MsContext::GetInstance()->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
        backend->set_is_multi_graph_sink(false);
        context_ptr->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, false);
      } else {
        backend->set_is_multi_graph_sink(true);
        context_ptr->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, true);
      }
    }
    return backend;
--- a/mindspore/core/ir/func_graph_cloner.cc
+++ b/mindspore/core/ir/func_graph_cloner.cc
@@ -759,10 +759,8 @@ FuncGraphPtr TransformableClone(const FuncGraphPtr &func_graph, const TraceInfoP
    new_func_graph->set_param_default_value(item.first, cloner[item.second]);
  }

  if (MsContext::GetInstance()->get_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK)) {
    if (func_graph->has_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES)) {
      new_func_graph->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true);
    }
  if (func_graph->has_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES)) {
    new_func_graph->set_flag(FUNC_GRAPH_FLAG_IGNORE_VALUES, true);
  }

  if (func_graph->has_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)) {
--- a/tests/st/control/inner/test_002_single_for.py
+++ b/tests/st/control/inner/test_002_single_for.py
@@ -52,16 +52,20 @@ def test_single_for_01():

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    for_net_foward = SingleForNet()
    graph_forward_res = for_net_foward(x, y, z)

    for_net = SingleForNet()
    net = GradNet(for_net)
    graph_forward_res = for_net(x, y, z)
    graph_backward_res = net(x, y, z)

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    for_net_foward = SingleForNet()
    pynative_forward_res = for_net_foward(x, y, z)

    for_net = SingleForNet()
    net = GradNet(for_net)
    pynative_forward_res = for_net(x, y, z)
    pynative_backward_res = net(x, y, z)

    assert graph_forward_res == pynative_forward_res
--- a/tests/st/control/inner/test_030_for_in_if.py
+++ b/tests/st/control/inner/test_030_for_in_if.py
@@ -23,6 +23,7 @@ from mindspore.common import dtype as mstype
 grad_all = C.GradOperation(get_all=True)
 context.set_context(device_target="Ascend")


 def test_for_in_if_01():
    class ForInIfNet(nn.Cell):
        def __init__(self):
@@ -69,6 +70,7 @@ def test_for_in_if_01():
    assert graph_forward_res == pynative_forward_res
    assert graph_backward_res == pynative_backward_res


 def test_for_in_if_02():
    class ForInIfNet(nn.Cell):
        def __init__(self):
@@ -100,7 +102,7 @@ def test_for_in_if_02():
        def construct(self, *inputs):
            return grad_all(self.net)(*inputs)

    x = Tensor([10], mstype.int32)
    x = Tensor([10], mstype.float32)

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
@@ -152,7 +154,7 @@ def test_for_in_if_03():
        def construct(self, *inputs):
            return grad_all(self.net)(*inputs)

    x = Tensor([10], mstype.int32)
    x = Tensor([10], mstype.float32)

    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
--- a/tests/st/control/inner/test_031_for_in_while.py
+++ b/tests/st/control/inner/test_031_for_in_while.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
 import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.common.parameter import Parameter
@@ -23,6 +24,7 @@ from mindspore.common import dtype as mstype
 grad_all = C.GradOperation(get_all=True)
 context.set_context(device_target="Ascend")

@pytest.mark.skip(reason="not supported for in while")
 def test_for_in_while_01():
    class ForInWhileNet(nn.Cell):
        def __init__(self):
@@ -74,7 +76,7 @@ def test_for_in_while_01():
    assert graph_forward_res == pynative_forward_res
    assert graph_backward_res == pynative_backward_res


@pytest.mark.skip(reason="not supported for in while")
 def test_for_in_while_02():
    class ForInWhileNet(nn.Cell):
        def __init__(self):
--- a/tests/st/control/inner/test_100_if_after_if.py
+++ b/tests/st/control/inner/test_100_if_after_if.py
@@ -105,16 +105,20 @@ class GradNet(nn.Cell):
 def control_flow_if_after_if(input_net, x, y):
    # graph mode
    context.set_context(mode=context.GRAPH_MODE)
    forward_net = input_net()

    net = input_net()
    grad_net = GradNet(net)
    graph_forward_res = net(x, y)
    graph_forward_res = forward_net(x, y)
    graph_backward_res = grad_net(x, y)

    # pynative mode
    context.set_context(mode=context.PYNATIVE_MODE)
    forward_net = input_net()

    net = input_net()
    grad_net = GradNet(net)
    pynative_forward_res = net(x, y)
    pynative_forward_res = forward_net(x, y)
    pynative_backward_res = grad_net(x, y)

    assert graph_forward_res == pynative_forward_res
--- a/tests/st/control/inner/test_131_if_after_for_in_while.py
+++ b/tests/st/control/inner/test_131_if_after_for_in_while.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
 import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -21,6 +22,7 @@ from mindspore.common.parameter import Parameter
 grad_all = C.GradOperation(get_all=True)
 context.set_context(device_target="Ascend")

@pytest.mark.skip(reason="not supported for in while")
 def test_if_after_for_in_while():
    class IfAfterForInWhileNet(nn.Cell):
        def __init__(self):
--- a/tests/st/control/inner/test_231_while_for_while.py
+++ b/tests/st/control/inner/test_231_while_for_while.py
@@ -14,6 +14,7 @@
 # ============================================================================

 import numpy as np
 import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -54,7 +55,7 @@ class BackwardNet(nn.Cell):
        grads = self.grad(self.forward_net)(*inputs)
        return grads


@pytest.mark.skip(reason="not supported for in while")
 def test_forward():
    x = Tensor(np.array(1), mstype.int32)
    y = Tensor(np.array(3), mstype.int32)
@@ -62,7 +63,7 @@ def test_forward():
    out = forward_net(x, y)
    print("forward out:", out)


@pytest.mark.skip(reason="not supported for in while")
 def test_backward():
    x = Tensor(np.array(1), mstype.int32)
    y = Tensor(np.array(3), mstype.int32)
--- a/tests/st/control/inner/test_331_for_after_for_in_while.py
+++ b/tests/st/control/inner/test_331_for_after_for_in_while.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
 import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.common.parameter import Parameter
@@ -22,7 +23,7 @@ from mindspore.common import dtype as mstype

 grad_all = C.GradOperation(get_all=True)
 context.set_context(device_target="Ascend")

@pytest.mark.skip(reason="not supported for in while")
 def test_for_after_for_in_while_01():
    class ForAfterForInWhileNet(nn.Cell):
        def __init__(self):
@@ -87,7 +88,7 @@ def test_for_after_for_in_while_01():
    assert graph_forward_res == pynative_forward_res
    assert graph_backward_res == pynative_backward_res


@pytest.mark.skip(reason="not supported for in while")
 def test_for_after_for_in_while_02():
    class ForAfterForInWhileNet(nn.Cell):
        def __init__(self):