@@ -390,6 +390,8 @@ void AscendSession::RunOpImpl(const OpRunInfo &op_run_info, const GraphInfo &gra
   MS_LOG(INFO) << "Run op " << op_run_info.op_name << " start!";
   // malloc mem
   RunOpMemoryAlloc(op_run_info.value, input_tensors, graph.get());
+  // Build dynamic kernel
+  BuildDynamicKernel(graph);
   // load input data to device
   LoadInputData(graph, input_tensors);
   // run op
@@ -510,6 +512,17 @@ void AscendSession::BuildKernel(const std::shared_ptr<KernelGraph> &kernel_graph
   MS_LOG(INFO) << "Finish!";
 }
 
+void AscendSession::BuildDynamicKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const {
+  MS_LOG(INFO) << "Start!";
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id_);
+  MS_EXCEPTION_IF_NULL(runtime_instance);
+  if (!runtime_instance->GenDynamicKernel(kernel_graph.get())) {
+    MS_LOG(DEBUG) << "Graph:" << kernel_graph->graph_id() << " failed to generate dynamic kernel!";
+  }
+  MS_LOG(INFO) << "Finish!";
+}
+
 void AscendSession::MemoryAlloc(KernelGraph *kernel_graph) const {
   MS_LOG(INFO) << "Start!";
   MS_EXCEPTION_IF_NULL(kernel_graph);
@@ -90,6 +90,7 @@ class AscendSession : public SessionBasic {
   void RunOpAdjustKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const;
   void AssignStream(NotNull<KernelGraphPtr> kernel_graph) const;
   void BuildKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const;
+  void BuildDynamicKernel(const std::shared_ptr<KernelGraph> &kernel_graph) const;
   void MemoryAlloc(KernelGraph *kernel_graph) const;
   void RunOpMemoryAlloc(const ValuePtr &pre_output_value, const std::vector<tensor::TensorPtr> &input_tensors,
                         KernelGraph *kernel_graph) const;
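
Reviewer note on the three hunks above: BuildDynamicKernel runs once per single-op graph, right after memory allocation, and a failed GenDynamicKernel is only logged at DEBUG because a graph with no dynamic-shape kernels simply keeps the ordinary launch path. A minimal, self-contained C++ sketch of that tolerant pattern (RuntimeStub and the sample graph ids are invented for illustration, not MindSpore APIs):

```cpp
#include <iostream>

// Stand-in for the kernel runtime; GenDynamicKernel returns false when the
// graph has nothing dynamic to generate (here: pretend odd ids fail).
struct RuntimeStub {
  bool GenDynamicKernel(int graph_id) { return graph_id % 2 == 0; }
};

// Mirrors the shape of AscendSession::BuildDynamicKernel above: failure is
// tolerated, and execution later falls back to the static launch path.
void BuildDynamicKernelSketch(RuntimeStub *runtime, int graph_id) {
  if (!runtime->GenDynamicKernel(graph_id)) {
    std::cout << "graph " << graph_id << ": no dynamic kernels, static path\n";
    return;
  }
  std::cout << "graph " << graph_id << ": dynamic kernels ready\n";
}

int main() {
  RuntimeStub runtime;
  BuildDynamicKernelSketch(&runtime, 0);
  BuildDynamicKernelSketch(&runtime, 1);
  return 0;
}
```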
@@ -1315,6 +1315,8 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const OpRunInf
                                                                   const std::vector<tensor::TensorPtr> &input_tensors,
                                                                   const std::vector<int> &tensors_mask) {
   auto graph = std::make_shared<KernelGraph>();
+  graph->set_graph_id(run_op_graph_id_);
+  run_op_graph_id_++;
   std::vector<AnfNodePtr> inputs;
   // set input[0]
   PrimitivePtr op_prim = op_run_info.primitive;
@@ -1343,9 +1345,12 @@ std::shared_ptr<KernelGraph> SessionBasic::ConstructSingleOpGraph(const OpRunInf
   MS_EXCEPTION_IF_NULL(cnode);
   // set abstract,which include inferred shapes and types
   cnode->set_abstract(op_run_info.abstract);
+  // get output dynamic shape info
+  AnfAlgo::SetNodeAttr(kAttrOutputIsDynamicShape, MakeValue(op_run_info.is_dynamic_shape), cnode);
   // set execution order
   std::vector<CNodePtr> exe_order = {cnode};
   graph->set_execution_order(exe_order);
+  graph->UpdateGraphDynamicAttr();
   // set output
   CreateOutputNode(cnode, graph);
   graph->SetInputNodes();
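
The new run_op_graph_id_ counter matters because the runtime caches generated dynamic kernels per graph id (graph_dynamic_kernel_map_ in the kernel-runtime hunks below); if every PyNative single-op graph kept the default id, their cache entries would collide. A small self-contained sketch of that invariant, with stand-in types:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Stand-in for a generated dynamic kernel.
struct FakeDynamicKernel {
  std::string op_name;
};

int main() {
  uint32_t run_op_graph_id = 0;  // per-session counter, as in the patch
  std::unordered_map<uint32_t, std::vector<FakeDynamicKernel>> cache;

  // Each single-op graph gets a fresh id, so cache entries stay distinct.
  for (const std::string &op : {"Unique", "SparseGatherV2"}) {
    uint32_t graph_id = run_op_graph_id++;
    cache[graph_id] = {FakeDynamicKernel{op}};
  }
  std::cout << "cached graphs: " << cache.size() << '\n';  // prints 2
  return 0;
}
```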
@@ -50,12 +50,13 @@ struct OpRunInfo {
   PrimitivePtr primitive;
   AbstractBasePtr abstract;
   ValuePtr value = nullptr;
+  bool is_dynamic_shape = false;
 };
 using OpRunInfoPtr = std::shared_ptr<OpRunInfo>;
 class Executor;
 class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
  public:
-  SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0) {
+  SessionBasic() : context_(nullptr), summary_callback_(nullptr), device_id_(0), run_op_graph_id_(0) {
 #if !defined(_WIN32) && !defined(_WIN64)
     debugger_ = nullptr;
 #endif
@@ -182,6 +183,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
   CallBackFunc summary_callback_;
   static GraphId graph_sum_;
   uint32_t device_id_;
+  uint32_t run_op_graph_id_;
   std::shared_ptr<Executor> executor_;
 #if !defined(_WIN32) && !defined(_WIN64)
   std::shared_ptr<Debugger> debugger_;
@@ -53,8 +53,9 @@ struct OpExecInfo {
   std::string prim_id;
   PrimitivePyPtr py_primitive;
   AbstractBasePtr abstract;
-  ValuePtr value = nullptr;
+  bool is_dynamic_shape = false;
+  ValuePtr value = nullptr;
 
   py::list op_inputs;
   py::dict op_attrs;
   std::vector<bool> inputs_mask;
@@ -758,6 +758,13 @@ py::tuple PynativeExecutor::RunOpInner(const py::args &args) {
     cnode->set_abstract(op_exec_info->abstract);
   }
 
+  // get output dynamic shape info
+  MS_EXCEPTION_IF_NULL(op_exec_info->abstract);
+  auto abstract_info = op_exec_info->abstract->ToString();
+  if (abstract_info.find("-1") != string::npos) {
+    op_exec_info->is_dynamic_shape = true;
+  }
+
   op_exec_info->inputs_mask = op_masks;
   MS_EXCEPTION_IF_NULL(op_exec_info);
   if (op_exec_info->abstract != nullptr) {
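
The check above flags an op as dynamic-shape by searching the textual form of its inferred abstract for a -1 dimension. A hedged illustration of that heuristic (the abstract string below is a made-up example, not the exact ToString() output):

```cpp
#include <iostream>
#include <string>

int main() {
  // Hypothetical abstract text; a -1 dimension marks a shape known only at runtime.
  std::string abstract_info = "AbstractTensor(shape: (-1), element: Int32)";
  bool is_dynamic_shape = abstract_info.find("-1") != std::string::npos;
  std::cout << std::boolalpha << is_dynamic_shape << '\n';  // true
  return 0;
}
```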
@@ -1301,7 +1308,7 @@ py::object PynativeExecutor::RunOpInMs(const OpExecInfoPtr &op_exec_info, Pynati
   // get graph info for checking it whether existing in the cache
   std::string graph_info = GetSingleOpGraphInfo(op_exec_info, input_tensors);
   session::OpRunInfo op_run_info = {op_exec_info->op_name, op_exec_info->py_primitive, op_exec_info->abstract,
-                                    op_exec_info->value};
+                                    op_exec_info->value, op_exec_info->is_dynamic_shape};
   session->BuildOp(&op_run_info, graph_info, input_tensors, tensors_mask);
   EraseValueNodeTensor(tensors_mask, &input_tensors);
   VectorRef outputs;
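
One subtlety in the call-site change above: OpRunInfo is brace-initialized, so the extra op_exec_info->is_dynamic_shape argument only lands in the right member because is_dynamic_shape was appended after value in the struct. A minimal sketch of that aggregate-initialization order dependence, with stand-in members:

```cpp
#include <iostream>
#include <string>

// Trimmed stand-in for session::OpRunInfo; member order drives brace init.
struct OpRunInfoSketch {
  std::string op_name;
  bool is_dynamic_shape = false;  // appended last, as in the patch
};

int main() {
  OpRunInfoSketch info = {"Unique", true};  // initializers follow declaration order
  std::cout << info.op_name << " dynamic=" << info.is_dynamic_shape << '\n';
  return 0;
}
```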
@@ -358,16 +358,13 @@ bool AscendKernelRuntime::GenDynamicKernel(const session::KernelGraph *graph) {
     MS_EXCEPTION_IF_NULL(cnode);
     MS_LOG(INFO) << "Generate node:" << cnode->fullname_with_scope() << " dynamic kernel";
     auto kernel_mod = AnfAlgo::GetKernelMod(cnode);
     MS_EXCEPTION_IF_NULL(kernel_mod);
     auto dynamic_kernel = kernel_mod->GenDynamicKernel(cnode, stream_);
     MS_EXCEPTION_IF_NULL(dynamic_kernel);
     dynamic_kernel->Initialize();
     dynamic_kernels.emplace_back(dynamic_kernel);
   }
-  auto ret = graph_dynamic_kernel_map_.try_emplace(graph->graph_id(), dynamic_kernels);
-  if (!ret.second) {
-    MS_LOG(ERROR) << "Graph:" << graph->graph_id() << " already generator executor";
-    return false;
-  }
+  graph_dynamic_kernel_map_[graph->graph_id()] = dynamic_kernels;
   MS_LOG(INFO) << "GenDynamicKernel end";
   return true;
 }
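
The switch from try_emplace to plain assignment above is what lets dynamic kernels be regenerated for a graph id: try_emplace refuses to overwrite an existing entry (the old code turned that into an error), while assignment makes regeneration idempotent. A standalone demonstration of the difference:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>

int main() {
  std::unordered_map<int, std::string> cache;
  cache.try_emplace(7, "first");
  auto ret = cache.try_emplace(7, "second");  // rejected: key already present
  std::cout << ret.second << ' ' << cache[7] << '\n';  // prints: 0 first
  cache[7] = "second";  // assignment overwrites, as the patched code does
  std::cout << cache[7] << '\n';  // prints: second
  return 0;
}
```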
@@ -28,7 +28,7 @@ void DynamicKernel::Initialize() {
   MS_LOG(INFO) << "Init Start";
   is_dynamic_shape_ = AnfAlgo::IsDynamicShape(cnode_ptr_);
   if (!is_dynamic_shape_) {
-    MS_LOG(INFO) << "cnode is not dynamic shape:" << cnode_ptr_->fullname_with_scope();
+    MS_LOG(DEBUG) << "cnode is not dynamic shape:" << cnode_ptr_->fullname_with_scope();
     return;
   }
@@ -37,7 +37,7 @@ void DynamicKernel::Initialize() {
   auto have_depends = AnfAlgo::HasNodeAttr(kDynamicShapeDepends, cnode_ptr_);
   if (!have_depends) {
-    MS_LOG(WARNING) << "No dynamic_shape_depends found";
+    MS_LOG(DEBUG) << "No dynamic_shape_depends found";
     return;
   }
   MS_LOG(INFO) << "Have depends";
@@ -799,18 +799,38 @@ void KernelRuntime::GenAddrCleanLaunchArgs(const CNodePtr &cnode, AddressPtrList
 bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) {
   auto &kernels = graph.execution_order();
-  for (const auto &kernel : kernels) {
-    auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
-    MS_EXCEPTION_IF_NULL(kernel_mod);
-    AddressPtrList kernel_inputs;
-    AddressPtrList kernel_workspaces;
-    AddressPtrList kernel_outputs;
-    GenLaunchArgs(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs);
-    auto ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
-    if (!ret) {
-      MS_LOG(ERROR) << "Launch kernel failed.";
-      return false;
+  std::vector<DynamicKernelPtr> dynamic_kernel_list;
+  auto iter = graph_dynamic_kernel_map_.find(graph.graph_id());
+  if (iter != graph_dynamic_kernel_map_.end()) {
+    dynamic_kernel_list = iter->second;
+  }
+  if (!dynamic_kernel_list.empty() && dynamic_kernel_list.size() != kernels.size()) {
+    MS_LOG(EXCEPTION) << "The size of dynamic kernels " << dynamic_kernel_list.size()
+                      << " should be equal to the size of kernels " << kernels.size();
+  }
+  for (size_t i = 0; i < kernels.size(); ++i) {
+    if (!dynamic_kernel_list.empty() && dynamic_kernel_list[i] != nullptr &&
+        dynamic_kernel_list[i]->is_dynamic_shape()) {
+      dynamic_kernel_list[i]->InferShape();
+      dynamic_kernel_list[i]->UpdateArgs();
+      dynamic_kernel_list[i]->Execute();
+      if (!SyncStream()) {
+        MS_LOG(ERROR) << "SyncStream failed";
+        return false;
+      }
+      dynamic_kernel_list[i]->PostExecute();
+    } else {
+      auto kernel_mod = AnfAlgo::GetKernelMod(kernels[i]);
+      MS_EXCEPTION_IF_NULL(kernel_mod);
+      AddressPtrList kernel_inputs;
+      AddressPtrList kernel_workspaces;
+      AddressPtrList kernel_outputs;
+      GenLaunchArgs(*kernel_mod, kernels[i], &kernel_inputs, &kernel_workspaces, &kernel_outputs);
+      auto ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, stream_);
+      if (!ret) {
+        MS_LOG(ERROR) << "Launch kernel failed.";
+        return false;
+      }
     }
   }
   return true;
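
To summarize the new launch policy: a dynamic-shape kernel is re-specialized on every launch (InferShape, then UpdateArgs, then Execute), the stream is synchronized so the host can observe the op's real output shape, and PostExecute runs; every other kernel keeps the original one-shot Launch path. A self-contained C++ sketch of that dispatch, using stub types rather than the real kernel interfaces:

```cpp
#include <iostream>
#include <memory>
#include <vector>

// Stub mirroring the DynamicKernel calls used in the hunk above.
struct DynamicKernelStub {
  bool is_dynamic_shape() const { return true; }
  void InferShape() { std::cout << "infer shape from current inputs\n"; }
  void UpdateArgs() { std::cout << "repack launch args for new shapes\n"; }
  void Execute() { std::cout << "launch on device stream\n"; }
  void PostExecute() { std::cout << "read back actual output shape\n"; }
};

bool SyncStream() { return true; }  // stand-in for the device stream sync

int main() {
  std::vector<std::shared_ptr<DynamicKernelStub>> dynamic_kernel_list = {
      std::make_shared<DynamicKernelStub>()};
  for (const auto &kernel : dynamic_kernel_list) {
    if (kernel != nullptr && kernel->is_dynamic_shape()) {
      kernel->InferShape();
      kernel->UpdateArgs();
      kernel->Execute();
      if (!SyncStream()) {
        std::cerr << "SyncStream failed\n";
        return 1;
      }
      kernel->PostExecute();
    }  // else: the static kernel_mod->Launch path, omitted in this sketch
  }
  return 0;
}
```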
@@ -0,0 +1,74 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import numpy as np
+import pytest
+
+import mindspore.nn as nn
+from mindspore import Tensor, Parameter, context
+from mindspore.nn import TrainOneStepCell
+from mindspore.nn.optim import FTRL, LazyAdam
+from mindspore.ops import operations as P
+
+context.set_context(enable_sparse=True,
+                    mode=context.PYNATIVE_MODE,
+                    device_target="Ascend")
+
+
+class NetWithSparseGatherV2(nn.Cell):
+    def __init__(self):
+        super(NetWithSparseGatherV2, self).__init__()
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1")
+        self.weight2 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight2")
+        self.axis = 1
+        self.gather = P.SparseGatherV2()
+
+    def construct(self, indices, label):
+        return self.gather(self.weight1, indices, self.axis) + self.weight2
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_pynative_ftrl_net():
+    indices = Tensor(np.array([0, 0, 1]).astype(np.int32))
+    label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
+    net = NetWithSparseGatherV2()
+    optimizer = FTRL(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, loss_scale=2.0)
+    optimizer.target = 'Ascend'
+    train_network = TrainOneStepCell(net, optimizer)
+    output = train_network(indices, label)
+    assert np.allclose(output.asnumpy(), np.array([[[2, 2]], [[2, 2]], [[2, 2]]]))
+    assert np.allclose(net.weight1.asnumpy(), np.array([[[0.7884067, 0.7884067]],
+                                                        [[0.68213105, 0.68213105]],
+                                                        [[1.0, 1.0]]]))
+    assert np.allclose(net.weight2.asnumpy(), np.array([[[0.6821311, 0.6821311]],
+                                                        [[0.6821311, 0.6821311]],
+                                                        [[0.6821311, 0.6821311]]]))
+
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_pynative_lazy_adam_net():
+    indices = Tensor(np.array([0, 0, 1]).astype(np.int32))
+    label = Tensor(np.zeros([2, 1, 2]).astype(np.float32))
+    net = NetWithSparseGatherV2()
+    optimizer = LazyAdam(net.trainable_params(), learning_rate=0.1, weight_decay=0.9, loss_scale=2.0)
+    optimizer.target = 'Ascend'
+    train_network = TrainOneStepCell(net, optimizer)
+    output = train_network(indices, label)
+    assert np.allclose(output.asnumpy(), np.array([[[2, 2]], [[2, 2]], [[2, 2]]]))
+    assert np.allclose(net.weight1.asnumpy(), np.array([[[0.9, 0.9]], [[0.9, 0.9]], [[1.0, 1.0]]]))
+    assert np.allclose(net.weight2.asnumpy(), np.array([[[0.9, 0.9]], [[0.9, 0.9]], [[0.9, 0.9]]]))
@@ -0,0 +1,44 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import numpy as np
+import pytest
+import mindspore.context as context
+import mindspore.nn as nn
+from mindspore import Tensor
+import mindspore.common.dtype as mstype
+from mindspore.ops import operations as P
+
+context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+
+class Net(nn.Cell):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.unique = P.Unique()
+
+    def construct(self, x):
+        return self.unique(x)
+
+@pytest.mark.level0
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.env_onecard
+def test_pynative_unique():
+    x = Tensor(np.array([1, 1, 2, 2, 3, 3]), mstype.int32)
+    unique = Net()
+    output = unique(x)
+    expect1 = np.array([1, 2, 3])
+    expect2 = np.array([0, 0, 1, 1, 2, 2])
+    assert (output[0].asnumpy() == expect1).all()
+    assert (output[1].asnumpy() == expect2).all()
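
Unique is a natural smoke test here because the length of its first output depends on the input data, so its shape can only be resolved at runtime (the -1 dimension the heuristic above looks for). A plain C++ re-computation of the expected values in this test:

```cpp
#include <iostream>
#include <unordered_map>
#include <vector>

int main() {
  const std::vector<int> x = {1, 1, 2, 2, 3, 3};
  std::vector<int> uniq;             // -> {1, 2, 3}, matches expect1
  std::vector<int> idx;              // -> {0, 0, 1, 1, 2, 2}, matches expect2
  std::unordered_map<int, int> pos;  // value -> index in uniq
  for (int v : x) {
    if (pos.find(v) == pos.end()) {
      pos[v] = static_cast<int>(uniq.size());
      uniq.push_back(v);
    }
    idx.push_back(pos[v]);
  }
  for (int v : uniq) std::cout << v << ' ';
  std::cout << '\n';
  for (int v : idx) std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}
```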