Browse Source

fix infer shape and add context

feature/build-system-rewrite
fangzehua 4 years ago
parent
commit
ccdc6e95f0
12 changed files with 21 additions and 17 deletions
  1. +1
    -0
      docs/api/api_python/mindspore.context.rst
  2. +0
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc
  3. +6
    -5
      mindspore/ccsrc/runtime/framework/actor/actor_common.h
  4. +0
    -4
      mindspore/ccsrc/runtime/framework/graph_scheduler.cc
  5. +1
    -0
      mindspore/core/mindrt/src/actor/actormgr.cc
  6. +1
    -0
      mindspore/core/mindrt/src/thread/threadpool.h
  7. +4
    -0
      mindspore/core/ops/strided_slice.cc
  8. +1
    -1
      mindspore/python/mindspore/context.py
  9. +2
    -2
      mindspore/python/mindspore/nn/layer/rnns.py
  10. +1
    -0
      mindspore/python/mindspore/nn/layer/timedistributed.py
  11. +3
    -1
      mindspore/python/mindspore/ops/composite/math_ops.py
  12. +1
    -1
      mindspore/python/mindspore/ops/operations/array_ops.py

+ 1
- 0
docs/api/api_python/mindspore.context.rst View File

@@ -138,6 +138,7 @@ MindSpore context,用于配置当前执行环境,包括执行模式、执行
- **grad_for_scalar** (bool): 表示是否获取标量梯度。默认值:False。当 `grad_for_scalar` 设置为True时,则可以导出函数的标量输入。由于后端目前不支持伸缩操作,所以该接口只支持在前端可推演的简单操作。
- **enable_compile_cache** (bool) - 表示是否加载或者保存前端编译的图。当 `enable_compile_cache` 被设置为True时,在第一次执行的过程中,一个硬件无关的编译缓存会被生成并且导出为一个MINDIR文件。当该网络被再次执行时,如果 `enable_compile_cache` 仍然为True并且网络脚本没有被更改,那么这个编译缓存会被加载。注意目前只支持有限的Python脚本更改的自动检测,这意味着可能有正确性风险。默认值:False。这是一个实验特性,可能会被更改或者删除。
- **compile_cache_path** (str) - 保存前端图编译缓存的路径。默认值:"."。如果目录不存在,系统会自动创建这个目录。缓存会被保存到如下目录: `compile_cache_path/rank_${rank_id}/` 。 `rank_id` 是集群上当前设备的ID。
- **runtime_num_threads** (int) - 运行时线程池的线程数控制。 默认值为系统线程数的0.6倍。

**异常:**



+ 0
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc View File

@@ -25,8 +25,6 @@

namespace mindspore {
namespace kernel {
constexpr size_t kDefaultKernelSpinCount = 3000;

void CpuDynamicKernel::UpdateArgs() {
if (!is_input_dynamic_shape_ && is_output_dynamic_shape_ && !have_depends()) {
return;
@@ -176,7 +174,6 @@ ActorThreadPool *GetActorMgrInnerThreadPool() {
thread_pool = actor_manager->GetActorThreadPool();
MS_EXCEPTION_IF_NULL(thread_pool);
}
thread_pool->SetKernelThreadMaxSpinCount(kDefaultKernelSpinCount);
return thread_pool;
}



+ 6
- 5
mindspore/ccsrc/runtime/framework/actor/actor_common.h View File

@@ -154,11 +154,12 @@ class ActorDispatcher {
}

// The first five executions are for warm-up, the next five executions are statistics of multi thread execution time,
// and the next next five executions are statistics of single thread execution time.
static constexpr size_t kMultiThreadExecutionCountBegin{21};
static constexpr size_t kMultiThreadExecutionCountEnd{30};
static constexpr size_t kSingleThreadExecutionCountBegin{31};
static constexpr size_t kSingleThreadExecutionCountEnd{40};
// and the five after that are statistics of single thread execution time. The first 30 steps are used to check
// whether there are cpu kernels.
static constexpr size_t kMultiThreadExecutionCountBegin{31};
static constexpr size_t kMultiThreadExecutionCountEnd{40};
static constexpr size_t kSingleThreadExecutionCountBegin{41};
static constexpr size_t kSingleThreadExecutionCountEnd{50};
// The single thread execution constraint.
static constexpr size_t kSingleThreadExecutionActorMaxNum{100};



+ 0
- 4
mindspore/ccsrc/runtime/framework/graph_scheduler.cc View File

@@ -386,9 +386,6 @@ void GraphScheduler::Run(ActorSet *const actor_set, const std::vector<DeviceCont
MS_EXCEPTION_IF_NULL(ActorMgr::GetActorMgrRef());
auto thread_pool = ActorMgr::GetActorMgrRef()->GetActorThreadPool();
MS_EXCEPTION_IF_NULL(thread_pool);
if (actor_set->is_multi_thread_execution_) {
thread_pool->SetSpinCountMaxValue();
}
ActorDispatcher::is_multi_thread_execution(actor_set->is_multi_thread_execution_);
double start_time = GetTime();
ActorDispatcher::Send(actor_set->data_prepare_actor_->GetAID(), &DataPrepareActor::PrepareData, input_tensors,
@@ -398,7 +395,6 @@ void GraphScheduler::Run(ActorSet *const actor_set, const std::vector<DeviceCont
auto result_future = result[0].GetFuture();
result_future.Wait();
MsException::Instance().CheckException();
thread_pool->SetSpinCountMinValue();
if (!result_future.IsOK()) {
#ifdef ENABLE_DUMP_IR
mindspore::RDR::TriggerAll();


+ 1
- 0
mindspore/core/mindrt/src/actor/actormgr.cc View File

@@ -76,6 +76,7 @@ int ActorMgr::Initialize(bool use_inner_pool, size_t actor_thread_num, size_t ma
if (inner_pool_ != nullptr) {
inner_pool_->SetMaxSpinCount(kDefaultSpinCount);
inner_pool_->SetSpinCountMaxValue();
inner_pool_->SetKernelThreadMaxSpinCount(kDefaultKernelSpinCount);
}
}
return MINDRT_OK;


+ 1
- 0
mindspore/core/mindrt/src/thread/threadpool.h View File

@@ -36,6 +36,7 @@
namespace mindspore {
constexpr int kDefaultSpinCount = 300000;
constexpr int kMaxCount = 30000;
constexpr int kDefaultKernelSpinCount = 3000;
constexpr int kMinSpinCount = 1;
constexpr int kDefaultFrequency = 1;
constexpr float kMaxScale = 1.;


+ 4
- 0
mindspore/core/ops/strided_slice.cc View File

@@ -354,6 +354,10 @@ abstract::ShapePtr StridedSliceInferShape(const PrimitivePtr &primitive,
}
if (!slice_dynamic) {
ret_in_shape = ComputeInferShape(primitive, begin_v, end_v, strides_v, x_shape->shape());
bool has_zero_shape = std::any_of(ret_in_shape.begin(), ret_in_shape.end(), [](int64_t i) { return i == 0; });
if (has_zero_shape) {
MS_LOG(EXCEPTION) << "StridedSlice does not support zero shape yet, now the output shape is " << ret_in_shape;
}
return std::make_shared<abstract::Shape>(ret_in_shape);
}
ret_in_shape = DynamicComputeInferShape(primitive, begin_v, end_v, strides_v, x_shape->shape(), begin_len);


+ 1
- 1
mindspore/python/mindspore/context.py View File

@@ -316,7 +316,7 @@ class _Context:
def set_runtime_num_threads(self, runtime_num_threads):
"""Check and set runtime_num_threads."""
if runtime_num_threads <= 0:
raise ValueError("The num of cpu thread must bigger than 0.")
raise ValueError("The number of threads must be greater than 0.")
self.set_param(ms_ctx_param.runtime_num_threads, runtime_num_threads)

setters = {


+ 2
- 2
mindspore/python/mindspore/nn/layer/rnns.py View File

@@ -349,8 +349,8 @@ class _DynamicLSTMAscend(Cell):
outputs, h, c, _, _, _, _, _ = self.lstm(self.cast(x, self.dtype), \
self.cast(self.transpose(weight, (1, 0)), self.dtype), \
self.cast(bias, self.dtype), None, \
self.cast(h_0[0].view(1, *h_0[0].shape), self.dtype), \
self.cast(h_0[1].view(1, *h_0[1].shape), self.dtype))
self.cast(P.ExpandDims()(h_0[0], 0), self.dtype), \
self.cast(P.ExpandDims()(h_0[1], 0), self.dtype))
if seq_length is not None:
h = get_hidden(h, seq_length)
c = get_hidden(c, seq_length)


+ 1
- 0
mindspore/python/mindspore/nn/layer/timedistributed.py View File

@@ -137,6 +137,7 @@ class TimeDistributed(Cell):
outputs_shape_new = outputs.shape[:reshape_pos] + inputs_shape_new[reshape_pos: reshape_pos + 2]
if reshape_pos + 1 < len(outputs.shape):
outputs_shape_new += outputs.shape[reshape_pos + 1:]
outputs_shape_new = (-1,) + outputs_shape_new[1:]
return self.reshape(outputs, outputs_shape_new)

unstack = Unstack(time_axis)


+ 3
- 1
mindspore/python/mindspore/ops/composite/math_ops.py View File

@@ -436,7 +436,9 @@ def dot(x1, x2):
x1_reshape = reshape_op(x1, (-1, x1_shape[-1]))
x2_reshape = reshape_op(x2_transpose, (x2_shape[-2], -1))
mul_result = matmul_op(x1_reshape, x2_reshape)
return reshape_op(mul_result, x1_shape[:-1] + x2_shape[:-2] + x2_shape[-1:])
reshape_shape = x1_shape[:-1] + x2_shape[:-2] + x2_shape[-1:]
reshape_shape = (-1,) + reshape_shape[1:]
return reshape_op(mul_result, reshape_shape)
return matmul_op(x1, x2)




+ 1
- 1
mindspore/python/mindspore/ops/operations/array_ops.py View File

@@ -509,7 +509,7 @@ class Reshape(PrimitiveWithInfer):
if len(min_shape) != shape_rank or len(max_shape) != shape_rank:
raise RuntimeError("The primitive[Reshape]'s input[shape] min or max value does not match the shape rank.")
for i in range(shape_rank):
if min_shape[i] == max_shape[i]:
if min_shape[i] == max_shape[i] and min_shape[i] != 1:
out_shape[i] = min_shape[i]
elif is_shape_unknown(x_shp) and "max_shape" in x:
# when dynamic memory allocation is supported, max_shape can be left out


Loading…
Cancel
Save