Browse Source

fix infer shape and add context

feature/build-system-rewrite
fangzehua 4 years ago
parent
commit
ccdc6e95f0
12 changed files with 21 additions and 17 deletions
  1. +1
    -0
      docs/api/api_python/mindspore.context.rst
  2. +0
    -3
      mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc
  3. +6
    -5
      mindspore/ccsrc/runtime/framework/actor/actor_common.h
  4. +0
    -4
      mindspore/ccsrc/runtime/framework/graph_scheduler.cc
  5. +1
    -0
      mindspore/core/mindrt/src/actor/actormgr.cc
  6. +1
    -0
      mindspore/core/mindrt/src/thread/threadpool.h
  7. +4
    -0
      mindspore/core/ops/strided_slice.cc
  8. +1
    -1
      mindspore/python/mindspore/context.py
  9. +2
    -2
      mindspore/python/mindspore/nn/layer/rnns.py
  10. +1
    -0
      mindspore/python/mindspore/nn/layer/timedistributed.py
  11. +3
    -1
      mindspore/python/mindspore/ops/composite/math_ops.py
  12. +1
    -1
      mindspore/python/mindspore/ops/operations/array_ops.py

+ 1
- 0
docs/api/api_python/mindspore.context.rst View File

@@ -138,6 +138,7 @@ MindSpore context,用于配置当前执行环境,包括执行模式、执行
- **grad_for_scalar** (bool): 表示是否获取标量梯度。默认值:False。当 `grad_for_scalar` 设置为True时,则可以导出函数的标量输入。由于后端目前不支持伸缩操作,所以该接口只支持在前端可推演的简单操作。
- **enable_compile_cache** (bool) - 表示是否加载或者保存前端编译的图。当 `enable_compile_cache` 被设置为True时,在第一次执行的过程中,一个硬件无关的编译缓存会被生成并且导出为一个MINDIR文件。当该网络被再次执行时,如果 `enable_compile_cache` 仍然为True并且网络脚本没有被更改,那么这个编译缓存会被加载。注意目前只支持有限的Python脚本更改的自动检测,这意味着可能有正确性风险。默认值:False。这是一个实验特性,可能会被更改或者删除。
- **compile_cache_path** (str) - 保存前端图编译缓存的路径。默认值:"."。如果目录不存在,系统会自动创建这个目录。缓存会被保存到如下目录: `compile_cache_path/rank_${rank_id}/` 。 `rank_id` 是集群上当前设备的ID。
- **runtime_num_threads** (int) - 运行时线程池的线程数控制。 默认值为系统线程数的0.6倍。

**异常:**



+ 0
- 3
mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc View File

@@ -25,8 +25,6 @@

namespace mindspore {
namespace kernel {
constexpr size_t kDefaultKernelSpinCount = 3000;

void CpuDynamicKernel::UpdateArgs() {
if (!is_input_dynamic_shape_ && is_output_dynamic_shape_ && !have_depends()) {
return;
@@ -176,7 +174,6 @@ ActorThreadPool *GetActorMgrInnerThreadPool() {
thread_pool = actor_manager->GetActorThreadPool();
MS_EXCEPTION_IF_NULL(thread_pool);
}
thread_pool->SetKernelThreadMaxSpinCount(kDefaultKernelSpinCount);
return thread_pool;
}



+ 6
- 5
mindspore/ccsrc/runtime/framework/actor/actor_common.h View File

@@ -154,11 +154,12 @@ class ActorDispatcher {
}

// The first five executions are for warm-up, the next five executions are statistics of multi thread execution time,
// and the next next five executions are statistics of single thread execution time.
static constexpr size_t kMultiThreadExecutionCountBegin{21};
static constexpr size_t kMultiThreadExecutionCountEnd{30};
static constexpr size_t kSingleThreadExecutionCountBegin{31};
static constexpr size_t kSingleThreadExecutionCountEnd{40};
// and the five after that are statistics of single thread execution time. The first 30 steps are used to check
// whether there are cpu kernels.
static constexpr size_t kMultiThreadExecutionCountBegin{31};
static constexpr size_t kMultiThreadExecutionCountEnd{40};
static constexpr size_t kSingleThreadExecutionCountBegin{41};
static constexpr size_t kSingleThreadExecutionCountEnd{50};
// The single thread execution constraint.
static constexpr size_t kSingleThreadExecutionActorMaxNum{100};



+ 0
- 4
mindspore/ccsrc/runtime/framework/graph_scheduler.cc View File

@@ -386,9 +386,6 @@ void GraphScheduler::Run(ActorSet *const actor_set, const std::vector<DeviceCont
MS_EXCEPTION_IF_NULL(ActorMgr::GetActorMgrRef());
auto thread_pool = ActorMgr::GetActorMgrRef()->GetActorThreadPool();
MS_EXCEPTION_IF_NULL(thread_pool);
if (actor_set->is_multi_thread_execution_) {
thread_pool->SetSpinCountMaxValue();
}
ActorDispatcher::is_multi_thread_execution(actor_set->is_multi_thread_execution_);
double start_time = GetTime();
ActorDispatcher::Send(actor_set->data_prepare_actor_->GetAID(), &DataPrepareActor::PrepareData, input_tensors,
@@ -398,7 +395,6 @@ void GraphScheduler::Run(ActorSet *const actor_set, const std::vector<DeviceCont
auto result_future = result[0].GetFuture();
result_future.Wait();
MsException::Instance().CheckException();
thread_pool->SetSpinCountMinValue();
if (!result_future.IsOK()) {
#ifdef ENABLE_DUMP_IR
mindspore::RDR::TriggerAll();


+ 1
- 0
mindspore/core/mindrt/src/actor/actormgr.cc View File

@@ -76,6 +76,7 @@ int ActorMgr::Initialize(bool use_inner_pool, size_t actor_thread_num, size_t ma
if (inner_pool_ != nullptr) {
inner_pool_->SetMaxSpinCount(kDefaultSpinCount);
inner_pool_->SetSpinCountMaxValue();
inner_pool_->SetKernelThreadMaxSpinCount(kDefaultKernelSpinCount);
}
}
return MINDRT_OK;


+ 1
- 0
mindspore/core/mindrt/src/thread/threadpool.h View File

@@ -36,6 +36,7 @@
namespace mindspore {
constexpr int kDefaultSpinCount = 300000;
constexpr int kMaxCount = 30000;
constexpr int kDefaultKernelSpinCount = 3000;
constexpr int kMinSpinCount = 1;
constexpr int kDefaultFrequency = 1;
constexpr float kMaxScale = 1.;


+ 4
- 0
mindspore/core/ops/strided_slice.cc View File

@@ -354,6 +354,10 @@ abstract::ShapePtr StridedSliceInferShape(const PrimitivePtr &primitive,
}
if (!slice_dynamic) {
ret_in_shape = ComputeInferShape(primitive, begin_v, end_v, strides_v, x_shape->shape());
bool has_zero_shape = std::any_of(ret_in_shape.begin(), ret_in_shape.end(), [](int64_t i) { return i == 0; });
if (has_zero_shape) {
MS_LOG(EXCEPTION) << "StridedSlice does not support zero shape yet, now the output shape is " << ret_in_shape;
}
return std::make_shared<abstract::Shape>(ret_in_shape);
}
ret_in_shape = DynamicComputeInferShape(primitive, begin_v, end_v, strides_v, x_shape->shape(), begin_len);


+ 1
- 1
mindspore/python/mindspore/context.py View File

@@ -316,7 +316,7 @@ class _Context:
def set_runtime_num_threads(self, runtime_num_threads):
"""Check and set runtime_num_threads."""
if runtime_num_threads <= 0:
raise ValueError("The num of cpu thread must bigger than 0.")
raise ValueError("The number of threads must be greater than 0.")
self.set_param(ms_ctx_param.runtime_num_threads, runtime_num_threads)

setters = {


+ 2
- 2
mindspore/python/mindspore/nn/layer/rnns.py View File

@@ -349,8 +349,8 @@ class _DynamicLSTMAscend(Cell):
outputs, h, c, _, _, _, _, _ = self.lstm(self.cast(x, self.dtype), \
self.cast(self.transpose(weight, (1, 0)), self.dtype), \
self.cast(bias, self.dtype), None, \
self.cast(h_0[0].view(1, *h_0[0].shape), self.dtype), \
self.cast(h_0[1].view(1, *h_0[1].shape), self.dtype))
self.cast(P.ExpandDims()(h_0[0], 0), self.dtype), \
self.cast(P.ExpandDims()(h_0[1], 0), self.dtype))
if seq_length is not None:
h = get_hidden(h, seq_length)
c = get_hidden(c, seq_length)


+ 1
- 0
mindspore/python/mindspore/nn/layer/timedistributed.py View File

@@ -137,6 +137,7 @@ class TimeDistributed(Cell):
outputs_shape_new = outputs.shape[:reshape_pos] + inputs_shape_new[reshape_pos: reshape_pos + 2]
if reshape_pos + 1 < len(outputs.shape):
outputs_shape_new += outputs.shape[reshape_pos + 1:]
outputs_shape_new = (-1,) + outputs_shape_new[1:]
return self.reshape(outputs, outputs_shape_new)

unstack = Unstack(time_axis)


+ 3
- 1
mindspore/python/mindspore/ops/composite/math_ops.py View File

@@ -436,7 +436,9 @@ def dot(x1, x2):
x1_reshape = reshape_op(x1, (-1, x1_shape[-1]))
x2_reshape = reshape_op(x2_transpose, (x2_shape[-2], -1))
mul_result = matmul_op(x1_reshape, x2_reshape)
return reshape_op(mul_result, x1_shape[:-1] + x2_shape[:-2] + x2_shape[-1:])
reshape_shape = x1_shape[:-1] + x2_shape[:-2] + x2_shape[-1:]
reshape_shape = (-1,) + reshape_shape[1:]
return reshape_op(mul_result, reshape_shape)
return matmul_op(x1, x2)




+ 1
- 1
mindspore/python/mindspore/ops/operations/array_ops.py View File

@@ -509,7 +509,7 @@ class Reshape(PrimitiveWithInfer):
if len(min_shape) != shape_rank or len(max_shape) != shape_rank:
raise RuntimeError("The primitive[Reshape]'s input[shape] min or max value does not match the shape rank.")
for i in range(shape_rank):
if min_shape[i] == max_shape[i]:
if min_shape[i] == max_shape[i] and min_shape[i] != 1:
out_shape[i] = min_shape[i]
elif is_shape_unknown(x_shp) and "max_shape" in x:
# when dynamic memory allocation is supported, max_shape can be left out


Loading…
Cancel
Save