
support infershape when running graph

tags/v0.7.0-beta
chenjianping 5 years ago
commit f2d97520ab
100 changed files with 631 additions and 353 deletions
  1. mindspore/lite/include/context.h (+6 -0)
  2. mindspore/lite/include/errorcode.h (+4 -1)
  3. mindspore/lite/src/executor.cc (+0 -5)
  4. mindspore/lite/src/kernel_factory.cc (+1 -1)
  5. mindspore/lite/src/kernel_registry.h (+0 -1)
  6. mindspore/lite/src/lite_kernel.h (+32 -6)
  7. mindspore/lite/src/lite_session.cc (+1 -0)
  8. mindspore/lite/src/ops/cast.cc (+1 -1)
  9. mindspore/lite/src/ops/ops.h (+3 -0)
 10. mindspore/lite/src/ops/reshape.cc (+10 -5)
 11. mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc (+13 -8)
 12. mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h (+3 -3)
 13. mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.cc (+4 -4)
 14. mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h (+3 -2)
 15. mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc (+15 -13)
 16. mindspore/lite/src/runtime/kernel/arm/base/concat_base.h (+4 -3)
 17. mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h (+3 -2)
 18. mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc (+12 -12)
 19. mindspore/lite/src/runtime/kernel/arm/base/crop_base.h (+3 -3)
 20. mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc (+9 -5)
 21. mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h (+3 -2)
 22. mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc (+6 -4)
 23. mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h (+3 -2)
 24. mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc (+4 -3)
 25. mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h (+3 -2)
 26. mindspore/lite/src/runtime/kernel/arm/base/pad.cc (+4 -4)
 27. mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc (+8 -4)
 28. mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h (+3 -2)
 29. mindspore/lite/src/runtime/kernel/arm/base/prelu_base.cc (+2 -2)
 30. mindspore/lite/src/runtime/kernel/arm/base/prelu_base.h (+3 -2)
 31. mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc (+7 -2)
 32. mindspore/lite/src/runtime/kernel/arm/base/prior_box.h (+3 -2)
 33. mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc (+9 -4)
 34. mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.h (+3 -2)
 35. mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc (+6 -6)
 36. mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h (+3 -3)
 37. mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc (+4 -4)
 38. mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h (+3 -2)
 39. mindspore/lite/src/runtime/kernel/arm/base/split_base.cc (+6 -6)
 40. mindspore/lite/src/runtime/kernel/arm/base/split_base.h (+3 -2)
 41. mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc (+2 -2)
 42. mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h (+3 -2)
 43. mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc (+9 -3)
 44. mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h (+3 -2)
 45. mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc (+11 -3)
 46. mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h (+3 -3)
 47. mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc (+17 -6)
 48. mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h (+3 -2)
 49. mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc (+15 -7)
 50. mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h (+3 -2)
 51. mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc (+17 -5)
 52. mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h (+3 -2)
 53. mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc (+8 -2)
 54. mindspore/lite/src/runtime/kernel/arm/fp32/activation.h (+3 -2)
 55. mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.cc (+10 -10)
 56. mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.h (+3 -2)
 57. mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc (+11 -8)
 58. mindspore/lite/src/runtime/kernel/arm/fp32/addn.h (+4 -2)
 59. mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc (+6 -1)
 60. mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.h (+3 -3)
 61. mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc (+11 -2)
 62. mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h (+3 -2)
 63. mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.cc (+3 -2)
 64. mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.h (+3 -2)
 65. mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc (+13 -3)
 66. mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h (+4 -5)
 67. mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.cc (+5 -0)
 68. mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.h (+3 -3)
 69. mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc (+7 -2)
 70. mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h (+3 -2)
 71. mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc (+12 -2)
 72. mindspore/lite/src/runtime/kernel/arm/fp32/bias.h (+4 -4)
 73. mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.cc (+9 -6)
 74. mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.h (+3 -2)
 75. mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.cc (+2 -2)
 76. mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.h (+3 -2)
 77. mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc (+11 -2)
 78. mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h (+5 -5)
 79. mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc (+14 -8)
 80. mindspore/lite/src/runtime/kernel/arm/fp32/cast.h (+6 -10)
 81. mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc (+45 -35)
 82. mindspore/lite/src/runtime/kernel/arm/fp32/concat.h (+3 -3)
 83. mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc (+17 -6)
 84. mindspore/lite/src/runtime/kernel/arm/fp32/convolution.h (+3 -2)
 85. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc (+9 -0)
 86. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h (+3 -2)
 87. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc (+9 -0)
 88. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.h (+3 -2)
 89. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc (+15 -4)
 90. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h (+3 -3)
 91. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.cc (+10 -1)
 92. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h (+3 -2)
 93. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.cc (+3 -2)
 94. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.h (+3 -2)
 95. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.cc (+3 -3)
 96. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.h (+3 -2)
 97. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc (+9 -0)
 98. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h (+3 -2)
 99. mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc (+7 -10)
100. mindspore/lite/src/runtime/kernel/arm/fp32/crop.h (+3 -2)

mindspore/lite/include/context.h  +6 -0

@@ -64,11 +64,17 @@ class MS_API Context {
   /// \brief Destructor of MindSpore Lite Context.
   virtual ~Context();
 
+  void InferShapeInterrupt() {
+    infer_shape_interrupt_ = true;
+  }
+
  public:
   DeviceContext device_ctx_{DT_CPU};
   int thread_num_ = 2; /**< thread number config for thread pool */
   std::shared_ptr<Allocator> allocator = nullptr;
   CpuBindMode cpu_bind_mode_ = MID_CPU;
+  bool infer_shape_interrupt_ = false;
+  bool running_ = false;
 };
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_INCLUDE_CONTEXT_H_
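
The two new fields work as a pair: infer_shape_interrupt_ records that shape inference had to be abandoned at graph-build time, and running_ distinguishes build time from execution time. A minimal sketch of the intended lifecycle, assuming the call sites this commit adds elsewhere (the driver itself is illustrative, not code from the repository):

// Sketch only: lifecycle of the two new Context flags.
lite::Context ctx;

// Graph build: an op whose output shape depends on runtime data
// (e.g. Reshape fed by a shape tensor with no data yet) reports
// RET_INFER_INVALID, and the scheduler interrupts shape inference.
ctx.InferShapeInterrupt();  // infer_shape_interrupt_ = true

// Kernel Init() calls now see infer_shape_interrupt_ && !running_,
// defer their shape-dependent setup, and mark themselves for re-init.

// Execution: LiteSession::RunGraph() flips running_ (see the
// lite_session.cc hunk below), so LiteKernel::Prepare() re-runs
// InferShape() and Init() with real data.
ctx.running_ = true;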

mindspore/lite/include/errorcode.h  +4 -1

@@ -48,8 +48,11 @@ constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execution operator.
 
 /* Tensor error code, range: [-401,-500] */
 constexpr int RET_FORMAT_ERR = -401; /**< Failed to checking tensor format. */
+
+/* InferShape error code, range: [-501,-600] */
+constexpr int RET_INFER_ERR = -501;     /**< Failed to infer shape. */
+constexpr int RET_INFER_INVALID = -502; /**< Invalid to infer shape before runtime. */
 }  // namespace lite
 }  // namespace mindspore
 
 #endif  // MINDSPORE_LITE_INCLUDE_ERRORCODE_H_
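
The split into two codes matters: RET_INFER_ERR means the model is genuinely malformed, while RET_INFER_INVALID means the shape is simply unknowable before runtime and inference should be retried. A hedged sketch of how a scheduler might branch on them using the APIs this commit adds (the function itself is illustrative, not from the commit):

// Illustrative only: distinguishing "retry later" from "hard failure".
int InferNodeShape(lite::Primitive *prim, lite::Context *ctx,
                   const std::vector<tensor::Tensor *> &inputs,
                   const std::vector<tensor::Tensor *> &outputs) {
  int ret = prim->InferShape(inputs, outputs);
  if (ret == lite::RET_INFER_INVALID) {
    prim->SetInferFlag(false);   // LiteKernel::Prepare() will retry
    ctx->InferShapeInterrupt();  // downstream kernels defer Init()
    return lite::RET_OK;         // not fatal at graph-build time
  }
  return ret;  // RET_INFER_ERR (and others) still abort the build
}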


mindspore/lite/src/executor.cc  +0 -5

@@ -37,11 +37,6 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
   kernel::LiteKernelUtil::InitTensorRefCount(kernels);
   for (auto *kernel : kernels) {
     MS_ASSERT(nullptr != kernel);
-    auto &outputs = kernel->GetOutputs();
-    for (auto *output : outputs) {
-      MS_ASSERT(nullptr != output);
-      output->MallocData();
-    }
     session::CallBackParam callbackParam;
     callbackParam.name_callback_param = kernel->Name();
     callbackParam.type_callback_param = kernel->type_str();


mindspore/lite/src/kernel_factory.cc  +1 -1

@@ -45,7 +45,7 @@ LiteKernel *KernelFactory::GetKernel(const std::vector<tensor::Tensor *> &inputs
   }
   auto creator = KernelRegistry::GetInstance()->GetCreator(key);
   if (creator != nullptr) {
-    auto kernel = creator(inputs, outputs, parameter, ctx, key);
+    auto kernel = creator(inputs, outputs, parameter, ctx, key, primitive);
     return kernel;
   }
   return nullptr;


mindspore/lite/src/kernel_registry.h  +0 -1

@@ -45,7 +45,6 @@ class KernelRegistry {
   int device_type_length_;
   int data_type_length_;
   int op_type_length_;
-  std::mutex lock_;
 };
 
 class KernelRegistrar {


mindspore/lite/src/lite_kernel.h  +32 -6

@@ -25,6 +25,7 @@
 #include "include/context.h"
 #include "src/ir/tensor.h"
 #include "src/ops/ops.h"
+#include "include/errorcode.h"
 
 #ifdef ENABLE_FP16
 using FLOAT_t = float16_t;
@@ -34,6 +35,8 @@ using FLOAT_t = float;
 
 // using mindspore::kernel::AddressPtr;
 namespace mindspore::kernel {
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
 enum KERNEL_ARCH { kCPU, kGPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
 struct KernelKey {
   KERNEL_ARCH arch;
@@ -55,15 +58,30 @@ class LiteKernel {
  public:
   LiteKernel() = default;
   explicit LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                      const std::vector<lite::tensor::Tensor *> &outputs)
-      : opParameter(parameter), inputs_(inputs), outputs_(outputs), train_mode(false) {
+                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                      const lite::Primitive *primitive)
+      : opParameter(parameter), inputs_(inputs), outputs_(outputs), train_mode(false), primitive_(primitive),
+        context_(ctx) {
     this->in_kernel_.clear();
     this->out_kernel_.clear();
   }
 
   virtual ~LiteKernel() { delete opParameter; }
 
-  virtual int Prepare() { return -1; }
+  virtual int Prepare() {
+    if (primitive_ != nullptr && !primitive_->GetInferFlag()) {
+      (const_cast<lite::Primitive *>(primitive_))->InferShape(inputs_, outputs_);
+    }
+    if (need_reinit) {
+      Init();
+    }
+    auto &outputs = this->GetOutputs();
+    for (auto *output : outputs) {
+      MS_ASSERT(output != nullptr);
+      output->MallocData();
+    }
+    return RET_OK;
+  }
   virtual int Init() { return -1; }
   virtual int ReSize() { return -1; }
   virtual int Run() { return -1; }
@@ -103,16 +121,23 @@ class LiteKernel {
 
   void set_desc(const KernelKey kernel_key) { desc = kernel_key; }
 
+  void SetNeedReInit() {
+    need_reinit = true;
+  }
+
 protected:
   KernelKey desc;
   std::string name;
   OpParameter *opParameter = nullptr;
+  const lite::Primitive *primitive_;
+  const lite::Context *context_;
   // tensor will free in ~lite_session()
   std::vector<lite::tensor::Tensor *> inputs_;
   std::vector<lite::tensor::Tensor *> outputs_;
   std::vector<LiteKernel *> in_kernel_;
   std::vector<LiteKernel *> out_kernel_;
   bool train_mode;
+  bool need_reinit = false;
 };
 
 class SubGraphKernel : public LiteKernel {
@@ -121,8 +146,9 @@ class SubGraphKernel : public LiteKernel {
                  const std::vector<lite::tensor::Tensor *> &outputs,
                  const std::vector<kernel::LiteKernel *> &inKernels,
                  const std::vector<kernel::LiteKernel *> &outKernels,
-                 const std::vector<kernel::LiteKernel *> &nodes)
-      : LiteKernel(nullptr, inputs, outputs),
+                 const std::vector<kernel::LiteKernel *> &nodes, const lite::Context *ctx,
+                 const lite::Primitive *primitive)
+      : LiteKernel(nullptr, inputs, outputs, ctx, primitive),
         inputs_(inputs),
         outputs_(outputs),
         inkernels_(inKernels),
@@ -144,7 +170,7 @@ class SubGraphKernel : public LiteKernel {
 
 typedef LiteKernel *(*KernelCreator)(const std::vector<lite::tensor::Tensor *> &inputs,
                                      const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter,
-                                     const lite::Context *ctx, const KernelKey &desc);
+                                     const lite::Context *ctx, const KernelKey &desc, const lite::Primitive *primitive);
 
 class LiteKernelUtil {
  public:
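
The rewritten Prepare() is where the deferred work converges, which is also why Executor::Run() no longer mallocs output tensors (see the executor.cc hunk above): allocation has to happen after shapes are known. The same body, annotated (a paraphrase of the diff with explanatory comments added; the rationale in the comments is inferred from the commit, not stated in it):

// The three steps of the new LiteKernel::Prepare(), annotated:
virtual int Prepare() {
  // 1. Shapes were not inferable at build time: infer them now. Inputs
  //    already have concrete shapes because predecessor kernels ran
  //    their own Prepare()/Run() first.
  if (primitive_ != nullptr && !primitive_->GetInferFlag()) {
    (const_cast<lite::Primitive *>(primitive_))->InferShape(inputs_, outputs_);
  }
  // 2. Init() was skipped at build time (SetNeedReInit()): redo it now
  //    that output shapes exist.
  if (need_reinit) {
    Init();
  }
  // 3. Allocate output buffers; sizes depend on the shapes from step 1.
  //    This replaces the allocation removed from Executor::Run().
  auto &outputs = this->GetOutputs();
  for (auto *output : outputs) {
    MS_ASSERT(output != nullptr);
    output->MallocData();
  }
  return RET_OK;
}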


mindspore/lite/src/lite_session.cc  +1 -0

@@ -168,6 +168,7 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const {
 int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) {
   MS_EXCEPTION_IF_NULL(this->context_);
   SetMaxWokerNum(context_->thread_num_);
+  context_->running_ = true;
   Executor executor;
   if (before == nullptr && after == nullptr) {
     return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get());


mindspore/lite/src/ops/cast.cc  +1 -1

@@ -40,7 +40,7 @@ int Cast::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::
     MS_LOG(ERROR) << "Unsupport input data type " << input->data_type();
     return RET_INPUT_TENSOR_ERROR;
   }
-  if (cast_prim->dstT() != kNumberTypeFloat || cast_prim->dstT() != kNumberTypeFloat32) {
+  if (cast_prim->dstT() != kNumberTypeFloat && cast_prim->dstT() != kNumberTypeFloat32) {
     MS_LOG(ERROR) << "Invalid output datatype " << cast_prim->dstT();
     return RET_INPUT_TENSOR_ERROR;
   }
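
This one-character fix is a classic De Morgan slip: with two distinct constants, x != A || x != B is true for every x, so the guard rejected even valid float outputs. A self-contained demonstration (the enum values are placeholders, not the real schema constants):

#include <iostream>

int main() {
  const int kNumberTypeFloat = 42;    // placeholder values; the real
  const int kNumberTypeFloat32 = 43;  // enums live in the schema headers
  for (int dstT : {42, 43, 44}) {
    bool buggy = dstT != kNumberTypeFloat || dstT != kNumberTypeFloat32;
    bool fixed = dstT != kNumberTypeFloat && dstT != kNumberTypeFloat32;
    std::cout << "dstT=" << dstT << "  buggy rejects: " << buggy
              << "  fixed rejects: " << fixed << "\n";
  }
  // buggy rejects every value (always true); fixed rejects only 44.
  return 0;
}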


mindspore/lite/src/ops/ops.h  +3 -0

@@ -45,12 +45,15 @@ class Primitive {
   static Primitive *CreatePrimitive(schema::Primitive *primitive);
   virtual ~Primitive() {}
   const schema::Primitive *Value() const { return this->primitive; }
+  const bool GetInferFlag() const { return this->infer_flag_; }
+  void SetInferFlag(bool flag) { this->infer_flag_ = flag; }
   schema::PrimitiveType Type() const { return this->primitive->value_type(); }
   const void *Attribute() const { return this->primitive->value(); }
   virtual int InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_);
 
 protected:
   schema::Primitive *primitive;
+  bool infer_flag_ = true;
 };
 
 class Conv2D : public Primitive {


mindspore/lite/src/ops/reshape.cc  +10 -5

@@ -34,11 +34,11 @@ int Reshape::CalNewShape(const tensor::Tensor *in_tensor, std::vector<int> *out_
       inferIndex = i;
     } else {
       MS_LOG(ERROR) << "output shape should has no more than one dim which need infer";
-      return RET_ERROR;
+      return RET_INFER_ERR;
     }
   } else if (out_shape->at(i) < 0) {
     MS_LOG(ERROR) << "output shape dim should be non-negative";
-    return RET_ERROR;
+    return RET_INFER_ERR;
   } else if (out_shape->at(i) == 0) {
     out_shape->at(i) = in_tensor->shape().at(i);
     out_shapeSize *= out_shape->at(i);
@@ -49,7 +49,7 @@ int Reshape::CalNewShape(const tensor::Tensor *in_tensor, std::vector<int> *out_
 
   if (inferIndex == -1 && out_shapeSize != in_shape_size) {
     MS_LOG(ERROR) << "output shapeSize: " << out_shapeSize << " should be equal to input shapeSize: " << in_shape_size;
-    return RET_ERROR;
+    return RET_INFER_ERR;
   }
   if (inferIndex != -1) {
     out_shape->at(inferIndex) = in_shape_size / out_shapeSize;
@@ -88,7 +88,11 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso
   std::vector<int> out_shape;
   if (inputs_.size() == kDoubleNum) {
     auto shape_tensor = inputs_.at(1);
-    size_t shape_size = shape_tensor->ElementsNum();
+    if (shape_tensor->Data() == nullptr) {
+      MS_LOG(INFO) << "Do infer shape in runtime.";
+      return RET_INFER_INVALID;
+    }
+    size_t shape_size = shape_tensor->shape().size();
     switch (shape_tensor->data_type()) {
       case kNumberTypeInt8: {
         auto data = reinterpret_cast<int8_t *>(shape_tensor->Data());
@@ -108,13 +112,14 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso
       } break;
       default: {
         MS_LOG(ERROR) << "Reshape weight tensor has unsupported dataType: " << shape_tensor->data_type();
-        return RET_ERROR;
+        return RET_INFER_ERR;
       }
     }
   } else if (inputs_.size() == kSingleNum) {
     std::copy(reshape_prim->shape()->begin(), reshape_prim->shape()->end(), std::back_inserter(out_shape));
   } else {
     MS_LOG(ERROR) << "inputs tensor size invalid.";
+    return RET_INFER_ERR;
   }
 
   auto ret = CalNewShape(inputs_.front(), &out_shape);
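
Reshape is the motivating case for the whole commit: its second input is often produced by another operator, so at graph-build time the tensor has a type, and possibly a shape, but Data() is still null. A sketch of the two situations the new guard separates (the commentary is illustrative; the guard itself is from the diff):

// Case 1: the shape input is a constant weight tensor. Data() is
// non-null at graph-build time, so InferShape() can compute the output
// shape immediately and the kernel is fully initialized up front.
//
// Case 2: the shape input is the output of another op (e.g. Shape).
// Its buffer is only written during execution, so Data() == nullptr
// here; InferShape() now returns RET_INFER_INVALID instead of a hard
// error, and the real inference happens later in LiteKernel::Prepare().
if (shape_tensor->Data() == nullptr) {
  MS_LOG(INFO) << "Do infer shape in runtime.";
  return RET_INFER_INVALID;
}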


mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc  +13 -8

@@ -24,14 +24,18 @@
 
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_PARAM_INVALID;
 using mindspore::lite::RET_FORMAT_ERR;
 using mindspore::lite::RET_OK;
+using mindspore::lite::RET_PARAM_INVALID;
 using mindspore::schema::PrimitiveType_ArgMax;
 using mindspore::schema::PrimitiveType_ArgMin;
 
 namespace mindspore::kernel {
 int ArgMinMaxBaseCPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter);
   switch (opParameter->type_) {
     case PrimitiveType_ArgMax:
@@ -44,6 +48,7 @@ int ArgMinMaxBaseCPUKernel::Init() {
       MS_LOG(ERROR) << "Unexpected type " << opParameter->type_;
       return RET_ERROR;
   }
+
   auto in_shape = inputs_.at(0)->shape();
   auto dims_size = in_shape.size();
   int axis = param->axis_ < 0 ? param->axis_ + dims_size : param->axis_;
@@ -56,9 +61,9 @@ int ArgMinMaxBaseCPUKernel::Init() {
   param->topk_ = MSMIN(param->topk_, in_shape[axis]);
   if (param->topk_ > 1) {
     if (context_ != nullptr && context_->allocator != nullptr) {
-      param->arg_elements_
-        = reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis]));
-      data_from_allocator_ = true;
+      param->arg_elements_ =
+        reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis]));
+      data_from_allocator_ = true;
     } else {
       param->arg_elements_ = reinterpret_cast<ArgElement *>(malloc(sizeof(ArgElement) * in_shape[axis]));
     }
@@ -98,12 +103,12 @@ void ArgMinMaxBaseCPUKernel::FreeTmpMemory() {
 kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                   const std::vector<lite::tensor::Tensor *> &outputs,
                                                   OpParameter *op_parameter, const lite::Context *ctx,
-                                                  const kernel::KernelKey &desc) {
+                                                  const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (op_parameter == nullptr) {
     MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto kernel = new (std::nothrow) ArgMinMaxInt8CPUKernel(op_parameter, inputs, outputs, ctx);
+  auto kernel = new (std::nothrow) ArgMinMaxInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ArgMinMaxInt8CPUKernel fail!";
     return nullptr;
@@ -122,12 +127,12 @@ kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor
 kernel::LiteKernel *CpuArgMinMaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                   const std::vector<lite::tensor::Tensor *> &outputs,
                                                   OpParameter *op_parameter, const lite::Context *ctx,
-                                                  const kernel::KernelKey &desc) {
+                                                  const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (op_parameter == nullptr) {
     MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx);
+  auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ArgMinMaxCPUKernel fail!";
     return nullptr;
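
The guard at the top of Init() is the kernel-side half of the protocol, and it is stamped with the same shape into every base kernel this commit touches (concat, depth_to_space, pooling, and the files below). The generic pattern, with the hypothetical class name SomeBaseCPUKernel (only the name is invented; the body mirrors the diffs in this commit):

// Hypothetical kernel illustrating the recurring guard.
int SomeBaseCPUKernel::Init() {
  // Build time, and shape inference was interrupted upstream: the input
  // shapes this setup depends on are not trustworthy yet. Defer.
  if (context_->infer_shape_interrupt_ && !context_->running_) {
    SetNeedReInit();  // Prepare() will call Init() again at run time
    return RET_OK;
  }
  // Normal path: shapes are valid, so the shape-dependent setup
  // (axis normalization, scratch-buffer sizing, etc.) runs as before.
  return RET_OK;
}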


mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h  +3 -3

@@ -24,8 +24,9 @@ namespace mindspore::kernel {
 class ArgMinMaxBaseCPUKernel : public LiteKernel {
  public:
   ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                         const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), context_(ctx), data_from_allocator_(false) {
+                         const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                         const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), data_from_allocator_(false) {
     opParameter->thread_num_ = ctx->thread_num_;
   }
 
@@ -40,7 +41,6 @@ class ArgMinMaxBaseCPUKernel : public LiteKernel {
   void FreeTmpMemory();
 
  private:
-  const lite::Context *context_;
   bool data_from_allocator_;
 };
 }  // namespace mindspore::kernel


mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.cc  +4 -4

@@ -46,13 +46,13 @@ int BatchToSpaceBaseCPUKernel::Init() {
 kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                      const std::vector<lite::tensor::Tensor *> &outputs,
                                                      OpParameter *op_parameter, const lite::Context *ctx,
-                                                     const kernel::KernelKey &desc) {
+                                                     const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace);
   if (op_parameter == nullptr) {
     MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) BatchToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) BatchToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!";
     return nullptr;
@@ -71,13 +71,13 @@ kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::ten
 kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                      const std::vector<lite::tensor::Tensor *> &outputs,
                                                      OpParameter *op_parameter, const lite::Context *ctx,
-                                                     const kernel::KernelKey &desc) {
+                                                     const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace);
   if (op_parameter == nullptr) {
     MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new BatchToSpaceCPUKernel fail!";
     return nullptr;


mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h  +3 -2

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
 class BatchToSpaceBaseCPUKernel : public LiteKernel {
  public:
   BatchToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                            const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs) {
+                            const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                            const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
     opParameter->thread_num_ = ctx->thread_num_;
   }




mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc  +15 -13

@@ -30,21 +30,24 @@ using mindspore::schema::PrimitiveType_Concat;
 
 namespace mindspore::kernel {
 int ConcatBaseCPUKernel::Init() {
-  auto axis = concat_param_->axis_;
-  axis_ = axis >= 0 ? axis : inputs_.front()->shape().size() + axis;
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
+  axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_ : inputs_.front()->shape().size() + concat_param_->axis_;
   return RET_OK;
 }
 
 kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                const std::vector<lite::tensor::Tensor *> &outputs,
                                                OpParameter *opParameter, const Context *ctx,
-                                               const kernel::KernelKey &desc) {
+                                               const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
-  auto *kernel = new(std::nothrow) ConcatInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) ConcatInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ConcatCPUKernel fail!";
     return nullptr;
@@ -60,15 +63,15 @@ kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::T
 }
 
 kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                               const std::vector<lite::tensor::Tensor *> &outputs,
-                                               OpParameter *opParameter, const Context *ctx,
-                                               const kernel::KernelKey &desc) {
+                                                const std::vector<lite::tensor::Tensor *> &outputs,
+                                                OpParameter *opParameter, const Context *ctx,
+                                                const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
-  auto *kernel = new(std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ConcatCPUKernel fail!";
     return nullptr;
@@ -84,15 +87,15 @@ kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor::
 }
 
 kernel::LiteKernel *CpuConcatFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                               const std::vector<lite::tensor::Tensor *> &outputs,
-                                               OpParameter *opParameter, const Context *ctx,
-                                               const kernel::KernelKey &desc) {
+                                               const std::vector<lite::tensor::Tensor *> &outputs,
+                                               OpParameter *opParameter, const Context *ctx,
+                                               const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
-  auto *kernel = new(std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ConcatCPUKernel fail!";
     return nullptr;
@@ -111,4 +114,3 @@ REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, CpuConcatInt8KernelCreat
 REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Concat, CpuConcatInt32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Concat, CpuConcatFp32KernelCreator)
 }  // namespace mindspore::kernel
-


mindspore/lite/src/runtime/kernel/arm/base/concat_base.h  +4 -3

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
 class ConcatBaseCPUKernel : public LiteKernel {
  public:
   ConcatBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                      const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     opParameter->thread_num_ = ctx->thread_num_;
     concat_param_ = reinterpret_cast<ConcatParameter *>(opParameter);
   }
@@ -41,6 +42,7 @@ class ConcatBaseCPUKernel : public LiteKernel {
   int ReSize() override { return 0; }
 
   int Run() override { return 0; }
+
 protected:
   int thread_count_;
   int axis_;
@@ -50,4 +52,3 @@
 }  // namespace mindspore::kernel
 
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CONCAT_BASE_H_
-


mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h  +3 -2

@@ -37,8 +37,9 @@ namespace mindspore::kernel {
 class ConvolutionBaseCPUKernel : public LiteKernel {
  public:
   ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                           const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                           const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                           const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     opParameter->thread_num_ = ctx->thread_num_;
     conv_param_ = reinterpret_cast<ConvParameter *>(opParameter);
   }


mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc  +12 -12

@@ -31,15 +31,15 @@ namespace mindspore::kernel {
 int CropBaseCPUKernel::Init() { return RET_OK; }
 
 kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                             const std::vector<lite::tensor::Tensor *> &outputs,
-                                             OpParameter *opParameter, const Context *ctx,
-                                             const kernel::KernelKey &desc) {
+                                             const std::vector<lite::tensor::Tensor *> &outputs,
+                                             OpParameter *opParameter, const Context *ctx,
+                                             const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
-  auto *kernel = new (std::nothrow) CropInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) CropInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new CropCPUKernel fail!";
     return nullptr;
@@ -55,15 +55,15 @@ kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Ten
 }
 
 kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                              const std::vector<lite::tensor::Tensor *> &outputs,
-                                              OpParameter *opParameter, const Context *ctx,
-                                              const kernel::KernelKey &desc) {
+                                              const std::vector<lite::tensor::Tensor *> &outputs,
+                                              OpParameter *opParameter, const Context *ctx,
+                                              const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
-  auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new CropCPUKernel fail!";
     return nullptr;
@@ -79,15 +79,15 @@ kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Te
 }
 
 kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                             const std::vector<lite::tensor::Tensor *> &outputs,
-                                             OpParameter *opParameter, const Context *ctx,
-                                             const kernel::KernelKey &desc) {
+                                             const std::vector<lite::tensor::Tensor *> &outputs,
+                                             OpParameter *opParameter, const Context *ctx,
+                                             const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
-  auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new CropCPUKernel fail!";
     return nullptr;


mindspore/lite/src/runtime/kernel/arm/base/crop_base.h  +3 -3

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
 class CropBaseCPUKernel : public LiteKernel {
  public:
   CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                    const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
     opParameter->thread_num_ = ctx->thread_num_;
   }
   ~CropBaseCPUKernel() = default;
@@ -39,7 +40,6 @@ class CropBaseCPUKernel : public LiteKernel {
 
 protected:
   int thread_count_;
-  const Context *ctx_;
 };
 }  // namespace mindspore::kernel




mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc  +9 -5

@@ -25,13 +25,17 @@
 
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_PARAM_INVALID;
 using mindspore::lite::RET_FORMAT_ERR;
 using mindspore::lite::RET_OK;
+using mindspore::lite::RET_PARAM_INVALID;
 using mindspore::schema::PrimitiveType_DepthToSpace;
 
 namespace mindspore::kernel {
 int DepthToSpaceBaseCPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   if (inputs_[0]->GetFormat() != schema::Format_NHWC) {
     MS_LOG(ERROR) << "depth_to_space only support NHWC now!";
     return RET_FORMAT_ERR;
@@ -62,13 +66,13 @@ int DepthToSpaceBaseCPUKernel::Init() {
 kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                      const std::vector<lite::tensor::Tensor *> &outputs,
                                                      OpParameter *op_parameter, const lite::Context *ctx,
-                                                     const kernel::KernelKey &desc) {
+                                                     const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace);
   if (op_parameter == nullptr) {
     MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) DepthToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) DepthToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!";
     return nullptr;
@@ -87,13 +91,13 @@ kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::ten
 kernel::LiteKernel *CpuDepthToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                      const std::vector<lite::tensor::Tensor *> &outputs,
                                                      OpParameter *op_parameter, const lite::Context *ctx,
-                                                     const kernel::KernelKey &desc) {
+                                                     const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace);
   if (op_parameter == nullptr) {
     MS_LOG(ERROR) << "Input op_parameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) DepthToSpaceCPUKernel(op_parameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) DepthToSpaceCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new DepthToSpaceCPUKernel fail!";
     return nullptr;


mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h  +3 -2

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
 class DepthToSpaceBaseCPUKernel : public LiteKernel {
  public:
   DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                            const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs) {
+                            const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                            const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
     opParameter->thread_num_ = ctx->thread_num_;
   }




mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc  +6 -4

@@ -35,10 +35,11 @@ int FullconnectionBaseCPUKernel::Init() {
 kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                        const std::vector<lite::tensor::Tensor *> &outputs,
                                                        OpParameter *opParameter, const lite::Context *ctx,
-                                                       const kernel::KernelKey &desc) {
+                                                       const kernel::KernelKey &desc,
+                                                       const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
-  auto kernel = new (std::nothrow) FullconnectionInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto kernel = new (std::nothrow) FullconnectionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (!kernel) {
     MS_LOG(ERROR) << "kernel is nullptr.";
     return nullptr;
@@ -56,10 +57,11 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::t
 kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                        const std::vector<lite::tensor::Tensor *> &outputs,
                                                        OpParameter *opParameter, const lite::Context *ctx,
-                                                       const kernel::KernelKey &desc) {
+                                                       const kernel::KernelKey &desc,
+                                                       const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
-  auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx);
+  auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (!kernel) {
     MS_LOG(ERROR) << "kernel is nullptr.";
     return nullptr;


mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h  +3 -2

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
 class FullconnectionBaseCPUKernel : public LiteKernel {
  public:
   FullconnectionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                              const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                              const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                              const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     fc_param_ = reinterpret_cast<MatMulParameter *>(opParameter);
   }
   ~FullconnectionBaseCPUKernel() = default;


mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc  +4 -3

@@ -28,7 +28,8 @@ using mindspore::schema::PrimitiveType_MatMul;
 namespace mindspore::kernel {
 kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                            const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter,
-                                           const lite::Context *ctx, const kernel::KernelKey &desc) {
+                                           const lite::Context *ctx, const kernel::KernelKey &desc,
+                                           const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
   auto input_tensor = inputs.at(kInputIndex);
@@ -37,7 +38,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso
   switch (data_type) {
     case kNumberTypeInt8:
     case kNumberTypeUInt8: {
-      kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
       if (!kernel) {
         MS_LOG(ERROR) << "kernel is nullptr.";
         return nullptr;
@@ -46,7 +47,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso
     }
 
     case kNumberTypeFloat32: {
-      kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx, primitive);
       if (!kernel) {
         MS_LOG(ERROR) << "kernel is nullptr.";
         return nullptr;


mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h  +3 -2

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
 class MatmulBaseCPUKernel : public LiteKernel {
  public:
   MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                      const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     params_ = reinterpret_cast<MatMulParameter *>(opParameter);
   }
   ~MatmulBaseCPUKernel() = default;


+4 -4   mindspore/lite/src/runtime/kernel/arm/base/pad.cc

@@ -31,10 +31,10 @@ namespace mindspore::kernel {
 kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                             const std::vector<lite::tensor::Tensor *> &outputs,
                                             OpParameter *opParameter, const lite::Context *ctx,
-                                            const kernel::KernelKey &desc) {
+                                            const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Pad);
-  auto *kernel = new (std::nothrow) PadInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) PadInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new PadCPUKernel failed.";
     return nullptr;
@@ -52,10 +52,10 @@ kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tens
 kernel::LiteKernel *CpuPadFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                             const std::vector<lite::tensor::Tensor *> &outputs,
                                             OpParameter *opParameter, const lite::Context *ctx,
-                                            const kernel::KernelKey &desc) {
+                                            const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Pad);
-  auto *kernel = new (std::nothrow) PadCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) PadCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new PadCPUKernel failed.";
     return nullptr;
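
The trailing const lite::Primitive *primitive parameter recurs in every creator in this commit, which implies the registry's creator callback type itself changed. A hedged sketch of that function-pointer type (the name KernelCreator is an assumption; only the parameter list is taken from the diff):

typedef kernel::LiteKernel *(*KernelCreator)(const std::vector<lite::tensor::Tensor *> &inputs,
                                             const std::vector<lite::tensor::Tensor *> &outputs,
                                             OpParameter *opParameter, const lite::Context *ctx,
                                             const kernel::KernelKey &desc,
                                             const lite::Primitive *primitive);  // new trailing parameter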


+8 -4   mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc

@@ -56,6 +56,10 @@ void PoolingBaseCPUKernel::FreeQuantParam() {
 }

 int PoolingBaseCPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   MS_ASSERT(inputs_.size() == 1);
   MS_ASSERT(outputs_.size() == 1);
   pooling_param_->thread_num_ = thread_count_;
@@ -78,13 +82,13 @@ int PoolingBaseCPUKernel::Init() {
 kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                 const std::vector<lite::tensor::Tensor *> &outputs,
                                                 OpParameter *opParameter, const Context *ctx,
-                                                const kernel::KernelKey &desc) {
+                                                const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Pooling);
-  auto *kernel = new (std::nothrow) PoolingInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) PoolingInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new PoolingInt8CPUKernel fail!";
     return nullptr;
@@ -102,13 +106,13 @@ kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor::
 kernel::LiteKernel *CpuPoolingFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                 const std::vector<lite::tensor::Tensor *> &outputs,
                                                 OpParameter *opParameter, const Context *ctx,
-                                                const kernel::KernelKey &desc) {
+                                                const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Pooling);
-  auto *kernel = new (std::nothrow) PoolingCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) PoolingCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new PoolingCPUKernel fail!";
     return nullptr;
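
The four lines added to PoolingBaseCPUKernel::Init() are the commit's core idiom: if shape inference was interrupted while the graph was being built and the graph is not yet running, skip initialization and flag the kernel for re-init. Abstracted out of any particular kernel, the guard is:

int SomeCPUKernel::Init() {
  // Shapes may still be placeholders; defer any work that reads them.
  if (context_->infer_shape_interrupt_ && !context_->running_) {
    SetNeedReInit();   // Prepare() re-enters Init() once the graph runs
    return RET_OK;
  }
  // ... normal initialization using inputs_/outputs_ shapes ...
  return RET_OK;
}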


+3 -2   mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h

@@ -29,8 +29,9 @@ namespace mindspore::kernel {
 class PoolingBaseCPUKernel : public LiteKernel {
  public:
   PoolingBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                       const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                       const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                       const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     pooling_param_ = reinterpret_cast<PoolingParameter *>(opParameter);
   }
   ~PoolingBaseCPUKernel() = default;


+2 -2   mindspore/lite/src/runtime/kernel/arm/base/prelu_base.cc

@@ -32,13 +32,13 @@ int PreluBaseCPUKernel::Init() {return RET_OK;}
 kernel::LiteKernel *CpuPreluInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const Context *ctx,
-                                              const kernel::KernelKey &desc) {
+                                              const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Prelu);
-  auto *kernel = new(std::nothrow) PreluInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new(std::nothrow) PreluInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new PreluCPUKernel fail!";
     return nullptr;


+3 -2   mindspore/lite/src/runtime/kernel/arm/base/prelu_base.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
 class PreluBaseCPUKernel : public LiteKernel {
  public:
   PreluBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                     const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                     const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                     const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     opParameter->thread_num_ = ctx->thread_num_;
     prelu_param_ = reinterpret_cast<PreluParameter *>(opParameter);
   }


+7 -2   mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc

@@ -39,6 +39,11 @@ int PriorBoxCPUKernel::Init() {
     MS_LOG(ERROR) << "PriorBoxParameter nullptr";
     return RET_NULL_PTR;
   }
+
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   MS_ASSERT(inputs_.size() == kInputNum);
   MS_ASSERT(outputs_.size() == kOutputNum);

@@ -164,7 +169,7 @@ int PriorBoxCPUKernel::Run() {
 kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                              const std::vector<lite::tensor::Tensor *> &outputs,
                                              OpParameter *opParameter, const Context *ctx,
-                                             const kernel::KernelKey &desc) {
+                                             const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
@@ -173,7 +178,7 @@ kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Ten
     MS_LOG(ERROR) << "PriorBox invalid desc type " << desc.type;
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) PriorBoxCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) PriorBoxCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new PriorBoxCPUKernel fail!";
     return nullptr;


+3 -2   mindspore/lite/src/runtime/kernel/arm/base/prior_box.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
 class PriorBoxCPUKernel : public LiteKernel {
  public:
   PriorBoxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                    const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     prior_box_param_ = reinterpret_cast<PriorBoxParameter *>(opParameter);
   }
   ~PriorBoxCPUKernel() = default;


+9 -4   mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc

@@ -34,6 +34,10 @@ constexpr int kQuantDTypeCastOutputNum = 1;
 }  // namespace

 int QuantDTypeCastCPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   if (inputs_.size() != 1) {
     MS_LOG(ERROR) << "inputs number should be 1, but " << inputs_.size() << " is given.";
     return RET_ERROR;
@@ -83,8 +87,8 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) {
     ret = DequantizeInt8(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint,
                          num_unit_thread);
   } else {
-    ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale,
-                         quant_arg.zeroPoint, num_unit_thread);
+    ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint,
+                         num_unit_thread);
   }
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]";
@@ -124,12 +128,13 @@ int QuantDTypeCastCPUKernel::Run() {
 kernel::LiteKernel *CpuQuantDTypeCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                        const std::vector<lite::tensor::Tensor *> &outputs,
                                                        OpParameter *opParameter, const lite::Context *ctx,
-                                                       const kernel::KernelKey &desc) {
+                                                       const kernel::KernelKey &desc,
+                                                       const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) QuantDTypeCastCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) QuantDTypeCastCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new QuantDTypeCastCPUKernel fail!";
     return nullptr;
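
For context, QuantDTypeCast partitions the tensor across threads: each task converts num_unit_thread elements starting at task_id times a fixed stride. The stride computation is not shown in this excerpt; a plausible sketch (thread_n_stride_ and num_unit_ are assumed member names):

// Assumed partitioning; only thread_offset and num_unit_thread appear in the diff.
int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
if (num_unit_thread <= 0) {
  return RET_OK;                                   // nothing left for this task
}
int thread_offset = task_id * thread_n_stride_;    // first element owned by task_id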


+3 -2   mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.h

@@ -24,8 +24,9 @@ namespace mindspore::kernel {
 class QuantDTypeCastCPUKernel : public LiteKernel {
  public:
   QuantDTypeCastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                          const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {}
+                          const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                          const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {}
   ~QuantDTypeCastCPUKernel() = default;

   int Init() override;


+6 -6   mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc

@@ -36,13 +36,13 @@ int ReshapeBaseCPUKernel::Init() {
 kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                 const std::vector<lite::tensor::Tensor *> &outputs,
                                                 OpParameter *opParameter, const Context *ctx,
-                                                const kernel::KernelKey &desc) {
+                                                const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Reshape);
-  auto *kernel = new (std::nothrow) ReshapeInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) ReshapeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ReshapeInt8CPUKernel fail!";
     return nullptr;
@@ -60,13 +60,13 @@ kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor::
 kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                  const std::vector<lite::tensor::Tensor *> &outputs,
                                                  OpParameter *opParameter, const Context *ctx,
-                                                 const kernel::KernelKey &desc) {
+                                                 const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Reshape);
-  auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ReshapeCPUKernel fail!";
     return nullptr;
@@ -84,13 +84,13 @@ kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor:
 kernel::LiteKernel *CpuReshapeFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                 const std::vector<lite::tensor::Tensor *> &outputs,
                                                 OpParameter *opParameter, const Context *ctx,
-                                                const kernel::KernelKey &desc) {
+                                                const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Reshape);
-  auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new ReshapeCPUKernel fail!";
     return nullptr;


+3 -3   mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
 class ReshapeBaseCPUKernel : public LiteKernel {
  public:
   ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                       const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                       const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                       const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     reshape_param_ = reinterpret_cast<ReshapeParameter *>(opParameter);
   }
   ~ReshapeBaseCPUKernel() = default;
@@ -45,4 +46,3 @@ class ReshapeBaseCPUKernel : public LiteKernel {
 }  // namespace mindspore::kernel
-
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_


+4 -4   mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc

@@ -53,13 +53,13 @@ int SoftmaxBaseCPUKernel::Init() {
 kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                 const std::vector<lite::tensor::Tensor *> &outputs,
                                                 OpParameter *opParameter, const lite::Context *ctx,
-                                                const kernel::KernelKey &desc) {
+                                                const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax);
-  auto *kernel = new (std::nothrow) SoftmaxInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  SoftmaxInt8CPUKernel *kernel = new (std::nothrow) SoftmaxInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!";
     return nullptr;
@@ -77,13 +77,13 @@ kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor::
 kernel::LiteKernel *CpuSoftmaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                 const std::vector<lite::tensor::Tensor *> &outputs,
                                                 OpParameter *opParameter, const lite::Context *ctx,
-                                                const kernel::KernelKey &desc) {
+                                                const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax);
-  auto *kernel = new (std::nothrow) SoftmaxCPUKernel(opParameter, inputs, outputs, ctx);
+  SoftmaxCPUKernel *kernel = new (std::nothrow) SoftmaxCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!";
     return nullptr;


+3 -2   mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
 class SoftmaxBaseCPUKernel : public LiteKernel {
  public:
   SoftmaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                       const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                       const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                       const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     opParameter->thread_num_ = ctx->thread_num_;
     softmax_param_ = reinterpret_cast<SoftmaxParameter *>(opParameter);
   }


+6 -6   mindspore/lite/src/runtime/kernel/arm/base/split_base.cc

@@ -61,13 +61,13 @@ int SplitBaseCPUKernel::Init() {
 kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const Context *ctx,
-                                              const kernel::KernelKey &desc) {
+                                              const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Split);
-  auto *kernel = new (std::nothrow) SplitInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) SplitInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SplitCPUKernel fail!";
     return nullptr;
@@ -85,13 +85,13 @@ kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Te
 kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                const std::vector<lite::tensor::Tensor *> &outputs,
                                                OpParameter *opParameter, const Context *ctx,
-                                               const kernel::KernelKey &desc) {
+                                               const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Split);
-  auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SplitCPUKernel fail!";
     return nullptr;
@@ -109,13 +109,13 @@ kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::T
 kernel::LiteKernel *CpuSplitFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                               const std::vector<lite::tensor::Tensor *> &outputs,
                                               OpParameter *opParameter, const Context *ctx,
-                                              const kernel::KernelKey &desc) {
+                                              const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Split);
-  auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SplitCPUKernel fail!";
     return nullptr;


+3 -2   mindspore/lite/src/runtime/kernel/arm/base/split_base.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
 class SplitBaseCPUKernel : public LiteKernel {
  public:
   SplitBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                     const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                     const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                     const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     param = reinterpret_cast<SplitParameter *>(opParameter);
   }
   ~SplitBaseCPUKernel() = default;


+2 -2   mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc

@@ -32,13 +32,13 @@ int SqueezeBaseCPUKernel::Init() { return RET_OK; }
 kernel::LiteKernel *CpuSqueezeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                 const std::vector<lite::tensor::Tensor *> &outputs,
                                                 OpParameter *opParameter, const Context *ctx,
-                                                const kernel::KernelKey &desc) {
+                                                const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "Input opParameter is nullptr!";
     return nullptr;
   }
   MS_ASSERT(desc.type == schema::PrimitiveType_Squeeze);
-  auto *kernel = new (std::nothrow) SqueezeInt8CPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) SqueezeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "new SqueezeCPUKernel fail!";
     return nullptr;


+3 -2   mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
 class SqueezeBaseCPUKernel : public LiteKernel {
  public:
   SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                       const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
+                       const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                       const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
     opParameter->thread_num_ = ctx->thread_num_;
   }




+9 -3   mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc

@@ -42,12 +42,18 @@ int StridedSliceCPUKernel::Init() {
 int StridedSliceCPUKernel::ReSize() { return 0; }

 int StridedSliceCPUKernel::Run() {
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare failed.";
+    return RET_ERROR;
+  }
   auto input = inputs_.at(0);
   auto output = outputs_.at(0);
   MS_ASSERT(input);
   MS_ASSERT(output);

-  auto ret = DoStridedSlice(input->Data(), output->Data(), reinterpret_cast<StridedSliceParameter *>(opParameter));
+  ret = DoStridedSlice(input->Data(), output->Data(), reinterpret_cast<StridedSliceParameter *>(opParameter));
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "StridedSlice error error_code[" << ret << "]";
     return RET_ERROR;
@@ -58,13 +64,13 @@ int StridedSliceCPUKernel::Run() {
 kernel::LiteKernel *CpuStridedSliceKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                  const std::vector<lite::tensor::Tensor *> &outputs,
                                                  OpParameter *opParameter, const lite::Context *ctx,
-                                                 const kernel::KernelKey &desc) {
+                                                 const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(desc.type == schema::PrimitiveType_StridedSlice);
   if (opParameter == nullptr) {
     MS_LOG(ERROR) << "opParameter null pointer dereferencing.";
     return nullptr;
   }
-  auto *kernel = new (std::nothrow) StridedSliceCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) StridedSliceCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "New kernel fails.";
     return nullptr;
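
StridedSlice shows the run-time half of the idiom: Run() now opens with Prepare(). Prepare()'s body is not part of this excerpt; presumably it re-runs the deferred Init() for kernels that called SetNeedReInit(), once shapes are concrete. A hedged sketch of that contract:

// Assumed behavior of LiteKernel::Prepare(); not shown in this diff.
int LiteKernel::Prepare() {
  if (!need_reinit_) {
    return RET_OK;                 // Init() already succeeded at build time
  }
  need_reinit_ = false;
  return Init();                   // shapes are concrete now, so Init() can proceed
}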


+3 -2   mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
 class StridedSliceCPUKernel : public LiteKernel {
  public:
   StridedSliceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                        const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {}
+                        const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                        const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {}
   ~StridedSliceCPUKernel() override = default;

   int Init() override;


+11 -3   mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc

@@ -183,10 +183,14 @@ void Convolution3x3FP16CPUKernel::ConfigInputOutput() {
 }

 int Convolution3x3FP16CPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return RET_ERROR;
+    return ret;
   }
   ret = InitWeightBias();
   if (ret != RET_OK) {
@@ -228,7 +232,7 @@ int Convolution3x3FP16CPUKernel::ReSize() {
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return RET_ERROR;
+    return ret;
   }
   ret = InitTmpBuffer();
   if (ret != RET_OK) {
@@ -256,7 +260,11 @@ int Convolution3x3Fp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata)
 }

 int Convolution3x3FP16CPUKernel::Run() {
-  // cast fp32 input data to fp16
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare failed.";
+    return RET_ERROR;
+  }
   auto input_tensor = inputs_.at(kInputIndex);
   auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data());
   for (int i = 0; i < input_tensor->ElementsNum(); ++i) {
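
Note the side fix in both Init() and ReSize() above: a failing ConvolutionBaseCPUKernel::Init() now propagates its actual error code instead of collapsing everything to RET_ERROR. That matters to callers that branch on specific codes, for example:

// Caller-side benefit of propagating `ret` (codes come from include/errorcode.h;
// the specific branch below is illustrative, not from this commit):
auto status = kernel->Init();
if (status == RET_NULL_PTR) {
  // a required parameter was missing: configuration problem
} else if (status != RET_OK) {
  // some other initialization failure
}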


+3 -3   mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
 class Convolution3x3FP16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   Convolution3x3FP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                              const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+                              const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                              const lite::Primitive *primitive)
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~Convolution3x3FP16CPUKernel() override {
     if (fp16_input_ != nullptr) {
       free(fp16_input_);
@@ -78,4 +79,3 @@ void ProcessFilterFp16(float16_t *origin_weight, float16_t *dst_weight, ConvPara
 }  // namespace mindspore::kernel
-
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_3x3_FP16_H_


+17 -6   mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc

@@ -85,14 +85,20 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
 }

 int ConvolutionDepthwiseFp16CPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   // conv base init
-  ConvolutionBaseCPUKernel::Init();
-
+  auto ret = ConvolutionBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
+  }
   // init sliding_ window param
   sliding_ = new SlidingWindowParam;
   InitSlidingParam(sliding_, conv_param_, C8NUM);

-  auto ret = InitWeightBias();
+  ret = InitWeightBias();
   if (ret != 0) {
     MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed.";
     return RET_ERROR;
@@ -138,6 +144,11 @@ int ConvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }

 int ConvolutionDepthwiseFp16CPUKernel::Run() {
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare failed.";
+    return RET_ERROR;
+  }
   if (conv_param_->input_channel_ != conv_param_->output_channel_) {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
@@ -149,7 +160,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
   PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_,
                           conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);

-  auto ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_);
+  ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -165,10 +176,10 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
 kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                const std::vector<lite::tensor::Tensor *> &outputs,
                                                OpParameter *opParameter, const Context *ctx,
-                                               const kernel::KernelKey &desc) {
+                                               const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D);
-  auto kernel = new (std::nothrow) ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx);
+  auto kernel = new (std::nothrow) ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
     return nullptr;
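
The depthwise kernels had been discarding ConvolutionBaseCPUKernel::Init()'s return value; with deferred init a silently ignored failure would surface much later, so the commit now checks it. A compiler-enforced alternative, shown only as an illustration (not something this commit does), would be to mark the base Init() [[nodiscard]]:

class ConvolutionBaseCPUKernel : public LiteKernel {
 public:
  // C++17: any caller that ignores the returned status gets a warning.
  [[nodiscard]] int Init() override;
};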


+3 -2   mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
 class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+                                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                                    const lite::Primitive *primitive)
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ConvolutionDepthwiseFp16CPUKernel() override {
     delete sliding_;
     free(packed_weight_);


+15 -7   mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc

@@ -154,10 +154,14 @@ void ConvolutionFP16CPUKernel::ConfigInputOutput() {
 }

 int ConvolutionFP16CPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret;
+    return ret;
   }
   ret = InitWeightBias();
   if (ret != RET_OK) {
@@ -193,7 +197,7 @@ int ConvolutionFP16CPUKernel::ReSize() {
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return RET_ERROR;
+    return ret;
   }
   ret = InitTmpBuffer();
   if (ret != RET_OK) {
@@ -220,7 +224,11 @@ int ConvolutionFp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }

 int ConvolutionFP16CPUKernel::Run() {
-  // cast fp32 input data to fp16
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare failed.";
+    return RET_ERROR;
+  }
   auto input_tensor = inputs_.at(kInputIndex);
   auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data());
   for (int i = 0; i < input_tensor->ElementsNum(); ++i) {
@@ -251,7 +259,7 @@ int ConvolutionFP16CPUKernel::Run() {
 kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                              const std::vector<lite::tensor::Tensor *> &outputs,
                                              OpParameter *opParameter, const Context *ctx,
-                                             const kernel::KernelKey &desc) {
+                                             const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D);
   auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
@@ -267,7 +275,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
   conv_param->output_w_ = outputs.front()->Width();
   kernel::LiteKernel *kernel = nullptr;
   if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) {
-    kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx);
+    kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   } else {
     bool use_winograd = false;
     int out_unit;
@@ -275,7 +283,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
     OutputTransformUnitFunc output_trans_func = nullptr;
     CheckIfUseWinograd(&use_winograd, &out_unit, conv_param, input_trans_func, output_trans_func);
     if (kernel_h != 1 && kernel_w != 1 && !use_winograd) {
-      kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
     }
   }
   if (kernel == nullptr) {
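
Both fp16 convolution kernels keep the element-wise fp32-to-fp16 input cast that the deleted comment described; only the loop header survives in this excerpt, so the body below is an assumption built from the fp16_input_ member visible in the corresponding header:

// Cast the fp32 input tensor into the kernel's fp16 staging buffer.
auto input_tensor = inputs_.at(kInputIndex);
auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data());
for (int i = 0; i < input_tensor->ElementsNum(); ++i) {
  fp16_input_[i] = (float16_t)ori_input_data[i];  // assumed loop body
}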


+3 -2   mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
 class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                           const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+                           const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                           const lite::Primitive *primitive)
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ConvolutionFP16CPUKernel() override {
     if (fp16_input_ != nullptr) {
       free(fp16_input_);


+17 -5   mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc

@@ -99,12 +99,19 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
 }

 int DeconvolutionDepthwiseFp16CPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   sliding_ = new SlidingWindowParam;
   InitSlideParam();
   // conv base init
-  ConvolutionBaseCPUKernel::Init();
+  auto ret = ConvolutionBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
+  }

-  auto ret = InitWeightBias();
+  ret = InitWeightBias();
   if (ret != 0) {
     MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitWeightBias failed.";
     return RET_ERROR;
@@ -150,6 +157,11 @@ int DeconvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }

 int DeconvolutionDepthwiseFp16CPUKernel::Run() {
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare failed.";
+    return RET_ERROR;
+  }
   if (conv_param_->input_channel_ != conv_param_->output_channel_) {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
@@ -161,7 +173,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
   PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_,
                           conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);

-  auto ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_);
+  ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]";
     return RET_ERROR;
@@ -176,10 +188,10 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
 kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                  const std::vector<lite::tensor::Tensor *> &outputs,
                                                  OpParameter *opParameter, const lite::Context *ctx,
-                                                 const kernel::KernelKey &desc) {
+                                                 const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_DeDepthwiseConv2D);
-  auto kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx);
+  auto kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
     return nullptr;


+3 -2   mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
 class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+                                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                                      const lite::Primitive *primitive)
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~DeconvolutionDepthwiseFp16CPUKernel() override {
     delete sliding_;
     free(packed_weight_);


+8 -2   mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc

@@ -19,6 +19,7 @@
 #include "src/kernel_registry.h"
 #include "src/runtime/runtime_api.h"
 #include "include/errorcode.h"
+#include "src/ops/ops.h"

 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
@@ -78,6 +79,11 @@ int ActivationRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }

 int ActivationCPUKernel::Run() {
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare failed.";
+    return ret;
+  }
   int error_code = LiteBackendParallelLaunch(ActivationRun, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
@@ -89,10 +95,10 @@ int ActivationCPUKernel::Run() {
 kernel::LiteKernel *CpuActivationFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                    const std::vector<lite::tensor::Tensor *> &outputs,
                                                    OpParameter *opParameter, const lite::Context *ctx,
-                                                   const kernel::KernelKey &desc) {
+                                                   const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Activation);
-  auto *kernel = new (std::nothrow) ActivationCPUKernel(opParameter, inputs, outputs, ctx);
+  auto *kernel = new (std::nothrow) ActivationCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   if (kernel == nullptr) {
     MS_LOG(ERROR) << "kernel is nullptr.";
     return nullptr;


+3 -2   mindspore/lite/src/runtime/kernel/arm/fp32/activation.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
 class ActivationCPUKernel : public LiteKernel {
  public:
   ActivationCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
-                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(param, inputs, outputs), thread_count_(ctx->thread_num_) {
+                      const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                      const lite::Primitive *primitive)
+      : LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
     type_ = (reinterpret_cast<ActivationParameter *>(param))->type_;
     alpha_ = (reinterpret_cast<ActivationParameter *>(param))->alpha_;
   }


+10 -10   mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.cc

@@ -20,8 +20,8 @@
 #include "src/runtime/runtime_api.h"
 #include "include/errorcode.h"

-using mindspore::lite::KernelRegistrar;
 using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 using mindspore::schema::ActivationGradType_HSWISH;
@@ -32,8 +32,8 @@ using mindspore::schema::PrimitiveType_ActivationGrad;

 namespace mindspore::kernel {
 int ActivationGradCPUKernel::Init() {
-    outputs_[0]->set_shape(inputs_[0]->shape());
-    return RET_OK;
+  outputs_[0]->set_shape(inputs_[0]->shape());
+  return RET_OK;
 }

 int ActivationGradCPUKernel::ReSize() { return RET_OK; }
@@ -58,7 +58,7 @@ int ActivationGradCPUKernel::DoActivation(int task_id) {
     error_code = TanhGrad(yt_addr, input_addr, length, output_addr);
   } else if (type_ == schema::ActivationGradType_HSWISH) {
     error_code = HSwishGrad(yt_addr, input_addr, length, output_addr);
-  } else if (type_ == schema::ActivationGradType_HSIGMOID) {
+  } else if (type_ == schema::ActivationGradType_HSIGMOID) {
     error_code = HSigmoidGrad(yt_addr, input_addr, length, output_addr);
   } else {
     MS_LOG(ERROR) << "Activation type error";
@@ -90,17 +90,17 @@ int ActivationGradCPUKernel::Run() {
 }

 kernel::LiteKernel *CpuActivationGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                                        const std::vector<lite::tensor::Tensor *> &outputs,
-                                                        OpParameter *opParameter, const lite::Context *ctx,
-                                                        const kernel::KernelKey &desc) {
+                                                       const std::vector<lite::tensor::Tensor *> &outputs,
+                                                       OpParameter *opParameter, const lite::Context *ctx,
+                                                       const kernel::KernelKey &desc,
+                                                       const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_ActivationGrad);
-  auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs);
+  auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   MS_ASSERT(kernel != nullptr);
   auto ret = kernel->Init();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_
-                  << ", type: "
+    MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_ << ", type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
   }
   return kernel;


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.h View File

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class ActivationGradCPUKernel : public LiteKernel { class ActivationGradCPUKernel : public LiteKernel {
public: public:
explicit ActivationGradCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs, explicit ActivationGradCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(param, inputs, outputs) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(param, inputs, outputs, ctx, primitive) {
ActivationGradParameter *param_act_grad = reinterpret_cast<ActivationGradParameter *>(param);
type_ = param_act_grad->type_;
alpha_ = param_act_grad->alpha_;
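The header-side change is mechanical: every constructor grows ctx and primitive parameters and forwards them to the LiteKernel base. A sketch with a hypothetical kernel class:

class FooCPUKernel : public LiteKernel {
 public:
  FooCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
               const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
               const lite::Primitive *primitive)
      : LiteKernel(param, inputs, outputs, ctx, primitive) {}  // base now stores ctx and primitive
};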


+ 11
- 8
mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc View File

@@ -36,12 +36,9 @@ int AddNLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
auto kernel = reinterpret_cast<AddNCPUKernel *>(cdata);
return kernel->AddNParallelRun(thread_id);
}
}
} // namespace


int AddNCPUKernel::Init() {
elements_num_ = inputs_[0]->ElementsNum();
return RET_OK;
}
int AddNCPUKernel::Init() { return RET_OK; }


int AddNCPUKernel::ReSize() { return RET_OK; }
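AddNLaunch above is the usual trampoline for LiteBackendParallelLaunch: a free function that casts the opaque cdata pointer back to the kernel and dispatches to a per-thread member method. The shape of the idiom, simplified with hypothetical names:

int FooLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
  if (cdata == nullptr) {
    MS_LOG(ERROR) << "input cdata is nullptr!";
    return RET_ERROR;
  }
  auto kernel = reinterpret_cast<FooCPUKernel *>(cdata);
  return kernel->DoFooParallel(thread_id);  // each thread handles its own slice
}
// launched from Run() as:
//   ret = LiteBackendParallelLaunch(FooLaunch, this, opParameter->thread_num_);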


@@ -58,6 +55,12 @@ int AddNCPUKernel::AddNParallelRun(int thread_id) {
}


int AddNCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
elements_num_ = inputs_[0]->ElementsNum();
auto input0_data = reinterpret_cast<float *>(inputs_[0]->Data());
auto input1_data = reinterpret_cast<float *>(inputs_[1]->Data());
auto output_data = reinterpret_cast<float *>(outputs_[0]->Data());
@@ -71,7 +74,7 @@ int AddNCPUKernel::Run() {
in1_addr_ = input0_data;
in2_addr_ = input1_data;
out_addr_ = output_data;
int ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_);
ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
return RET_ERROR;
@@ -91,7 +94,7 @@ int AddNCPUKernel::Run() {
kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
@@ -102,7 +105,7 @@ kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Ten
}
MS_ASSERT(desc.type == schema::PrimitiveType_AddN);
op_parameter->thread_num_ = ctx->thread_num_;
auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs);
auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new AddNCPUKernel fail!";
return nullptr;
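The recurring Run()-side change: every kernel now calls Prepare() before touching tensor data, so a kernel whose shape inference was interrupted at build time is re-initialized with real shapes first. The merged AddN Run() therefore starts:

int AddNCPUKernel::Run() {
  auto ret = Prepare();  // re-runs Init() if SetNeedReInit() was recorded
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return ret;
  }
  elements_num_ = inputs_[0]->ElementsNum();  // moved here from Init(): shapes are known only now
  // ... fetch data pointers and launch the parallel add ...
}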


+ 4
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/addn.h View File

@@ -21,18 +21,20 @@
#include "src/lite_kernel.h" #include "src/lite_kernel.h"
#include "schema/model_generated.h" #include "schema/model_generated.h"



namespace mindspore::kernel {
class AddNCPUKernel : public LiteKernel {
public:
AddNCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~AddNCPUKernel() = default;


int Init() override;
int ReSize() override;
int Run() override;
int AddNParallelRun(int thread_id);

private:
float *in1_addr_;
float *in2_addr_;


+ 6
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc View File

@@ -40,7 +40,12 @@ int ArgMinMaxCPUKernel::Init() {
}


int ArgMinMaxCPUKernel::Run() {
auto ret = ArgMinMaxBaseCPUKernel::Run();
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}
ret = ArgMinMaxBaseCPUKernel::Run();
ArgMinMaxBaseCPUKernel::FreeTmpMemory();
return ret;
}
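For kernels built on a shared base class, the merged Run() reads as: Prepare, delegate the computation to the base implementation, then always release the base's scratch buffers:

int ArgMinMaxCPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
    return ret;
  }
  ret = ArgMinMaxBaseCPUKernel::Run();      // the base class does the actual arg-min/max
  ArgMinMaxBaseCPUKernel::FreeTmpMemory();  // freed regardless of ret
  return ret;
}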


+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.h View File

@@ -23,8 +23,9 @@ namespace mindspore::kernel {
class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel {
public:
ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}


~ArgMinMaxCPUKernel() = default;


@@ -35,4 +36,3 @@ class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel {
} // namespace mindspore::kernel


#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARGMINMAX_H_


+ 11
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc View File

@@ -41,6 +41,10 @@ ArithmeticCPUKernel::~ArithmeticCPUKernel() {
}
}
int ArithmeticCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto element_num = outputs_[0]->ElementsNum();


tile_data0_ = new float[element_num];
@@ -92,6 +96,11 @@ int ArithmeticsRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}


int ArithmeticCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
if (arithmeticParameter_->broadcasting_) {
auto input_data0 = reinterpret_cast<float *>(inputs_[0]->Data());
auto input_data1 = reinterpret_cast<float *>(inputs_[1]->Data());
@@ -108,9 +117,9 @@ int ArithmeticCPUKernel::Run() {
kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(parameter != nullptr);
auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx);
auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_;
return nullptr;
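The Init()-side counterpart, added across these kernels, is the interrupt guard: if shape inference was interrupted and the graph is not yet running, the kernel marks itself for re-initialization and skips any work that depends on tensor shapes. As merged here:

int ArithmeticCPUKernel::Init() {
  if (context_->infer_shape_interrupt_ && !context_->running_) {
    SetNeedReInit();  // Prepare() will run Init() again once shapes are known
    return RET_OK;
  }
  auto element_num = outputs_[0]->ElementsNum();  // safe only after infer shape
  tile_data0_ = new float[element_num];
  // ...
}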


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h View File

@@ -48,8 +48,9 @@ class ArithmeticCPUKernel : public LiteKernel {


public:
ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
arithmeticParameter_ = reinterpret_cast<ArithmeticParameter *>(parameter);
switch (parameter->type_) {
case PrimitiveType_Mul:


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.cc View File

@@ -261,12 +261,13 @@ int ArithmeticGradCPUKernel::Run() {
kernel::LiteKernel *CpuArithmeticGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_EXCEPTION_IF_NULL(opParameter);
if (opParameter == nullptr) {
return nullptr;
}
auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, inputs, outputs);
auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);
auto ret = kernel->Init();
if (ret != RET_OK) {


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.h View File

@@ -37,8 +37,9 @@ class ArithmeticGradCPUKernel : public LiteKernel {


public:
explicit ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) {
switch (type()) {
case PrimitiveType_MulGrad:
arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul; // this will be adjusted in InferShape
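The constructor picks the gradient routine once via a pointer-to-member, which Run() later invokes. A self-contained sketch of that dispatch idiom (simplified names, not the real class):

class GradDispatch {
  using GradFn = int (GradDispatch::*)(const float *dy, float *dx);
  GradFn grad_fn_ = &GradDispatch::DefaultGrad;

 public:
  explicit GradDispatch(int op_type) {
    if (op_type == 0 /* e.g. a MulGrad tag */) {
      grad_fn_ = &GradDispatch::MulGrad;  // chosen once, possibly adjusted after InferShape
    }
  }
  int Run(const float *dy, float *dx) { return (this->*grad_fn_)(dy, dx); }

 private:
  int MulGrad(const float *, float *) { return 0; }
  int DefaultGrad(const float *, float *) { return 0; }
};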


+ 13
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc View File

@@ -27,6 +27,10 @@ using mindspore::lite::RET_OK;


namespace mindspore::kernel {
int ArithmeticSelfCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
int ret = ReSize();
return ret;
}
@@ -68,11 +72,16 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) {
}


int ArithmeticSelfCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
auto input_tensor = inputs_.at(0);
auto out_tensor = outputs_.at(0);
in_ptr_ = reinterpret_cast<float *>(input_tensor->Data());
out_ptr_ = reinterpret_cast<float *>(out_tensor->Data());
int ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_);
ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
return ret;
@@ -83,13 +92,14 @@ int ArithmeticSelfCPUKernel::Run() {
kernel::LiteKernel *CpuArithmeticSelfFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Creator failed, opParameter is nullptr!";
return nullptr;
}
auto *kernel = new (std::nothrow) ArithmeticSelfCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ArithmeticSelfCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);
auto ret = kernel->Init();
if (ret != RET_OK) {
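A small but necessary cleanup repeated in several Run() bodies: once Prepare() introduces ret at the top of the function, the later parallel-launch line must assign to it rather than re-declare it, since a second `int ret = ...` in the same scope would be a redefinition. Schematically:

int ArithmeticSelfCPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    return ret;
  }
  // reuse, not re-declare: `int ret = ...` would not compile here
  ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_);
  return ret;
}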


+ 4
- 5
mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h View File

@@ -24,9 +24,9 @@
#include "schema/model_generated.h" #include "schema/model_generated.h"
#include "include/context.h" #include "include/context.h"



using mindspore::lite::Context;
using mindspore::schema::PrimitiveType_Abs;
using mindspore::schema::PrimitiveType_Ceil;
using mindspore::schema::PrimitiveType_Cos;
using mindspore::schema::PrimitiveType_Exp;
using mindspore::schema::PrimitiveType_Floor;
@@ -36,7 +36,6 @@ using mindspore::schema::PrimitiveType_Rsqrt;
using mindspore::schema::PrimitiveType_Sin;
using mindspore::schema::PrimitiveType_Sqrt;
using mindspore::schema::PrimitiveType_Square;
using mindspore::schema::PrimitiveType_Ceil;


namespace mindspore::kernel {
class ArithmeticSelfCPUKernel : public LiteKernel {
@@ -44,8 +43,9 @@ class ArithmeticSelfCPUKernel : public LiteKernel {


public:
explicit ArithmeticSelfCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
switch (parameter->type_) {
case PrimitiveType_Abs:
arithmeticSelf_run_ = ElementAbs;
@@ -106,4 +106,3 @@ class ArithmeticSelfCPUKernel : public LiteKernel {
} // namespace mindspore::kernel


#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_


+ 5
- 0
mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.cc View File

@@ -28,6 +28,11 @@ int BatchToSpaceCPUKernel::Init() {
}


int BatchToSpaceCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input = inputs_[0];
auto output = outputs_[0];
const float *input_data = reinterpret_cast<const float *>(input->Data());


+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.h View File

@@ -22,8 +22,9 @@ namespace mindspore::kernel {
class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel {
public:
BatchToSpaceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}


~BatchToSpaceCPUKernel() = default;


@@ -34,4 +35,3 @@ class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel {
} // namespace mindspore::kernel


#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCH_TO_SPACE_H_


+ 7
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc View File

@@ -53,6 +53,11 @@ int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}


int BatchnormCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
in_addr_ = reinterpret_cast<float *>(inputs_.at(0)->Data());
mean_addr_ = reinterpret_cast<float *>(inputs_.at(1)->Data());
var_addr_ = reinterpret_cast<float *>(inputs_.at(2)->Data());
@@ -76,10 +81,10 @@ int BatchnormCPUKernel::Run() {
kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_BatchNorm);
auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new BatchNormCPUKernel fail!";
return nullptr;


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h View File

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class BatchnormCPUKernel : public LiteKernel {
public:
BatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter);
}
~BatchnormCPUKernel() override { delete batchnorm_param_; }


+ 12
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc View File

@@ -31,6 +31,11 @@ namespace mindspore::kernel {
int BiasCPUKernel::ReSize() { return RET_OK; }


int BiasCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto in = reinterpret_cast<float *>(inputs_.at(0)->Data());
auto bias = reinterpret_cast<float *>(inputs_.at(1)->Data());
auto out = reinterpret_cast<float *>(outputs_.at(0)->Data());
@@ -44,6 +49,10 @@ int BiasCPUKernel::Run() {
}


int BiasCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto dims = inputs_[0]->shape();
MS_ASSERT(dims.size() <= 5);
bias_param_->ndim_ = dims.size();
@@ -58,10 +67,11 @@ int BiasCPUKernel::Init() {


kernel::LiteKernel *CpuBiasFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
const lite::Context *ctx, const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(parameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_BiasAdd);
auto kernel = new (std::nothrow) BiasCPUKernel(parameter, inputs, outputs);
auto kernel = new (std::nothrow) BiasCPUKernel(parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_;
return nullptr;


+ 4
- 4
mindspore/lite/src/runtime/kernel/arm/fp32/bias.h View File

@@ -24,9 +24,10 @@ namespace mindspore::kernel {
class BiasCPUKernel : public LiteKernel {
public:
BiasCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs) {
bias_param_ = reinterpret_cast<ArithmeticParameter*>(parameter);
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
bias_param_ = reinterpret_cast<ArithmeticParameter *>(parameter);
}
~BiasCPUKernel() override = default;


@@ -40,4 +41,3 @@ class BiasCPUKernel : public LiteKernel {
} // namespace mindspore::kernel


#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_H_


+ 9
- 6
mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.cc View File

@@ -20,12 +20,11 @@
#include "src/kernel_registry.h" #include "src/kernel_registry.h"
#include "include/errorcode.h" #include "include/errorcode.h"



using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_BiasGrad;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_BiasGrad;


namespace mindspore::kernel {
int BiasGradCPUKernel::InferShape() {
@@ -68,10 +67,14 @@ int BiasGradCPUKernel::Init() {
return RET_OK;
}



int BiasGradCPUKernel::ReSize() { return 0; }


int BiasGradCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
auto in = reinterpret_cast<float *>(inputs_.at(0)->Data());
auto out = reinterpret_cast<float *>(outputs_.at(0)->Data());
// size_t data_size = inputs_.at(0)->ElementsNum();
@@ -91,14 +94,14 @@ int BiasGradCPUKernel::Run() {
return RET_OK;
}



kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad);
auto *kernel = new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
auto *kernel =
new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);


auto ret = kernel->Init();


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.h View File

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class BiasGradCPUKernel : public LiteKernel {
public:
explicit BiasGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
bias_param = reinterpret_cast<ArithmeticParameter *>(parameter);
}
~BiasGradCPUKernel() override = default;


+ 2
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.cc View File

@@ -96,12 +96,12 @@ int BNGradInputCPUKernel::Run() {
kernel::LiteKernel *CpuBNGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_BNGradInput);
// parameter->name = opDef.name()->str().data();
// parameter->type = opDef.attr_type();
auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs);
auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);
auto ret = kernel->Init();
if (RET_OK != ret) {


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.h View File

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class BNGradInputCPUKernel : public LiteKernel {
public:
explicit BNGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~BNGradInputCPUKernel() override { delete workspace; }


int Init() override;


+ 11
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc View File

@@ -27,6 +27,10 @@ using mindspore::schema::PrimitiveType_BroadcastTo;
namespace mindspore::kernel {


int BroadcastToCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto input_shape = inputs_[0]->shape();
for (size_t i = 0; i < input_shape.size(); ++i) {
shape_info_.input_shape_[i] = input_shape[i];
@@ -42,6 +46,11 @@ int BroadcastToCPUKernel::Init() {
}


int BroadcastToCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_data = reinterpret_cast<float *>(inputs_.at(0)->Data());
auto output_data = reinterpret_cast<float *>(outputs_.at(0)->Data());


@@ -51,13 +60,13 @@ int BroadcastToCPUKernel::Run() {
kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_BroadcastTo);
auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs);
auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new BroadcastToCPUKernel fail!";
return nullptr;


+ 5
- 5
mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h View File

@@ -25,18 +25,18 @@ namespace mindspore::kernel {
class BroadcastToCPUKernel : public LiteKernel {
public:
BroadcastToCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~BroadcastToCPUKernel() = default;


int Init() override;
int ReSize() override {
return 0;
}
int ReSize() override { return 0; }
int Run() override;

private:
BroadcastShapeInfo shape_info_;
};
} // namespace mindspore::kernel


#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BROADCAST_TO_H_


+ 14
- 8
mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc View File

@@ -30,9 +30,6 @@ using mindspore::schema::PrimitiveType_Cast;


namespace mindspore::kernel {
namespace {
constexpr int kInputNum = 1;
constexpr int kOutputNum = 1;
const std::vector<int> kSupportInputDataType = {kNumberTypeUInt8, kNumberTypeInt32};
int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
if (cdata == nullptr) {
MS_LOG(ERROR) << "input cdata is nullptr!";
@@ -44,12 +41,16 @@ int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
} // namespace


int CastCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
data_num_ = inputs_[0]->ElementsNum();
if (data_num_ == 0) {
return RET_OK;
}
thread_num_ = MSMIN(thread_num_, data_num_);
stride_ = UP_DIV(data_num_, thread_num_);
opParameter->thread_num_ = MSMIN(opParameter->thread_num_, data_num_);
stride_ = UP_DIV(data_num_, opParameter->thread_num_);
return RET_OK;
}


@@ -77,16 +78,21 @@ int CastCPUKernel::DoCast(int thread_id) {
}


int CastCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
if (data_num_ == 0) {
return RET_OK;
}
return LiteBackendParallelLaunch(CastRun, this, thread_num_);
return LiteBackendParallelLaunch(CastRun, this, opParameter->thread_num_);
}


kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
@@ -99,7 +105,7 @@ kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Ten
MS_LOG(ERROR) << "context thread num is 0!"; MS_LOG(ERROR) << "context thread num is 0!";
return nullptr; return nullptr;
} }
auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new CastCPUKernel fail!";
return nullptr;
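Cast now stores its thread count in opParameter->thread_num_, clamps it by the element count, and derives the per-thread stride with UP_DIV. Assuming the usual ceiling-division macro, UP_DIV(x, y) == ((x) + (y) - 1) / (y), a worked example:

// data_num_ = 10 elements, ctx->thread_num_ = 4
// thread_num = MSMIN(4, 10)  -> 4
// stride     = UP_DIV(10, 4) -> (10 + 3) / 4 = 3
// threads 0..2 each cast 3 elements; thread 3 casts the remaining 1
int stride = (10 + 4 - 1) / 4;  // == 3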


+ 6
- 10
mindspore/lite/src/runtime/kernel/arm/fp32/cast.h View File

@@ -23,27 +23,23 @@ namespace mindspore::kernel {
class CastCPUKernel : public LiteKernel {
public:
CastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs) {
if (ctx != nullptr) {
thread_num_ = ctx->thread_num_;
}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
opParameter->thread_num_ = ctx->thread_num_;
}


~CastCPUKernel() = default;


int Init() override;
int ReSize() override {
return 0;
};
int ReSize() override { return 0; };
int Run() override;
int DoCast(int thread_id);

private:
uint32_t thread_num_;
uint32_t stride_;
uint32_t data_num_;
};
} // namespace mindspore::kernel


#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_


+ 45
- 35
mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc View File

@@ -28,44 +28,54 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Concat;


namespace mindspore::kernel {
int ConcatCPUKernel::Init() {
ConcatBaseCPUKernel::Init();
schema::Format input0_format = inputs_[0]->GetFormat();
bool need_convert_format = false;
for (size_t i = 1; i < inputs_.size(); ++i) {
if (inputs_[i]->GetFormat() != input0_format) {
need_convert_format = true;
}
}
if (!need_convert_format) {
outputs_[0]->SetFormat(input0_format);
return RET_OK;
}
MS_LOG(ERROR) << "All input format should be the same!";
return RET_ERROR;
int ConcatCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConcatBaseCPUKernel::Init();
if (ret != RET_OK) {
return ret;
}
schema::Format input0_format = inputs_[0]->GetFormat();
bool need_convert_format = false;
for (size_t i = 1; i < inputs_.size(); ++i) {
if (inputs_[i]->GetFormat() != input0_format) {
need_convert_format = true;
}
}
if (!need_convert_format) {
outputs_[0]->SetFormat(input0_format);
return RET_OK;
}
MS_LOG(ERROR) << "All input format should be the same!";
return RET_ERROR;
}


int ConcatCPUKernel::ReSize() { return RET_OK; }


int ConcatCPUKernel::Run() {
auto input_num = inputs_.size();
std::vector<void *> inputs_addr(input_num, nullptr);
std::vector<int *> inputs_output_shape(input_num + 1, nullptr);
int ConcatCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_num = inputs_.size();
std::vector<void *> inputs_addr(input_num, nullptr);
std::vector<int *> inputs_output_shape(input_num + 1, nullptr);


std::vector <std::vector<int>> shapes;
for (size_t i = 0; i < input_num; ++i) {
inputs_addr[i] = inputs_[i]->Data();
shapes.push_back(inputs_[i]->shape());
inputs_output_shape[i] = shapes[i].data();
}
auto output_shape = outputs_.at(0)->shape();
inputs_output_shape[input_num] = output_shape.data();
auto output_addr = outputs_.at(0)->Data();
std::vector<std::vector<int>> shapes;
for (size_t i = 0; i < input_num; ++i) {
inputs_addr[i] = inputs_[i]->Data();
shapes.push_back(inputs_[i]->shape());
inputs_output_shape[i] = shapes[i].data();
}
auto output_shape = outputs_.at(0)->shape();
inputs_output_shape[input_num] = output_shape.data();
auto output_addr = outputs_.at(0)->Data();


Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(),
output_shape.size(), output_addr);
return RET_OK;
}
} // namespace mindspore::kernel
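The rewritten ConcatCPUKernel::Init boils down to a format-consistency check: every input must share the first input's format, which is then propagated to the output; mixed formats are rejected. Equivalent logic:

schema::Format input0_format = inputs_[0]->GetFormat();
for (size_t i = 1; i < inputs_.size(); ++i) {
  if (inputs_[i]->GetFormat() != input0_format) {
    MS_LOG(ERROR) << "All input format should be the same!";
    return RET_ERROR;  // no conversion path implemented here
  }
}
outputs_[0]->SetFormat(input0_format);
return RET_OK;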



+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/concat.h View File

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class ConcatCPUKernel : public ConcatBaseCPUKernel {
public:
ConcatCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConcatBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}


~ConcatCPUKernel() = default;


@@ -42,4 +43,3 @@ class ConcatCPUKernel : public ConcatBaseCPUKernel {
} // namespace mindspore::kernel


#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONCAT_H_


+ 17
- 6
mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc View File

@@ -29,6 +29,7 @@ using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::lite::RET_INFER_INVALID;
using mindspore::schema::PrimitiveType_Conv2D;


namespace mindspore::kernel {
@@ -136,6 +137,10 @@ void ConvolutionCPUKernel::ConfigInputOutput() {
}


int ConvolutionCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
@@ -204,6 +209,11 @@ int ConvolutionImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}


int ConvolutionCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_tensor = inputs_.at(kInputIndex);
auto ori_input_data = input_tensor->Data();
int in_batch = conv_param_->input_batch_;
@@ -223,7 +233,7 @@ int ConvolutionCPUKernel::Run() {
kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D);
auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
@@ -245,20 +255,21 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Ten
kernel::LiteKernel *kernel;
if (kernel_h == 1 && kernel_w == 1) {
// kernel = new (std::nothrow) kernel::Convolution1x1CPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
} else if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) {
kernel = new (std::nothrow) kernel::Convolution3x3CPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::Convolution3x3CPUKernel(opParameter, inputs, outputs, ctx, primitive);
} else if (use_winograd) {
kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(opParameter, inputs, outputs, ctx, out_unit);
kernel =
new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(opParameter, inputs, outputs, ctx, primitive, out_unit);
} else {
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
}
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;
}
auto ret = kernel->Init();
if (ret != RET_OK) {
if (ret != RET_OK && ret != RET_INFER_INVALID) {
delete kernel;
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
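Creators whose kernels may legitimately defer initialization now tolerate RET_INFER_INVALID from Init() instead of deleting the kernel, since a deferred infer shape is resolved later by Prepare():

auto ret = kernel->Init();
if (ret != RET_OK && ret != RET_INFER_INVALID) {  // deferred infer-shape is not fatal
  delete kernel;
  return nullptr;
}
return kernel;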


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution.h View File

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
public:
ConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionCPUKernel() override {
if (packed_input_ != nullptr) {
free(packed_input_);


+ 9
- 0
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc View File

@@ -136,6 +136,10 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) {
}


int Convolution1x1CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
ConvolutionBaseCPUKernel::Init();
InitConv1x1MatmulParam();


@@ -178,6 +182,11 @@ int Convolution1x1Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}


int Convolution1x1CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto src_in = reinterpret_cast<float *>(inputs_[0]->Data());
auto src_out = reinterpret_cast<float *>(outputs_[0]->Data());




+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h View File

@@ -34,8 +34,9 @@ namespace mindspore::kernel {
class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
public:
Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {
matmul_param_ = new MatMulParameter();
}
~Convolution1x1CPUKernel();


+ 9
- 0
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc View File

@@ -166,6 +166,10 @@ void Convolution3x3CPUKernel::ConfigInputOutput() {
}


int Convolution3x3CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
@@ -237,6 +241,11 @@ int Convolution3x3Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}


int Convolution3x3CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_tensor = inputs_.at(kInputIndex);
auto ori_input_data = input_tensor->Data();
int in_batch = conv_param_->input_batch_;


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.h View File

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
class Convolution3x3CPUKernel : public ConvolutionBaseCPUKernel {
public:
Convolution3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~Convolution3x3CPUKernel() override {
if (transformed_filter_addr_ != nullptr) {
free(transformed_filter_addr_);


+ 15
- 4
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc View File

@@ -25,6 +25,7 @@ using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::lite::RET_INFER_INVALID;
using mindspore::schema::PrimitiveType_DepthwiseConv2D;


namespace mindspore::kernel {
@@ -86,6 +87,10 @@ int ConvolutionDepthwiseCPUKernel::InitBuffer() {
}


int ConvolutionDepthwiseCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
// conv base init
ConvolutionBaseCPUKernel::Init();


@@ -144,6 +149,11 @@ int ConvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}


int ConvolutionDepthwiseCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
@@ -164,7 +174,7 @@ int ConvolutionDepthwiseCPUKernel::Run() {
packed_output_ = output_addr;
}


auto ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_);
ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
return RET_ERROR;
@@ -180,11 +190,11 @@ int ConvolutionDepthwiseCPUKernel::Run() {
kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D);
kernel::LiteKernel *kernel;
kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx, primitive);
// auto param = reinterpret_cast<ConvParameter *>(opParameter);
// if (param->kernel_h_ == 3 && param->kernel_w_ == 3 && param->stride_h_ == 1 && param->stride_w_ == 1 &&
// param->dilation_h_ == 1 && param->dilation_w_ == 1) {
@@ -192,12 +202,13 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::T
// } else {
// kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx);
// }

if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;
}
auto ret = kernel->Init();
if (ret != RET_OK) {
if (ret != RET_OK && ret != RET_INFER_INVALID) {
delete kernel;
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));


+3 -3  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
 class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                                const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+                                const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                                const lite::Primitive *primitive)
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ConvolutionDepthwiseCPUKernel() override {
     delete sliding_;
     free(packed_weight_);
@@ -55,4 +56,3 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
 }  // namespace mindspore::kernel
-
 
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_H_
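The header-side change is mechanical but it is what enables the deferral: every kernel constructor now also takes the lite::Primitive and forwards it up the chain, presumably so the framework can re-run the primitive's shape inference once real inputs arrive (that rationale is inferred from the commit's purpose, not stated in the diff). A sketch of the widened chain, with an illustrative kernel name:

class ExampleCPUKernel : public ConvolutionBaseCPUKernel {
 public:
  ExampleCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                   const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                   const lite::Primitive *primitive)
      // The primitive travels up through ConvolutionBaseCPUKernel to LiteKernel,
      // which presumably holds it for the deferred InferShape.
      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
};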


+10 -1  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.cc

@@ -100,6 +100,10 @@ int ConvolutionDepthwise3x3CPUKernel::InitBuffer() {
 }
 
 int ConvolutionDepthwise3x3CPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   // conv base init
   ConvolutionBaseCPUKernel::Init();


@@ -164,6 +168,11 @@ int ConvDw3x3Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }
 
 int ConvolutionDepthwise3x3CPUKernel::Run() {
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare failed.";
+    return ret;
+  }
   if (conv_param_->input_channel_ != conv_param_->output_channel_) {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
@@ -184,7 +193,7 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
     packed_output_ = output_addr;
   }
 
-  auto ret = LiteBackendParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_);
+  ret = LiteBackendParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]";
     return RET_ERROR;
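Both ends of the deferral appear in this file, so the whole pattern can be read in one place. A sketch under the assumption that Prepare(), defined on LiteKernel and not visible in this diff, re-runs initialization for kernels that called SetNeedReInit():

int ConvolutionDepthwise3x3CPUKernel::Init() {
  // Graph-build time: shape inference was interrupted and the graph is not
  // running yet, so record that a re-init is needed and return success.
  if (context_->infer_shape_interrupt_ && !context_->running_) {
    SetNeedReInit();
    return RET_OK;
  }
  // ... ConvolutionBaseCPUKernel::Init(), weight packing, buffer setup ...
  return RET_OK;
}

int ConvolutionDepthwise3x3CPUKernel::Run() {
  // Graph-run time: shapes are now known, so Prepare() can complete the
  // setup that Init() skipped, before any data is touched.
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return ret;
  }
  // ... pack input, LiteBackendParallelLaunch(ConvDw3x3Run, ...), unpack ...
  return RET_OK;
}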


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
 class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwise3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                                   const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+                                   const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                                   const lite::Primitive *primitive)
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
 
   ~ConvolutionDepthwise3x3CPUKernel() override {
     free(packed_weight_);


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.cc

@@ -135,11 +135,12 @@ int ConvolutionGradFilterCPUKernel::Run() {
 kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                        const std::vector<lite::tensor::Tensor *> &outputs,
                                                        OpParameter *opParameter, const lite::Context *ctx,
-                                                       const kernel::KernelKey &desc) {
+                                                       const kernel::KernelKey &desc,
+                                                       const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradFilter);
 
-  auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs);
+  auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   MS_ASSERT(kernel != nullptr);
 
   auto ret = kernel->Init();


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
 class ConvolutionGradFilterCPUKernel : public LiteKernel {
  public:
   explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                                          const std::vector<lite::tensor::Tensor *> &outputs)
-      : LiteKernel(parameter, inputs, outputs) {}
+                                          const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                                          const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ConvolutionGradFilterCPUKernel() override { delete workspace; }
 
   int Init() override;


+3 -3  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.cc

@@ -23,9 +23,9 @@


 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
-using mindspore::schema::PrimitiveType_Conv2DGradInput;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Conv2DGradInput;
 
 namespace mindspore::kernel {
 int ConvolutionGradInputCPUKernel::Init() {
@@ -115,11 +115,11 @@ int ConvolutionGradInputCPUKernel::Run() {
 kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                       const std::vector<lite::tensor::Tensor *> &outputs,
                                                       OpParameter *opParameter, const lite::Context *ctx,
-                                                      const kernel::KernelKey &desc) {
+                                                      const kernel::KernelKey &desc, const lite::Primitive *primitive) {
   MS_ASSERT(opParameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradInput);
 
-  auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs);
+  auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive);
   MS_ASSERT(kernel != nullptr);
 
   auto ret = kernel->Init();


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
 class ConvolutionGradInputCPUKernel : public LiteKernel {
  public:
   explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                                         const std::vector<lite::tensor::Tensor *> &outputs)
-      : LiteKernel(parameter, inputs, outputs) {}
+                                         const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                                         const lite::Primitive *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~ConvolutionGradInputCPUKernel() override { delete workspace; }
 
   int Init() override;


+9 -0  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc

@@ -247,6 +247,10 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() {
 }
 
 int ConvolutionWinogradCPUKernel::Init() {
+  if (context_->infer_shape_interrupt_ && !context_->running_) {
+    SetNeedReInit();
+    return RET_OK;
+  }
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionBase init failed.";
@@ -339,6 +343,11 @@ int ConvolutionWinogradImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata
 }
 
 int ConvolutionWinogradCPUKernel::Run() {
+  auto prepare_ret = Prepare();
+  if (prepare_ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
+    return prepare_ret;
+  }
   auto input_tensor = inputs_.at(kInputIndex);
   auto ori_input_data = input_tensor->Data();
   int in_batch = conv_param_->input_batch_;


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
 class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                               const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, int output_unit)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), output_unit_(output_unit) {}
+                               const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                               const lite::Primitive *primitive, int output_unit)
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit) {}
   ~ConvolutionWinogradCPUKernel() override {
     if (tmp_data_ != nullptr) {
       free(tmp_data_);


+7 -10  mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc

@@ -40,15 +40,7 @@ int CropLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
 }
 }  // namespace
 
-int CropCPUKernel::Init() {
-  schema::Format input0_format = inputs_[0]->GetFormat();
-  if (input0_format != schema::Format_NCHW && input0_format != schema::Format_NHWC) {
-    MS_LOG(ERROR) << "Unsupport format " << input0_format;
-    return RET_FORMAT_ERR;
-  }
-  outputs_[0]->SetFormat(input0_format);
-  return RET_OK;
-}
+int CropCPUKernel::Init() { return RET_OK; }
 
 int CropCPUKernel::CropParallelRun(int thread_id) {
   auto input = inputs_[0];
@@ -61,6 +53,11 @@ int CropCPUKernel::CropParallelRun(int thread_id) {
 }
 
 int CropCPUKernel::Run() {
+  auto prepare_ret = Prepare();
+  if (prepare_ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
+    return prepare_ret;
+  }
   auto input = inputs_[0];
   auto output = outputs_[0];
   auto param = reinterpret_cast<CropParameter *>(opParameter);
@@ -71,7 +68,7 @@ int CropCPUKernel::Run() {
     return RET_OK;
   }
 
-  int ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_);
+  auto ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
     return RET_ERROR;


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/crop.h

@@ -24,8 +24,9 @@ namespace mindspore::kernel {
 class CropCPUKernel : public CropBaseCPUKernel {
  public:
   CropCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
-      : CropBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+                const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
+                const lite::Primitive *primitive)
+      : CropBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~CropCPUKernel() = default;
   int Init() override;
   int ReSize() override { return 0; }


Some files were not shown because too many files changed in this diff
