
support infershape when running graph

tags/v0.7.0-beta
chenjianping 5 years ago
parent commit f2d97520ab
100 changed files with 631 additions and 353 deletions
  1. +6 -0 mindspore/lite/include/context.h
  2. +4 -1 mindspore/lite/include/errorcode.h
  3. +0 -5 mindspore/lite/src/executor.cc
  4. +1 -1 mindspore/lite/src/kernel_factory.cc
  5. +0 -1 mindspore/lite/src/kernel_registry.h
  6. +32 -6 mindspore/lite/src/lite_kernel.h
  7. +1 -0 mindspore/lite/src/lite_session.cc
  8. +1 -1 mindspore/lite/src/ops/cast.cc
  9. +3 -0 mindspore/lite/src/ops/ops.h
  10. +10 -5 mindspore/lite/src/ops/reshape.cc
  11. +13 -8 mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc
  12. +3 -3 mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h
  13. +4 -4 mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.cc
  14. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h
  15. +15 -13 mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc
  16. +4 -3 mindspore/lite/src/runtime/kernel/arm/base/concat_base.h
  17. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
  18. +12 -12 mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc
  19. +3 -3 mindspore/lite/src/runtime/kernel/arm/base/crop_base.h
  20. +9 -5 mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc
  21. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h
  22. +6 -4 mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc
  23. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h
  24. +4 -3 mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc
  25. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h
  26. +4 -4 mindspore/lite/src/runtime/kernel/arm/base/pad.cc
  27. +8 -4 mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc
  28. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h
  29. +2 -2 mindspore/lite/src/runtime/kernel/arm/base/prelu_base.cc
  30. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/prelu_base.h
  31. +7 -2 mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc
  32. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/prior_box.h
  33. +9 -4 mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
  34. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.h
  35. +6 -6 mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
  36. +3 -3 mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
  37. +4 -4 mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc
  38. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h
  39. +6 -6 mindspore/lite/src/runtime/kernel/arm/base/split_base.cc
  40. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/split_base.h
  41. +2 -2 mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc
  42. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h
  43. +9 -3 mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
  44. +3 -2 mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h
  45. +11 -3 mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
  46. +3 -3 mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h
  47. +17 -6 mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
  48. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
  49. +15 -7 mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
  50. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
  51. +17 -5 mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
  52. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
  53. +8 -2 mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc
  54. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/activation.h
  55. +10 -10 mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.cc
  56. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.h
  57. +11 -8 mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc
  58. +4 -2 mindspore/lite/src/runtime/kernel/arm/fp32/addn.h
  59. +6 -1 mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc
  60. +3 -3 mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.h
  61. +11 -2 mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc
  62. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h
  63. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.cc
  64. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.h
  65. +13 -3 mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc
  66. +4 -5 mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h
  67. +5 -0 mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.cc
  68. +3 -3 mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.h
  69. +7 -2 mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc
  70. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h
  71. +12 -2 mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc
  72. +4 -4 mindspore/lite/src/runtime/kernel/arm/fp32/bias.h
  73. +9 -6 mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.cc
  74. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.h
  75. +2 -2 mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.cc
  76. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.h
  77. +11 -2 mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc
  78. +5 -5 mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h
  79. +14 -8 mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc
  80. +6 -10 mindspore/lite/src/runtime/kernel/arm/fp32/cast.h
  81. +45 -35 mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc
  82. +3 -3 mindspore/lite/src/runtime/kernel/arm/fp32/concat.h
  83. +17 -6 mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc
  84. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/convolution.h
  85. +9 -0 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc
  86. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h
  87. +9 -0 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc
  88. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.h
  89. +15 -4 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc
  90. +3 -3 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h
  91. +10 -1 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.cc
  92. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h
  93. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.cc
  94. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.h
  95. +3 -3 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.cc
  96. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.h
  97. +9 -0 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc
  98. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h
  99. +7 -10 mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc
  100. +3 -2 mindspore/lite/src/runtime/kernel/arm/fp32/crop.h

+6 -0 mindspore/lite/include/context.h

@@ -64,11 +64,17 @@ class MS_API Context {
/// \brief Destructor of MindSpore Lite Context.
virtual ~Context();

+ void InferShapeInterrupt() {
+   infer_shape_interrupt_ = true;
+ }
+
public:
DeviceContext device_ctx_{DT_CPU};
int thread_num_ = 2; /**< thread number config for thread pool */
std::shared_ptr<Allocator> allocator = nullptr;
CpuBindMode cpu_bind_mode_ = MID_CPU;
+ bool infer_shape_interrupt_ = false;
+ bool running_ = false;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_INCLUDE_CONTEXT_H_
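The two new fields work as a pair: infer_shape_interrupt_ records that at least one node could not be shape-inferred while the graph was being compiled, and running_ marks the switch into execution, when input shapes are finally known. A rough sketch of the producer side (illustrative only; the scheduler code that would call this is not among the hunks shown here, and SetInferFlag comes from the ops.h hunk below):

// Hypothetical compile-time loop: if a node's shape cannot be resolved
// statically, flag both the primitive and the context so kernels defer
// their shape-dependent setup until RunGraph().
auto ret = primitive->InferShape(inputs, outputs);
if (ret == lite::RET_INFER_INVALID) {
  primitive->SetInferFlag(false);   // Prepare() will re-run InferShape()
  context->InferShapeInterrupt();   // kernels will call SetNeedReInit()
}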

+4 -1 mindspore/lite/include/errorcode.h

@@ -48,8 +48,11 @@ constexpr int RET_OP_EXECUTE_FAILURE = -304; /**< Failed to execution operator.

/* Tensor error code, range: [-401,-500] */
constexpr int RET_FORMAT_ERR = -401; /**< Failed to checking tensor format. */

+ /* InferShape error code, range: [-501,-600] */
+ constexpr int RET_INFER_ERR = -501; /**< Failed to infer shape. */
+ constexpr int RET_INFER_INVALID = -502; /**< Invalid to infer shape before runtime. */
} // namespace lite
} // namespace mindspore

#endif // MINDSPORE_LITE_INCLUDE_ERRORCODE_H_


+0 -5 mindspore/lite/src/executor.cc

@@ -37,11 +37,6 @@ int Executor::Run(std::vector<tensor::Tensor *> &inputs, std::vector<tensor::Ten
kernel::LiteKernelUtil::InitTensorRefCount(kernels);
for (auto *kernel : kernels) {
MS_ASSERT(nullptr != kernel);
- auto &outputs = kernel->GetOutputs();
- for (auto *output : outputs) {
-   MS_ASSERT(nullptr != output);
-   output->MallocData();
- }
session::CallBackParam callbackParam;
callbackParam.name_callback_param = kernel->Name();
callbackParam.type_callback_param = kernel->type_str();


+1 -1 mindspore/lite/src/kernel_factory.cc

@@ -45,7 +45,7 @@ LiteKernel *KernelFactory::GetKernel(const std::vector<tensor::Tensor *> &inputs
}
auto creator = KernelRegistry::GetInstance()->GetCreator(key);
if (creator != nullptr) {
- auto kernel = creator(inputs, outputs, parameter, ctx, key);
+ auto kernel = creator(inputs, outputs, parameter, ctx, key, primitive);
return kernel;
}
return nullptr;


+0 -1 mindspore/lite/src/kernel_registry.h

@@ -45,7 +45,6 @@ class KernelRegistry {
int device_type_length_;
int data_type_length_;
int op_type_length_;
- std::mutex lock_;
};

class KernelRegistrar {


+32 -6 mindspore/lite/src/lite_kernel.h

@@ -25,6 +25,7 @@
#include "include/context.h"
#include "src/ir/tensor.h"
#include "src/ops/ops.h"
#include "include/errorcode.h"

#ifdef ENABLE_FP16
using FLOAT_t = float16_t;
@@ -34,6 +35,8 @@ using FLOAT_t = float;

// using mindspore::kernel::AddressPtr;
namespace mindspore::kernel {
+ using mindspore::lite::RET_ERROR;
+ using mindspore::lite::RET_OK;
enum KERNEL_ARCH { kCPU, kGPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
struct KernelKey {
KERNEL_ARCH arch;
@@ -55,15 +58,30 @@ class LiteKernel {
public:
LiteKernel() = default;
explicit LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: opParameter(parameter), inputs_(inputs), outputs_(outputs), train_mode(false) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: opParameter(parameter), inputs_(inputs), outputs_(outputs), train_mode(false), primitive_(primitive),
context_(ctx) {
this->in_kernel_.clear();
this->out_kernel_.clear();
}

virtual ~LiteKernel() { delete opParameter; }

- virtual int Prepare() { return -1; }
+ virtual int Prepare() {
+   if (primitive_ != nullptr && !primitive_->GetInferFlag()) {
+     (const_cast<lite::Primitive *>(primitive_))->InferShape(inputs_, outputs_);
+   }
+   if (need_reinit) {
+     Init();
+   }
+   auto &outputs = this->GetOutputs();
+   for (auto *output : outputs) {
+     MS_ASSERT(output != nullptr);
+     output->MallocData();
+   }
+   return RET_OK;
+ }
virtual int Init() { return -1; }
virtual int ReSize() { return -1; }
virtual int Run() { return -1; }
@@ -103,16 +121,23 @@ class LiteKernel {

void set_desc(const KernelKey kernel_key) { desc = kernel_key; }

+ void SetNeedReInit() {
+   need_reinit = true;
+ }
+
protected:
KernelKey desc;
std::string name;
OpParameter *opParameter = nullptr;
+ const lite::Primitive *primitive_;
+ const lite::Context *context_;
// tensor will free in ~lite_session()
std::vector<lite::tensor::Tensor *> inputs_;
std::vector<lite::tensor::Tensor *> outputs_;
std::vector<LiteKernel *> in_kernel_;
std::vector<LiteKernel *> out_kernel_;
bool train_mode;
+ bool need_reinit = false;
};

class SubGraphKernel : public LiteKernel {
@@ -121,8 +146,9 @@ class SubGraphKernel : public LiteKernel {
const std::vector<lite::tensor::Tensor *> &outputs,
const std::vector<kernel::LiteKernel *> &inKernels,
const std::vector<kernel::LiteKernel *> &outKernels,
const std::vector<kernel::LiteKernel *> &nodes)
: LiteKernel(nullptr, inputs, outputs),
const std::vector<kernel::LiteKernel *> &nodes, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(nullptr, inputs, outputs, ctx, primitive),
inputs_(inputs),
outputs_(outputs),
inkernels_(inKernels),
@@ -144,7 +170,7 @@ class SubGraphKernel : public LiteKernel {

typedef LiteKernel *(*KernelCreator)(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter,
const lite::Context *ctx, const KernelKey &desc);
const lite::Context *ctx, const KernelKey &desc, const lite::Primitive *primitive);

class LiteKernelUtil {
public:


+1 -0 mindspore/lite/src/lite_session.cc

@@ -168,6 +168,7 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const {
int LiteSession::RunGraph(const session::KernelCallBack &before, const session::KernelCallBack &after) {
MS_EXCEPTION_IF_NULL(this->context_);
SetMaxWokerNum(context_->thread_num_);
context_->running_ = true;
Executor executor;
if (before == nullptr && after == nullptr) {
return executor.Run(this->inputs, this->outputs, this->kernels, this->context_->allocator.get());
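Setting running_ just before dispatching to the Executor is what disarms the guard in the kernels' Init() methods: once running_ is true, a deferred Init() invoked from Prepare() no longer short-circuits. Callers are unaffected; the usual session flow still applies (abbreviated sketch, error handling omitted):

auto session = session::LiteSession::CreateSession(&context);
session->CompileGraph(model);   // compile-time InferShape may be interrupted
session->RunGraph();            // running_ = true; deferred work completes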


+1 -1 mindspore/lite/src/ops/cast.cc

@@ -40,7 +40,7 @@ int Cast::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::
MS_LOG(ERROR) << "Unsupport input data type " << input->data_type();
return RET_INPUT_TENSOR_ERROR;
}
if (cast_prim->dstT() != kNumberTypeFloat || cast_prim->dstT() != kNumberTypeFloat32) {
if (cast_prim->dstT() != kNumberTypeFloat && cast_prim->dstT() != kNumberTypeFloat32) {
MS_LOG(ERROR) << "Invalid output datatype " << cast_prim->dstT();
return RET_INPUT_TENSOR_ERROR;
}
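This one-character fix matters more than it looks: a value always differs from at least one of two distinct constants, so dstT() != kNumberTypeFloat || dstT() != kNumberTypeFloat32 is true for every destination type, and the old check rejected everything, including valid float outputs. With &&, the error path fires only when the destination is neither float variant, which is the intended validation.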


+3 -0 mindspore/lite/src/ops/ops.h

@@ -45,12 +45,15 @@ class Primitive {
static Primitive *CreatePrimitive(schema::Primitive *primitive);
virtual ~Primitive() {}
const schema::Primitive *Value() const { return this->primitive; }
+ const bool GetInferFlag() const { return this->infer_flag_; }
+ void SetInferFlag(bool flag) { this->infer_flag_ = flag; }
schema::PrimitiveType Type() const { return this->primitive->value_type(); }
const void *Attribute() const { return this->primitive->value(); }
virtual int InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_);

protected:
schema::Primitive *primitive;
+ bool infer_flag_ = true;
};

class Conv2D : public Primitive {
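infer_flag_ defaults to true, so models whose shapes infer cleanly at compile time behave exactly as before; it flips to false only when inference has to be deferred. GetInferFlag() returning false is then the signal, inside the reworked Prepare() in lite_kernel.h above, to run InferShape() again once real input shapes are available.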


+10 -5 mindspore/lite/src/ops/reshape.cc

@@ -34,11 +34,11 @@ int Reshape::CalNewShape(const tensor::Tensor *in_tensor, std::vector<int> *out_
inferIndex = i;
} else {
MS_LOG(ERROR) << "output shape should has no more than one dim which need infer";
- return RET_ERROR;
+ return RET_INFER_ERR;
}
} else if (out_shape->at(i) < 0) {
MS_LOG(ERROR) << "output shape dim should be non-negative";
- return RET_ERROR;
+ return RET_INFER_ERR;
} else if (out_shape->at(i) == 0) {
out_shape->at(i) = in_tensor->shape().at(i);
out_shapeSize *= out_shape->at(i);
@@ -49,7 +49,7 @@ int Reshape::CalNewShape(const tensor::Tensor *in_tensor, std::vector<int> *out_

if (inferIndex == -1 && out_shapeSize != in_shape_size) {
MS_LOG(ERROR) << "output shapeSize: " << out_shapeSize << " should be equal to input shapeSize: " << in_shape_size;
- return RET_ERROR;
+ return RET_INFER_ERR;
}
if (inferIndex != -1) {
out_shape->at(inferIndex) = in_shape_size / out_shapeSize;
@@ -88,7 +88,11 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso
std::vector<int> out_shape;
if (inputs_.size() == kDoubleNum) {
auto shape_tensor = inputs_.at(1);
size_t shape_size = shape_tensor->ElementsNum();
if (shape_tensor->Data() == nullptr) {
MS_LOG(INFO) << "Do infer shape in runtime.";
return RET_INFER_INVALID;
}
size_t shape_size = shape_tensor->shape().size();
switch (shape_tensor->data_type()) {
case kNumberTypeInt8: {
auto data = reinterpret_cast<int8_t *>(shape_tensor->Data());
@@ -108,13 +112,14 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso
} break;
default: {
MS_LOG(ERROR) << "Reshape weight tensor has unsupported dataType: " << shape_tensor->data_type();
- return RET_ERROR;
+ return RET_INFER_ERR;
}
}
} else if (inputs_.size() == kSingleNum) {
std::copy(reshape_prim->shape()->begin(), reshape_prim->shape()->end(), std::back_inserter(out_shape));
} else {
MS_LOG(ERROR) << "inputs tensor size invalid.";
+ return RET_INFER_ERR;
}

auto ret = CalNewShape(inputs_.front(), &out_shape);
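Reshape is the motivating case for RET_INFER_INVALID: when the target shape arrives as a second input tensor, that tensor is often produced by another operator at runtime (a Shape op, for example), so shape_tensor->Data() is still nullptr while the graph is being compiled. Returning RET_INFER_INVALID instead of RET_ERROR lets the caller treat this as "defer" rather than "fail": inference is interrupted, and the output shape is recomputed inside Prepare() once the data exists.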


+13 -8 mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.cc

@@ -24,14 +24,18 @@

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
- using mindspore::lite::RET_PARAM_INVALID;
using mindspore::lite::RET_FORMAT_ERR;
using mindspore::lite::RET_OK;
+ using mindspore::lite::RET_PARAM_INVALID;
using mindspore::schema::PrimitiveType_ArgMax;
using mindspore::schema::PrimitiveType_ArgMin;

namespace mindspore::kernel {
int ArgMinMaxBaseCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto param = reinterpret_cast<ArgMinMaxParameter *>(opParameter);
switch (opParameter->type_) {
case PrimitiveType_ArgMax:
@@ -44,6 +48,7 @@ int ArgMinMaxBaseCPUKernel::Init() {
MS_LOG(ERROR) << "Unexpected type " << opParameter->type_;
return RET_ERROR;
}

auto in_shape = inputs_.at(0)->shape();
auto dims_size = in_shape.size();
int axis = param->axis_ < 0 ? param->axis_ + dims_size : param->axis_;
@@ -56,9 +61,9 @@ int ArgMinMaxBaseCPUKernel::Init() {
param->topk_ = MSMIN(param->topk_, in_shape[axis]);
if (param->topk_ > 1) {
if (context_ != nullptr && context_->allocator != nullptr) {
param->arg_elements_
= reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis]));
data_from_allocator_ = true;
param->arg_elements_ =
reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis]));
data_from_allocator_ = true;
} else {
param->arg_elements_ = reinterpret_cast<ArgElement *>(malloc(sizeof(ArgElement) * in_shape[axis]));
}
@@ -98,12 +103,12 @@ void ArgMinMaxBaseCPUKernel::FreeTmpMemory() {
kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
auto kernel = new (std::nothrow) ArgMinMaxInt8CPUKernel(op_parameter, inputs, outputs, ctx);
auto kernel = new (std::nothrow) ArgMinMaxInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new ArgMinMaxInt8CPUKernel fail!";
return nullptr;
@@ -122,12 +127,12 @@ kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor
kernel::LiteKernel *CpuArgMinMaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx);
auto kernel = new (std::nothrow) ArgMinMaxCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new ArgMinMaxCPUKernel fail!";
return nullptr;
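This Init() guard is the idiom repeated, with identical wording, in the other base kernels this commit touches (concat, pooling, prior_box, quant_dtype_cast, and the fp16/fp32 kernels further down the file list). Generalized sketch, with the kernel name as a placeholder:

int SomeBaseCPUKernel::Init() {
  if (context_->infer_shape_interrupt_ && !context_->running_) {
    SetNeedReInit();  // shapes unknown yet; Prepare() will call Init() again
    return RET_OK;
  }
  // ... shape-dependent setup, safe once InferShape() has run ...
  return RET_OK;
}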


+3 -3 mindspore/lite/src/runtime/kernel/arm/base/arg_min_max_base.h

@@ -24,8 +24,9 @@ namespace mindspore::kernel {
class ArgMinMaxBaseCPUKernel : public LiteKernel {
public:
ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs), context_(ctx), data_from_allocator_(false) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), data_from_allocator_(false) {
opParameter->thread_num_ = ctx->thread_num_;
}

@@ -40,7 +41,6 @@ class ArgMinMaxBaseCPUKernel : public LiteKernel {
void FreeTmpMemory();

private:
const lite::Context *context_;
bool data_from_allocator_;
};
} // namespace mindspore::kernel


+4 -4 mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.cc

@@ -46,13 +46,13 @@ int BatchToSpaceBaseCPUKernel::Init() {
kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace);
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
auto *kernel = new (std::nothrow) BatchToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) BatchToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!";
return nullptr;
@@ -71,13 +71,13 @@ kernel::LiteKernel *CpuBatchToSpaceInt8KernelCreator(const std::vector<lite::ten
kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(desc.type == schema::PrimitiveType_BatchToSpace);
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new BatchToSpaceCPUKernel fail!";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/batch_to_space_base.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class BatchToSpaceBaseCPUKernel : public LiteKernel {
public:
BatchToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
opParameter->thread_num_ = ctx->thread_num_;
}



+15 -13 mindspore/lite/src/runtime/kernel/arm/base/concat_base.cc

@@ -30,21 +30,24 @@ using mindspore::schema::PrimitiveType_Concat;

namespace mindspore::kernel {
int ConcatBaseCPUKernel::Init() {
auto axis = concat_param_->axis_;
axis_ = axis >= 0 ? axis : inputs_.front()->shape().size() + axis;
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
axis_ = concat_param_->axis_ >= 0 ? concat_param_->axis_ : inputs_.front()->shape().size() + concat_param_->axis_;
return RET_OK;
}

kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
auto *kernel = new(std::nothrow) ConcatInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ConcatInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new ConcatCPUKernel fail!";
return nullptr;
@@ -60,15 +63,15 @@ kernel::LiteKernel *CpuConcatInt8KernelCreator(const std::vector<lite::tensor::T
}

kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
auto *kernel = new(std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new ConcatCPUKernel fail!";
return nullptr;
@@ -84,15 +87,15 @@ kernel::LiteKernel *CpuConcatInt32KernelCreator(const std::vector<lite::tensor::
}

kernel::LiteKernel *CpuConcatFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
auto *kernel = new(std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ConcatCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new ConcatCPUKernel fail!";
return nullptr;
@@ -111,4 +114,3 @@ REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, CpuConcatInt8KernelCreat
REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Concat, CpuConcatInt32KernelCreator)
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Concat, CpuConcatFp32KernelCreator)
} // namespace mindspore::kernel


+4 -3 mindspore/lite/src/runtime/kernel/arm/base/concat_base.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class ConcatBaseCPUKernel : public LiteKernel {
public:
ConcatBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
opParameter->thread_num_ = ctx->thread_num_;
concat_param_ = reinterpret_cast<ConcatParameter *>(opParameter);
}
@@ -41,6 +42,7 @@ class ConcatBaseCPUKernel : public LiteKernel {
int ReSize() override { return 0; }

int Run() override { return 0; }

protected:
int thread_count_;
int axis_;
@@ -50,4 +52,3 @@ class ConcatBaseCPUKernel : public LiteKernel {
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CONCAT_BASE_H_


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h

@@ -37,8 +37,9 @@ namespace mindspore::kernel {
class ConvolutionBaseCPUKernel : public LiteKernel {
public:
ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
opParameter->thread_num_ = ctx->thread_num_;
conv_param_ = reinterpret_cast<ConvParameter *>(opParameter);
}


+12 -12 mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc

@@ -31,15 +31,15 @@ namespace mindspore::kernel {
int CropBaseCPUKernel::Init() { return RET_OK; }

kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
auto *kernel = new (std::nothrow) CropInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) CropInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new CropCPUKernel fail!";
return nullptr;
@@ -55,15 +55,15 @@ kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Ten
}

kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new CropCPUKernel fail!";
return nullptr;
@@ -79,15 +79,15 @@ kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Te
}

kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new CropCPUKernel fail!";
return nullptr;


+3 -3 mindspore/lite/src/runtime/kernel/arm/base/crop_base.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class CropBaseCPUKernel : public LiteKernel {
public:
CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
opParameter->thread_num_ = ctx->thread_num_;
}
~CropBaseCPUKernel() = default;
@@ -39,7 +40,6 @@ class CropBaseCPUKernel : public LiteKernel {

protected:
int thread_count_;
const Context *ctx_;
};
} // namespace mindspore::kernel



+9 -5 mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.cc

@@ -25,13 +25,17 @@

using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
- using mindspore::lite::RET_PARAM_INVALID;
using mindspore::lite::RET_FORMAT_ERR;
using mindspore::lite::RET_OK;
+ using mindspore::lite::RET_PARAM_INVALID;
using mindspore::schema::PrimitiveType_DepthToSpace;

namespace mindspore::kernel {
int DepthToSpaceBaseCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
if (inputs_[0]->GetFormat() != schema::Format_NHWC) {
MS_LOG(ERROR) << "depth_to_space only support NHWC now!";
return RET_FORMAT_ERR;
@@ -62,13 +66,13 @@ int DepthToSpaceBaseCPUKernel::Init() {
kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace);
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
auto *kernel = new (std::nothrow) DepthToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) DepthToSpaceInt8CPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new BatchToSpaceInt8CPUKernel fail!";
return nullptr;
@@ -87,13 +91,13 @@ kernel::LiteKernel *CpuDepthToSpaceInt8KernelCreator(const std::vector<lite::ten
kernel::LiteKernel *CpuDepthToSpaceFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(desc.type == schema::PrimitiveType_DepthToSpace);
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
auto *kernel = new (std::nothrow) DepthToSpaceCPUKernel(op_parameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) DepthToSpaceCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new DepthToSpaceCPUKernel fail!";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/depth_to_space_base.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class DepthToSpaceBaseCPUKernel : public LiteKernel {
public:
DepthToSpaceBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
opParameter->thread_num_ = ctx->thread_num_;
}



+6 -4 mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.cc

@@ -35,10 +35,11 @@ int FullconnectionBaseCPUKernel::Init() {
kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
auto kernel = new (std::nothrow) FullconnectionInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto kernel = new (std::nothrow) FullconnectionInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (!kernel) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;
@@ -56,10 +57,11 @@ kernel::LiteKernel *CpuFullConnectionInt8KernelCreator(const std::vector<lite::t
kernel::LiteKernel *CpuFullConnectionFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx);
auto kernel = new (std::nothrow) FullconnectionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (!kernel) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/fullconnection_base.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class FullconnectionBaseCPUKernel : public LiteKernel {
public:
FullconnectionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
fc_param_ = reinterpret_cast<MatMulParameter *>(opParameter);
}
~FullconnectionBaseCPUKernel() = default;


+4 -3 mindspore/lite/src/runtime/kernel/arm/base/matmul_base.cc

@@ -28,7 +28,8 @@ using mindspore::schema::PrimitiveType_MatMul;
namespace mindspore::kernel {
kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
const lite::Context *ctx, const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
auto input_tensor = inputs.at(kInputIndex);
@@ -37,7 +38,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso
switch (data_type) {
case kNumberTypeInt8:
case kNumberTypeUInt8: {
kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) MatmulInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (!kernel) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;
@@ -46,7 +47,7 @@ kernel::LiteKernel *CpuMatmulKernelCreator(const std::vector<lite::tensor::Tenso
}

case kNumberTypeFloat32: {
kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) MatmulCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (!kernel) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/matmul_base.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class MatmulBaseCPUKernel : public LiteKernel {
public:
MatmulBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
params_ = reinterpret_cast<MatMulParameter *>(opParameter);
}
~MatmulBaseCPUKernel() = default;


+4 -4 mindspore/lite/src/runtime/kernel/arm/base/pad.cc

@@ -31,10 +31,10 @@ namespace mindspore::kernel {
kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Pad);
auto *kernel = new (std::nothrow) PadInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) PadInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new PadCPUKernel failed.";
return nullptr;
@@ -52,10 +52,10 @@ kernel::LiteKernel *CpuPadInt8KernelCreator(const std::vector<lite::tensor::Tens
kernel::LiteKernel *CpuPadFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Pad);
auto *kernel = new (std::nothrow) PadCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) PadCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new PadCPUKernel failed.";
return nullptr;


+8 -4 mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc

@@ -56,6 +56,10 @@ void PoolingBaseCPUKernel::FreeQuantParam() {
}

int PoolingBaseCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
MS_ASSERT(inputs_.size() == 1);
MS_ASSERT(outputs_.size() == 1);
pooling_param_->thread_num_ = thread_count_;
@@ -78,13 +82,13 @@ int PoolingBaseCPUKernel::Init() {
kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Pooling);
auto *kernel = new (std::nothrow) PoolingInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) PoolingInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new PoolingInt8CPUKernel fail!";
return nullptr;
@@ -102,13 +106,13 @@ kernel::LiteKernel *CpuPoolingInt8KernelCreator(const std::vector<lite::tensor::
kernel::LiteKernel *CpuPoolingFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Pooling);
auto *kernel = new (std::nothrow) PoolingCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) PoolingCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new PoolingCPUKernel fail!";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/pooling_base.h

@@ -29,8 +29,9 @@ namespace mindspore::kernel {
class PoolingBaseCPUKernel : public LiteKernel {
public:
PoolingBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
pooling_param_ = reinterpret_cast<PoolingParameter *>(opParameter);
}
~PoolingBaseCPUKernel() = default;


+2 -2 mindspore/lite/src/runtime/kernel/arm/base/prelu_base.cc

@@ -32,13 +32,13 @@ int PreluBaseCPUKernel::Init() {return RET_OK;}
kernel::LiteKernel *CpuPreluInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Prelu);
auto *kernel = new(std::nothrow) PreluInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new(std::nothrow) PreluInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new PreluCPUKernel fail!";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/prelu_base.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class PreluBaseCPUKernel : public LiteKernel {
public:
PreluBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
opParameter->thread_num_ = ctx->thread_num_;
prelu_param_ = reinterpret_cast<PreluParameter *>(opParameter);
}


+7 -2 mindspore/lite/src/runtime/kernel/arm/base/prior_box.cc

@@ -39,6 +39,11 @@ int PriorBoxCPUKernel::Init() {
MS_LOG(ERROR) << "PriorBoxParameter nullptr";
return RET_NULL_PTR;
}

if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
MS_ASSERT(inputs_.size() == kInputNum);
MS_ASSERT(outputs_.size() == kOutputNum);

@@ -164,7 +169,7 @@ int PriorBoxCPUKernel::Run() {
kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
@@ -173,7 +178,7 @@ kernel::LiteKernel *CpuPriorBoxKernelCreator(const std::vector<lite::tensor::Ten
MS_LOG(ERROR) << "PriorBox invalid desc type " << desc.type;
return nullptr;
}
auto *kernel = new (std::nothrow) PriorBoxCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) PriorBoxCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new PriorBoxCPUKernel fail!";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/prior_box.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class PriorBoxCPUKernel : public LiteKernel {
public:
PriorBoxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
prior_box_param_ = reinterpret_cast<PriorBoxParameter *>(opParameter);
}
~PriorBoxCPUKernel() = default;


+9 -4 mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc

@@ -34,6 +34,10 @@ constexpr int kQuantDTypeCastOutputNum = 1;
} // namespace

int QuantDTypeCastCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
if (inputs_.size() != 1) {
MS_LOG(ERROR) << "inputs number should be 1, but " << inputs_.size() << " is given.";
return RET_ERROR;
@@ -83,8 +87,8 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) {
ret = DequantizeInt8(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint,
num_unit_thread);
} else {
ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale,
quant_arg.zeroPoint, num_unit_thread);
ret = QuantizeToInt8(float32_ptr_ + thread_offset, int8_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint,
num_unit_thread);
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "QuantDTypeCast error task_id[" << task_id << "] error_code[" << ret << "]";
@@ -124,12 +128,13 @@ int QuantDTypeCastCPUKernel::Run() {
kernel::LiteKernel *CpuQuantDTypeCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
auto *kernel = new (std::nothrow) QuantDTypeCastCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) QuantDTypeCastCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new QuantDTypeCastCPUKernel fail!";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.h

@@ -24,8 +24,9 @@ namespace mindspore::kernel {
class QuantDTypeCastCPUKernel : public LiteKernel {
public:
QuantDTypeCastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {}
~QuantDTypeCastCPUKernel() = default;

int Init() override;


+6 -6 mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc

@@ -36,13 +36,13 @@ int ReshapeBaseCPUKernel::Init() {
kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Reshape);
auto *kernel = new (std::nothrow) ReshapeInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ReshapeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new ReshapeInt8CPUKernel fail!";
return nullptr;
@@ -60,13 +60,13 @@ kernel::LiteKernel *CpuReshapeInt8KernelCreator(const std::vector<lite::tensor::
kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Reshape);
auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new ReshapeCPUKernel fail!";
return nullptr;
@@ -84,13 +84,13 @@ kernel::LiteKernel *CpuReshapeInt32KernelCreator(const std::vector<lite::tensor:
kernel::LiteKernel *CpuReshapeFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Reshape);
auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ReshapeCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new ReshapeCPUKernel fail!";
return nullptr;


+3 -3 mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class ReshapeBaseCPUKernel : public LiteKernel {
public:
ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
reshape_param_ = reinterpret_cast<ReshapeParameter *>(opParameter);
}
~ReshapeBaseCPUKernel() = default;
@@ -45,4 +46,3 @@ class ReshapeBaseCPUKernel : public LiteKernel {
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_


+4 -4 mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc

@@ -53,13 +53,13 @@ int SoftmaxBaseCPUKernel::Init() {
kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax);
auto *kernel = new (std::nothrow) SoftmaxInt8CPUKernel(opParameter, inputs, outputs, ctx);
SoftmaxInt8CPUKernel *kernel = new (std::nothrow) SoftmaxInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!";
return nullptr;
@@ -77,13 +77,13 @@ kernel::LiteKernel *CpuSoftmaxInt8KernelCreator(const std::vector<lite::tensor::
kernel::LiteKernel *CpuSoftmaxFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_SoftMax);
auto *kernel = new (std::nothrow) SoftmaxCPUKernel(opParameter, inputs, outputs, ctx);
SoftmaxCPUKernel *kernel = new (std::nothrow) SoftmaxCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new SoftmaxCPUKernel fail!";
return nullptr;


+3 -2 mindspore/lite/src/runtime/kernel/arm/base/softmax_base.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class SoftmaxBaseCPUKernel : public LiteKernel {
public:
SoftmaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
opParameter->thread_num_ = ctx->thread_num_;
softmax_param_ = reinterpret_cast<SoftmaxParameter *>(opParameter);
}


+6 -6 mindspore/lite/src/runtime/kernel/arm/base/split_base.cc

@@ -61,13 +61,13 @@ int SplitBaseCPUKernel::Init() {
kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Split);
auto *kernel = new (std::nothrow) SplitInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) SplitInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new SplitCPUKernel fail!";
return nullptr;
@@ -85,13 +85,13 @@ kernel::LiteKernel *CpuSplitInt8KernelCreator(const std::vector<lite::tensor::Te
kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Split);
auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new SplitCPUKernel fail!";
return nullptr;
@@ -109,13 +109,13 @@ kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::tensor::T
kernel::LiteKernel *CpuSplitFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Split);
auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new SplitCPUKernel fail!";
return nullptr;


+3 -2  mindspore/lite/src/runtime/kernel/arm/base/split_base.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class SplitBaseCPUKernel : public LiteKernel {
public:
SplitBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
param = reinterpret_cast<SplitParameter *>(opParameter);
}
~SplitBaseCPUKernel() = default;


+2 -2  mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc

@@ -32,13 +32,13 @@ int SqueezeBaseCPUKernel::Init() { return RET_OK; }
kernel::LiteKernel *CpuSqueezeInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_Squeeze);
auto *kernel = new (std::nothrow) SqueezeInt8CPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) SqueezeInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new SqueezeCPUKernel fail!";
return nullptr;


+3 -2  mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class SqueezeBaseCPUKernel : public LiteKernel {
public:
SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
opParameter->thread_num_ = ctx->thread_num_;
}



+9 -3  mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc

@@ -42,12 +42,18 @@ int StridedSliceCPUKernel::Init() {
int StridedSliceCPUKernel::ReSize() { return 0; }

int StridedSliceCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}

auto input = inputs_.at(0);
auto output = outputs_.at(0);
MS_ASSERT(input);
MS_ASSERT(output);

auto ret = DoStridedSlice(input->Data(), output->Data(), reinterpret_cast<StridedSliceParameter *>(opParameter));
ret = DoStridedSlice(input->Data(), output->Data(), reinterpret_cast<StridedSliceParameter *>(opParameter));
if (ret != RET_OK) {
MS_LOG(ERROR) << "StridedSlice error error_code[" << ret << "]";
return RET_ERROR;
@@ -58,13 +64,13 @@ int StridedSliceCPUKernel::Run() {
kernel::LiteKernel *CpuStridedSliceKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(desc.type == schema::PrimitiveType_StridedSlice);
if (opParameter == nullptr) {
MS_LOG(ERROR) << "opParameter null pointer dereferencing.";
return nullptr;
}
auto *kernel = new (std::nothrow) StridedSliceCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) StridedSliceCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "New kernel fails.";
return nullptr;


+3 -2  mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class StridedSliceCPUKernel : public LiteKernel {
public:
StridedSliceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_num_(ctx->thread_num_) {}
~StridedSliceCPUKernel() override = default;

int Init() override;


+11 -3  mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc

@@ -183,10 +183,14 @@ void Convolution3x3FP16CPUKernel::ConfigInputOutput() {
}

int Convolution3x3FP16CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
return RET_ERROR;
return ret;
}
ret = InitWeightBias();
if (ret != RET_OK) {
@@ -228,7 +232,7 @@ int Convolution3x3FP16CPUKernel::ReSize() {
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
return RET_ERROR;
return ret;
}
ret = InitTmpBuffer();
if (ret != RET_OK) {
@@ -256,7 +260,11 @@ int Convolution3x3Fp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata)
}

int Convolution3x3FP16CPUKernel::Run() {
// cast fp32 input data to fp16
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
auto input_tensor = inputs_.at(kInputIndex);
auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data());
for (int i = 0; i < input_tensor->ElementsNum(); ++i) {
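
The two hunks above show both halves of the mechanism this commit adds. In Init(), the new `context_->infer_shape_interrupt_ && !context_->running_` guard means shape inference was interrupted at graph-build time, so the kernel records that fact via SetNeedReInit() and returns early; in Run(), the new leading Prepare() call finishes the deferred initialization once real shapes are available. A toy, self-contained model of that handshake; need_reinit_, DoInit() and the Prepare() body here are assumptions standing in for the real LiteKernel implementation:

#include <iostream>

struct Context {
  bool infer_shape_interrupt_ = true;  // shapes were unknown when the graph was built
  bool running_ = false;               // the graph is not executing yet
};

class Kernel {
 public:
  explicit Kernel(Context *ctx) : ctx_(ctx) {}

  int Init() {
    if (ctx_->infer_shape_interrupt_ && !ctx_->running_) {
      need_reinit_ = true;  // models SetNeedReInit() in the diff
      return 0;             // RET_OK: defer the shape-dependent work
    }
    return DoInit();
  }

  int Run() {
    if (Prepare() != 0) {   // the guard now placed at the top of every Run()
      return -1;
    }
    std::cout << "kernel body runs with valid shapes\n";
    return 0;
  }

 private:
  int Prepare() {
    if (!need_reinit_) {
      return 0;
    }
    ctx_->running_ = true;  // simplification: the session flips this flag in reality
    need_reinit_ = false;
    return DoInit();        // re-run the initialization that Init() skipped
  }

  int DoInit() {
    std::cout << "real Init(): buffers sized from inferred shapes\n";
    return 0;
  }

  Context *ctx_;
  bool need_reinit_ = false;
};

int main() {
  Context ctx;
  Kernel kernel(&ctx);
  kernel.Init();  // deferred because shapes are still unknown
  kernel.Run();   // Prepare() completes Init(), then the kernel executes
  return 0;
}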


+3 -3  mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class Convolution3x3FP16CPUKernel : public ConvolutionBaseCPUKernel {
public:
Convolution3x3FP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~Convolution3x3FP16CPUKernel() override {
if (fp16_input_ != nullptr) {
free(fp16_input_);
@@ -78,4 +79,3 @@ void ProcessFilterFp16(float16_t *origin_weight, float16_t *dst_weight, ConvPara
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONVOLUTION_3x3_FP16_H_


+17 -6  mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc

@@ -85,14 +85,20 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
}

int ConvolutionDepthwiseFp16CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
// conv base init
ConvolutionBaseCPUKernel::Init();

auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
return ret;
}
// init sliding_ window param
sliding_ = new SlidingWindowParam;
InitSlidingParam(sliding_, conv_param_, C8NUM);

auto ret = InitWeightBias();
ret = InitWeightBias();
if (ret != 0) {
MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed.";
return RET_ERROR;
@@ -138,6 +144,11 @@ int ConvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionDepthwiseFp16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
@@ -149,7 +160,7 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_,
conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);

auto ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_);
ret = LiteBackendParallelLaunch(ConvDwFp16Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
return RET_ERROR;
@@ -165,10 +176,10 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() {
kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D);
auto kernel = new (std::nothrow) ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx);
auto kernel = new (std::nothrow) ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
public:
ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionDepthwiseFp16CPUKernel() override {
delete sliding_;
free(packed_weight_);


+15 -7  mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc

@@ -154,10 +154,14 @@ void ConvolutionFP16CPUKernel::ConfigInputOutput() {
}

int ConvolutionFP16CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
return RET_ERROR;
MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret;
return ret;
}
ret = InitWeightBias();
if (ret != RET_OK) {
@@ -193,7 +197,7 @@ int ConvolutionFP16CPUKernel::ReSize() {
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
return RET_ERROR;
return ret;
}
ret = InitTmpBuffer();
if (ret != RET_OK) {
@@ -220,7 +224,11 @@ int ConvolutionFp16Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionFP16CPUKernel::Run() {
// cast fp32 input data to fp16
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
auto input_tensor = inputs_.at(kInputIndex);
auto ori_input_data = reinterpret_cast<float *>(input_tensor->Data());
for (int i = 0; i < input_tensor->ElementsNum(); ++i) {
@@ -251,7 +259,7 @@ int ConvolutionFP16CPUKernel::Run() {
kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D);
auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
@@ -267,7 +275,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
conv_param->output_w_ = outputs.front()->Width();
kernel::LiteKernel *kernel = nullptr;
if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) {
kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::Convolution3x3FP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
} else {
bool use_winograd = false;
int out_unit;
@@ -275,7 +283,7 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::tensor::Ten
OutputTransformUnitFunc output_trans_func = nullptr;
CheckIfUseWinograd(&use_winograd, &out_unit, conv_param, input_trans_func, output_trans_func);
if (kernel_h != 1 && kernel_w != 1 && !use_winograd) {
kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
}
}
if (kernel == nullptr) {
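
CpuConvFp16KernelCreator above keeps its existing dispatch logic while gaining the primitive parameter: a 3x3 conv with unit stride and dilation gets the specialized Convolution3x3FP16CPUKernel, and other shapes fall through to the generic ConvolutionFP16CPUKernel unless Winograd is selected. A compact stand-alone restatement of that selection rule (a toy function, not the real creator):

#include <iostream>

// Mirrors the if/else chain in the creator; use_winograd stands in for the
// result of CheckIfUseWinograd().
const char *PickConvKernel(int kh, int kw, int sh, int sw, int dh, int dw, bool use_winograd) {
  if (kh == 3 && kw == 3 && sh == 1 && sw == 1 && dh == 1 && dw == 1) {
    return "Convolution3x3FP16CPUKernel";
  }
  if (kh != 1 && kw != 1 && !use_winograd) {
    return "ConvolutionFP16CPUKernel";
  }
  return "no fp16 kernel chosen in this sketch";
}

int main() {
  std::cout << PickConvKernel(3, 3, 1, 1, 1, 1, false) << "\n";  // specialized 3x3 path
  std::cout << PickConvKernel(5, 5, 1, 1, 1, 1, false) << "\n";  // generic fp16 conv
  return 0;
}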


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel {
public:
ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionFP16CPUKernel() override {
if (fp16_input_ != nullptr) {
free(fp16_input_);


+17 -5  mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc

@@ -99,12 +99,19 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
}

int DeconvolutionDepthwiseFp16CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
sliding_ = new SlidingWindowParam;
InitSlideParam();
// conv base init
ConvolutionBaseCPUKernel::Init();
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
return ret;
}

auto ret = InitWeightBias();
ret = InitWeightBias();
if (ret != 0) {
MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitWeightBias failed.";
return RET_ERROR;
@@ -150,6 +157,11 @@ int DeconvDwFp16Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int DeconvolutionDepthwiseFp16CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
@@ -161,7 +173,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
PackNHWCFp32ToNHWC8Fp16(input_addr, packed_input_, conv_param_->input_batch_,
conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);

auto ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_);
ret = LiteBackendParallelLaunch(DeconvDwFp16Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]";
return RET_ERROR;
@@ -176,10 +188,10 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_DeDepthwiseConv2D);
auto kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx);
auto kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
public:
DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~DeconvolutionDepthwiseFp16CPUKernel() override {
delete sliding_;
free(packed_weight_);


+8 -2  mindspore/lite/src/runtime/kernel/arm/fp32/activation.cc

@@ -19,6 +19,7 @@
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"
#include "src/ops/ops.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
@@ -78,6 +79,11 @@ int ActivationRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ActivationCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
int error_code = LiteBackendParallelLaunch(ActivationRun, this, thread_count_);
if (error_code != RET_OK) {
MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]";
@@ -89,10 +95,10 @@ int ActivationCPUKernel::Run() {
kernel::LiteKernel *CpuActivationFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Activation);
auto *kernel = new (std::nothrow) ActivationCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ActivationCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/activation.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class ActivationCPUKernel : public LiteKernel {
public:
ActivationCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(param, inputs, outputs), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(param, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
type_ = (reinterpret_cast<ActivationParameter *>(param))->type_;
alpha_ = (reinterpret_cast<ActivationParameter *>(param))->alpha_;
}


+10 -10  mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.cc

@@ -20,8 +20,8 @@
#include "src/runtime/runtime_api.h"
#include "include/errorcode.h"

using mindspore::lite::KernelRegistrar;
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::ActivationGradType_HSWISH;
@@ -32,8 +32,8 @@ using mindspore::schema::PrimitiveType_ActivationGrad;

namespace mindspore::kernel {
int ActivationGradCPUKernel::Init() {
outputs_[0]->set_shape(inputs_[0]->shape());
return RET_OK;
outputs_[0]->set_shape(inputs_[0]->shape());
return RET_OK;
}

int ActivationGradCPUKernel::ReSize() { return RET_OK; }
@@ -58,7 +58,7 @@ int ActivationGradCPUKernel::DoActivation(int task_id) {
error_code = TanhGrad(yt_addr, input_addr, length, output_addr);
} else if (type_ == schema::ActivationGradType_HSWISH) {
error_code = HSwishGrad(yt_addr, input_addr, length, output_addr);
} else if (type_ == schema::ActivationGradType_HSIGMOID) {
} else if (type_ == schema::ActivationGradType_HSIGMOID) {
error_code = HSigmoidGrad(yt_addr, input_addr, length, output_addr);
} else {
MS_LOG(ERROR) << "Activation type error";
@@ -90,17 +90,17 @@ int ActivationGradCPUKernel::Run() {
}

kernel::LiteKernel *CpuActivationGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_ActivationGrad);
auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs);
auto *kernel = new (std::nothrow) ActivationGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);
auto ret = kernel->Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_
<< ", type: "
MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
}
return kernel;


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/activation_grad.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class ActivationGradCPUKernel : public LiteKernel {
public:
explicit ActivationGradCPUKernel(OpParameter *param, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(param, inputs, outputs) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(param, inputs, outputs, ctx, primitive) {
ActivationGradParameter *param_act_grad = reinterpret_cast<ActivationGradParameter *>(param);
type_ = param_act_grad->type_;
alpha_ = param_act_grad->alpha_;


+11 -8  mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc

@@ -36,12 +36,9 @@ int AddNLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
auto kernel = reinterpret_cast<AddNCPUKernel *>(cdata);
return kernel->AddNParallelRun(thread_id);
}
}
} // namespace

int AddNCPUKernel::Init() {
elements_num_ = inputs_[0]->ElementsNum();
return RET_OK;
}
int AddNCPUKernel::Init() { return RET_OK; }

int AddNCPUKernel::ReSize() { return RET_OK; }

@@ -58,6 +55,12 @@ int AddNCPUKernel::AddNParallelRun(int thread_id) {
}

int AddNCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
elements_num_ = inputs_[0]->ElementsNum();
auto input0_data = reinterpret_cast<float *>(inputs_[0]->Data());
auto input1_data = reinterpret_cast<float *>(inputs_[1]->Data());
auto output_data = reinterpret_cast<float *>(outputs_[0]->Data());
@@ -71,7 +74,7 @@ int AddNCPUKernel::Run() {
in1_addr_ = input0_data;
in2_addr_ = input1_data;
out_addr_ = output_data;
int ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_);
ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "addn launch fail!ret: " << ret;
return RET_ERROR;
@@ -91,7 +94,7 @@ int AddNCPUKernel::Run() {
kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
@@ -102,7 +105,7 @@ kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector<lite::tensor::Ten
}
MS_ASSERT(desc.type == schema::PrimitiveType_AddN);
op_parameter->thread_num_ = ctx->thread_num_;
auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs);
auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new AddNCPUKernel fail!";
return nullptr;
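
The AddN hunk illustrates a second consequence of deferred shape inference: anything derived from tensor shapes moves out of Init(). Here elements_num_ used to be computed in Init(), which now returns RET_OK unconditionally, and the count is read at Run() time instead, after Prepare() has guaranteed valid shapes. A small self-contained illustration (the Tensor type is a stand-in):

#include <iostream>
#include <vector>

struct Tensor {
  std::vector<int> shape;
  int ElementsNum() const {
    int n = 1;
    for (int d : shape) {
      n *= d;
    }
    return n;
  }
};

int main() {
  Tensor input;              // shape is unknown while the graph is being built
  input.shape = {2, 3, 4};   // filled in once inference has actually run
  int elements_num = input.ElementsNum();  // so this now happens in Run(), not Init()
  std::cout << "AddN processes " << elements_num << " elements\n";
  return 0;
}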


+4 -2  mindspore/lite/src/runtime/kernel/arm/fp32/addn.h

@@ -21,18 +21,20 @@
#include "src/lite_kernel.h"
#include "schema/model_generated.h"


namespace mindspore::kernel {
class AddNCPUKernel : public LiteKernel {
public:
AddNCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~AddNCPUKernel() = default;

int Init() override;
int ReSize() override;
int Run() override;
int AddNParallelRun(int thread_id);

private:
float *in1_addr_;
float *in2_addr_;


+6 -1  mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.cc

@@ -40,7 +40,12 @@ int ArgMinMaxCPUKernel::Init() {
}

int ArgMinMaxCPUKernel::Run() {
auto ret = ArgMinMaxBaseCPUKernel::Run();
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
return ret;
}
ret = ArgMinMaxBaseCPUKernel::Run();
ArgMinMaxBaseCPUKernel::FreeTmpMemory();
return ret;
}


+3 -3  mindspore/lite/src/runtime/kernel/arm/fp32/argminmax.h

@@ -23,8 +23,9 @@ namespace mindspore::kernel {
class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel {
public:
ArgMinMaxCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ArgMinMaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

~ArgMinMaxCPUKernel() = default;

@@ -35,4 +36,3 @@ class ArgMinMaxCPUKernel : public ArgMinMaxBaseCPUKernel {
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARGMINMAX_H_


+11 -2  mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.cc

@@ -41,6 +41,10 @@ ArithmeticCPUKernel::~ArithmeticCPUKernel() {
}
}
int ArithmeticCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto element_num = outputs_[0]->ElementsNum();

tile_data0_ = new float[element_num];
@@ -92,6 +96,11 @@ int ArithmeticsRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ArithmeticCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
if (arithmeticParameter_->broadcasting_) {
auto input_data0 = reinterpret_cast<float *>(inputs_[0]->Data());
auto input_data1 = reinterpret_cast<float *>(inputs_[1]->Data());
@@ -108,9 +117,9 @@ int ArithmeticCPUKernel::Run() {
kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(parameter != nullptr);
auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx);
auto kernel = new (std::nothrow) ArithmeticCPUKernel(parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_;
return nullptr;


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic.h

@@ -48,8 +48,9 @@ class ArithmeticCPUKernel : public LiteKernel {

public:
ArithmeticCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
arithmeticParameter_ = reinterpret_cast<ArithmeticParameter *>(parameter);
switch (parameter->type_) {
case PrimitiveType_Mul:


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.cc

@@ -261,12 +261,13 @@ int ArithmeticGradCPUKernel::Run() {
kernel::LiteKernel *CpuArithmeticGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_EXCEPTION_IF_NULL(opParameter);
if (opParameter == nullptr) {
return nullptr;
}
auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, inputs, outputs);
auto *kernel = new (std::nothrow) ArithmeticGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);
auto ret = kernel->Init();
if (ret != RET_OK) {


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_grad.h

@@ -37,8 +37,9 @@ class ArithmeticGradCPUKernel : public LiteKernel {

public:
explicit ArithmeticGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), tile_data0(NULL), tile_data1(NULL), tile_data2(NULL) {
switch (type()) {
case PrimitiveType_MulGrad:
arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul; // this will be adjusted in InferShape


+13 -3  mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.cc

@@ -27,6 +27,10 @@ using mindspore::lite::RET_OK;

namespace mindspore::kernel {
int ArithmeticSelfCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
int ret = ReSize();
return ret;
}
@@ -68,11 +72,16 @@ int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) {
}

int ArithmeticSelfCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
auto input_tensor = inputs_.at(0);
auto out_tensor = outputs_.at(0);
in_ptr_ = reinterpret_cast<float *>(input_tensor->Data());
out_ptr_ = reinterpret_cast<float *>(out_tensor->Data());
int ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_);
ret = LiteBackendParallelLaunch(ArithmeticSelfRuns, this, thread_sz_count_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
return ret;
@@ -83,13 +92,14 @@ int ArithmeticSelfCPUKernel::Run() {
kernel::LiteKernel *CpuArithmeticSelfFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Creator failed, opParameter is nullptr!";
return nullptr;
}
auto *kernel = new (std::nothrow) ArithmeticSelfCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) ArithmeticSelfCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);
auto ret = kernel->Init();
if (ret != RET_OK) {


+4 -5  mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self.h

@@ -24,9 +24,9 @@
#include "schema/model_generated.h"
#include "include/context.h"


using mindspore::lite::Context;
using mindspore::schema::PrimitiveType_Abs;
using mindspore::schema::PrimitiveType_Ceil;
using mindspore::schema::PrimitiveType_Cos;
using mindspore::schema::PrimitiveType_Exp;
using mindspore::schema::PrimitiveType_Floor;
@@ -36,7 +36,6 @@ using mindspore::schema::PrimitiveType_Rsqrt;
using mindspore::schema::PrimitiveType_Sin;
using mindspore::schema::PrimitiveType_Sqrt;
using mindspore::schema::PrimitiveType_Square;
using mindspore::schema::PrimitiveType_Ceil;

namespace mindspore::kernel {
class ArithmeticSelfCPUKernel : public LiteKernel {
@@ -44,8 +43,9 @@ class ArithmeticSelfCPUKernel : public LiteKernel {

public:
explicit ArithmeticSelfCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
switch (parameter->type_) {
case PrimitiveType_Abs:
arithmeticSelf_run_ = ElementAbs;
@@ -106,4 +106,3 @@ class ArithmeticSelfCPUKernel : public LiteKernel {
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_ARITHMETIC_SELF_H_


+5 -0  mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.cc

@@ -28,6 +28,11 @@ int BatchToSpaceCPUKernel::Init() {
}

int BatchToSpaceCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input = inputs_[0];
auto output = outputs_[0];
const float *input_data = reinterpret_cast<const float *>(input->Data());


+3 -3  mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space.h

@@ -22,8 +22,9 @@ namespace mindspore::kernel {
class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel {
public:
BatchToSpaceCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: BatchToSpaceBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

~BatchToSpaceCPUKernel() = default;

@@ -34,4 +35,3 @@ class BatchToSpaceCPUKernel : public BatchToSpaceBaseCPUKernel {
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCH_TO_SPACE_H_


+7 -2  mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc

@@ -53,6 +53,11 @@ int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int BatchnormCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
in_addr_ = reinterpret_cast<float *>(inputs_.at(0)->Data());
mean_addr_ = reinterpret_cast<float *>(inputs_.at(1)->Data());
var_addr_ = reinterpret_cast<float *>(inputs_.at(2)->Data());
@@ -76,10 +81,10 @@ int BatchnormCPUKernel::Run() {
kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_BatchNorm);
auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new BatchNormCPUKernel fail!";
return nullptr;


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class BatchnormCPUKernel : public LiteKernel {
public:
BatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: LiteKernel(parameter, inputs, outputs), ctx_(ctx), thread_count_(ctx->thread_num_) {
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter);
}
~BatchnormCPUKernel() override { delete batchnorm_param_; }


+12 -2  mindspore/lite/src/runtime/kernel/arm/fp32/bias.cc

@@ -31,6 +31,11 @@ namespace mindspore::kernel {
int BiasCPUKernel::ReSize() { return RET_OK; }

int BiasCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto in = reinterpret_cast<float *>(inputs_.at(0)->Data());
auto bias = reinterpret_cast<float *>(inputs_.at(1)->Data());
auto out = reinterpret_cast<float *>(outputs_.at(0)->Data());
@@ -44,6 +49,10 @@ int BiasCPUKernel::Run() {
}

int BiasCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto dims = inputs_[0]->shape();
MS_ASSERT(dims.size() <= 5);
bias_param_->ndim_ = dims.size();
@@ -58,10 +67,11 @@ int BiasCPUKernel::Init() {

kernel::LiteKernel *CpuBiasFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter,
const lite::Context *ctx, const kernel::KernelKey &desc) {
const lite::Context *ctx, const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(parameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_BiasAdd);
auto kernel = new (std::nothrow) BiasCPUKernel(parameter, inputs, outputs);
auto kernel = new (std::nothrow) BiasCPUKernel(parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Create kernel failed, name: " << parameter->name_;
return nullptr;


+4 -4  mindspore/lite/src/runtime/kernel/arm/fp32/bias.h

@@ -24,9 +24,10 @@ namespace mindspore::kernel {
class BiasCPUKernel : public LiteKernel {
public:
BiasCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs) {
bias_param_ = reinterpret_cast<ArithmeticParameter*>(parameter);
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
bias_param_ = reinterpret_cast<ArithmeticParameter *>(parameter);
}
~BiasCPUKernel() override = default;

@@ -40,4 +41,3 @@ class BiasCPUKernel : public LiteKernel {
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BIAS_H_


+9 -6  mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.cc

@@ -20,12 +20,11 @@
#include "src/kernel_registry.h"
#include "include/errorcode.h"


using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_BiasGrad;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_BiasGrad;

namespace mindspore::kernel {
int BiasGradCPUKernel::InferShape() {
@@ -68,10 +67,14 @@ int BiasGradCPUKernel::Init() {
return RET_OK;
}


int BiasGradCPUKernel::ReSize() { return 0; }

int BiasGradCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return RET_ERROR;
}
auto in = reinterpret_cast<float *>(inputs_.at(0)->Data());
auto out = reinterpret_cast<float *>(outputs_.at(0)->Data());
// size_t data_size = inputs_.at(0)->ElementsNum();
@@ -91,14 +94,14 @@ int BiasGradCPUKernel::Run() {
return RET_OK;
}


kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_BiasGrad);
auto *kernel = new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs);
auto *kernel =
new (std::nothrow) BiasGradCPUKernel(reinterpret_cast<OpParameter *>(opParameter), inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);

auto ret = kernel->Init();


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/bias_grad.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class BiasGradCPUKernel : public LiteKernel {
public:
explicit BiasGradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
bias_param = reinterpret_cast<ArithmeticParameter *>(parameter);
}
~BiasGradCPUKernel() override = default;


+2 -2  mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.cc

@@ -96,12 +96,12 @@ int BNGradInputCPUKernel::Run() {
kernel::LiteKernel *CpuBNGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_BNGradInput);
// parameter->name = opDef.name()->str().data();
// parameter->type = opDef.attr_type();
auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs);
auto *kernel = new (std::nothrow) BNGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);
auto ret = kernel->Init();
if (RET_OK != ret) {


+3 -2  mindspore/lite/src/runtime/kernel/arm/fp32/bngrad_input.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class BNGradInputCPUKernel : public LiteKernel {
public:
explicit BNGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~BNGradInputCPUKernel() override { delete workspace; }

int Init() override;


+11 -2  mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc

@@ -27,6 +27,10 @@ using mindspore::schema::PrimitiveType_BroadcastTo;
namespace mindspore::kernel {

int BroadcastToCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto input_shape = inputs_[0]->shape();
for (size_t i = 0; i < input_shape.size(); ++i) {
shape_info_.input_shape_[i] = input_shape[i];
@@ -42,6 +46,11 @@ int BroadcastToCPUKernel::Init() {
}

int BroadcastToCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_data = reinterpret_cast<float *>(inputs_.at(0)->Data());
auto output_data = reinterpret_cast<float *>(outputs_.at(0)->Data());

@@ -51,13 +60,13 @@ int BroadcastToCPUKernel::Run() {
kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *op_parameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (op_parameter == nullptr) {
MS_LOG(ERROR) << "Input op_parameter is nullptr!";
return nullptr;
}
MS_ASSERT(desc.type == schema::PrimitiveType_BroadcastTo);
auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs);
auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new BroadcastToCPUKernel fail!";
return nullptr;


+5 -5  mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.h

@@ -25,18 +25,18 @@ namespace mindspore::kernel {
class BroadcastToCPUKernel : public LiteKernel {
public:
BroadcastToCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~BroadcastToCPUKernel() = default;

int Init() override;
int ReSize() override {
return 0;
}
int ReSize() override { return 0; }
int Run() override;

private:
BroadcastShapeInfo shape_info_;
};
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BROADCAST_TO_H_


+14 -8  mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc

@@ -30,9 +30,6 @@ using mindspore::schema::PrimitiveType_Cast;

namespace mindspore::kernel {
namespace {
constexpr int kInputNum = 1;
constexpr int kOutputNum = 1;
const std::vector<int> kSupportInputDataType = {kNumberTypeUInt8, kNumberTypeInt32};
int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
if (cdata == nullptr) {
MS_LOG(ERROR) << "input cdata is nullptr!";
@@ -44,12 +41,16 @@ int CastRun(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
} // namespace

int CastCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
data_num_ = inputs_[0]->ElementsNum();
if (data_num_ == 0) {
return RET_OK;
}
thread_num_ = MSMIN(thread_num_, data_num_);
stride_ = UP_DIV(data_num_, thread_num_);
opParameter->thread_num_ = MSMIN(opParameter->thread_num_, data_num_);
stride_ = UP_DIV(data_num_, opParameter->thread_num_);
return RET_OK;
}

@@ -77,16 +78,21 @@ int CastCPUKernel::DoCast(int thread_id) {
}

int CastCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
if (data_num_ == 0) {
return RET_OK;
}
return LiteBackendParallelLaunch(CastRun, this, thread_num_);
return LiteBackendParallelLaunch(CastRun, this, opParameter->thread_num_);
}

kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
if (opParameter == nullptr) {
MS_LOG(ERROR) << "Input opParameter is nullptr!";
return nullptr;
@@ -99,7 +105,7 @@ kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vector<lite::tensor::Ten
MS_LOG(ERROR) << "context thread num is 0!";
return nullptr;
}
auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx);
auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "new CastCPUKernel fail!";
return nullptr;
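
Besides the usual Prepare() guard, the cast kernel stops caching a private thread_num_ member and stores the clamped worker count in `opParameter->thread_num_`, so the same value is visible wherever the parameter struct travels. The partitioning math itself is unchanged; restated as a runnable snippet (plain C++ equivalents of the MSMIN and UP_DIV macros):

#include <algorithm>
#include <iostream>

int main() {
  int data_num = 10;
  int thread_num = 4;
  thread_num = std::min(thread_num, data_num);            // MSMIN(thread_num_, data_num_)
  int stride = (data_num + thread_num - 1) / thread_num;  // UP_DIV(data_num_, thread_num_)
  for (int id = 0; id < thread_num; ++id) {
    int begin = id * stride;
    int count = std::min(stride, data_num - begin);       // the last worker may get fewer
    std::cout << "thread " << id << " casts elements [" << begin << ", " << begin + count << ")\n";
  }
  return 0;
}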


+6 -10  mindspore/lite/src/runtime/kernel/arm/fp32/cast.h

@@ -23,27 +23,23 @@ namespace mindspore::kernel {
class CastCPUKernel : public LiteKernel {
public:
CastCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
: LiteKernel(parameter, inputs, outputs) {
if (ctx != nullptr) {
thread_num_ = ctx->thread_num_;
}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
opParameter->thread_num_ = ctx->thread_num_;
}

~CastCPUKernel() = default;

int Init() override;
int ReSize() override {
return 0;
};
int ReSize() override { return 0; };
int Run() override;
int DoCast(int thread_id);

private:
uint32_t thread_num_;
uint32_t stride_;
uint32_t data_num_;
};
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CAST_H_


+45 -35  mindspore/lite/src/runtime/kernel/arm/fp32/concat.cc

@@ -28,44 +28,54 @@ using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Concat;

namespace mindspore::kernel {
int ConcatCPUKernel::Init() {
ConcatBaseCPUKernel::Init();
schema::Format input0_format = inputs_[0]->GetFormat();
bool need_convert_format = false;
for (size_t i = 1; i < inputs_.size(); ++i) {
if (inputs_[i]->GetFormat() != input0_format) {
need_convert_format = true;
}
}
if (!need_convert_format) {
outputs_[0]->SetFormat(input0_format);
return RET_OK;
}
MS_LOG(ERROR) << "All input format should be the same!";
return RET_ERROR;
int ConcatCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConcatBaseCPUKernel::Init();
if (ret != RET_OK) {
return ret;
}
schema::Format input0_format = inputs_[0]->GetFormat();
bool need_convert_format = false;
for (size_t i = 1; i < inputs_.size(); ++i) {
if (inputs_[i]->GetFormat() != input0_format) {
need_convert_format = true;
}
}
if (!need_convert_format) {
outputs_[0]->SetFormat(input0_format);
return RET_OK;
}
MS_LOG(ERROR) << "All input format should be the same!";
return RET_ERROR;
}

int ConcatCPUKernel::ReSize() { return RET_OK; }
int ConcatCPUKernel::ReSize() { return RET_OK; }

int ConcatCPUKernel::Run() {
auto input_num = inputs_.size();
std::vector<void *> inputs_addr(input_num, nullptr);
std::vector<int *> inputs_output_shape(input_num + 1, nullptr);
int ConcatCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_num = inputs_.size();
std::vector<void *> inputs_addr(input_num, nullptr);
std::vector<int *> inputs_output_shape(input_num + 1, nullptr);

std::vector <std::vector<int>> shapes;
for (size_t i = 0; i < input_num; ++i) {
inputs_addr[i] = inputs_[i]->Data();
shapes.push_back(inputs_[i]->shape());
inputs_output_shape[i] = shapes[i].data();
}
auto output_shape = outputs_.at(0)->shape();
inputs_output_shape[input_num] = output_shape.data();
auto output_addr = outputs_.at(0)->Data();
std::vector<std::vector<int>> shapes;
for (size_t i = 0; i < input_num; ++i) {
inputs_addr[i] = inputs_[i]->Data();
shapes.push_back(inputs_[i]->shape());
inputs_output_shape[i] = shapes[i].data();
}
auto output_shape = outputs_.at(0)->shape();
inputs_output_shape[input_num] = output_shape.data();
auto output_addr = outputs_.at(0)->Data();

Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(),
output_shape.size(), output_addr);
return RET_OK;
}
Concat(reinterpret_cast<void **>(inputs_addr.data()), input_num, axis_, inputs_output_shape.data(),
output_shape.size(), output_addr);
return RET_OK;
}
} // namespace mindspore::kernel



+3 -3  mindspore/lite/src/runtime/kernel/arm/fp32/concat.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class ConcatCPUKernel : public ConcatBaseCPUKernel {
public:
ConcatCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConcatBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

~ConcatCPUKernel() = default;

@@ -42,4 +43,3 @@ class ConcatCPUKernel : public ConcatBaseCPUKernel {
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONCAT_H_


+17 -6  mindspore/lite/src/runtime/kernel/arm/fp32/convolution.cc

@@ -29,6 +29,7 @@ using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::lite::RET_INFER_INVALID;
using mindspore::schema::PrimitiveType_Conv2D;

namespace mindspore::kernel {
@@ -136,6 +137,10 @@ void ConvolutionCPUKernel::ConfigInputOutput() {
}

int ConvolutionCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
@@ -204,6 +209,11 @@ int ConvolutionImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_tensor = inputs_.at(kInputIndex);
auto ori_input_data = input_tensor->Data();
int in_batch = conv_param_->input_batch_;
@@ -223,7 +233,7 @@ int ConvolutionCPUKernel::Run() {
kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2D);
auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
@@ -245,20 +255,21 @@ kernel::LiteKernel *CpuConvFp32KernelCreator(const std::vector<lite::tensor::Ten
kernel::LiteKernel *kernel;
if (kernel_h == 1 && kernel_w == 1) {
// kernel = new (std::nothrow) kernel::Convolution1x1CPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
} else if (kernel_h == 3 && kernel_w == 3 && stride_h == 1 && stride_w == 1 && dilation_h == 1 && dilation_w == 1) {
kernel = new (std::nothrow) kernel::Convolution3x3CPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::Convolution3x3CPUKernel(opParameter, inputs, outputs, ctx, primitive);
} else if (use_winograd) {
kernel = new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(opParameter, inputs, outputs, ctx, out_unit);
kernel =
new (std::nothrow) kernel::ConvolutionWinogradCPUKernel(opParameter, inputs, outputs, ctx, primitive, out_unit);
} else {
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionCPUKernel(opParameter, inputs, outputs, ctx, primitive);
}
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;
}
auto ret = kernel->Init();
if (ret != RET_OK) {
if (ret != RET_OK && ret != RET_INFER_INVALID) {
delete kernel;
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
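
The convolution kernels all adopt one deferred-init protocol: when shape inference was interrupted at graph-build time (context_->infer_shape_interrupt_ set while running_ is still false), Init() just records SetNeedReInit() and returns RET_OK, the creator stops treating RET_INFER_INVALID as fatal, and every Run() begins with a Prepare() call that finishes the postponed work. The Prepare() body is not shown in this diff; a hedged sketch of how it could close the loop, with need_reinit_ and the InferShape call being assumptions:

// Hypothetical LiteKernel::Prepare(): re-run shape inference with the
// now-known input shapes, then perform the Init() that was skipped.
int LiteKernel::Prepare() {
  if (!need_reinit_) {  // assumed flag, set by SetNeedReInit()
    return RET_OK;
  }
  auto ret = primitive_->InferShape(inputs_, outputs_);  // assumed API
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "InferShape failed at run time.";
    return ret;
  }
  need_reinit_ = false;
  // context_->running_ is true during Run(), so the guard at the top of
  // Init() no longer triggers and the full initialization executes.
  return Init();
}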


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution.h

@@ -27,8 +27,9 @@ namespace mindspore::kernel {
class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
public:
ConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionCPUKernel() override {
if (packed_input_ != nullptr) {
free(packed_input_);


+ 9
- 0
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.cc

@@ -136,6 +136,10 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) {
}

int Convolution1x1CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
ConvolutionBaseCPUKernel::Init();
InitConv1x1MatmulParam();

@@ -178,6 +182,11 @@ int Convolution1x1Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int Convolution1x1CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto src_in = reinterpret_cast<float *>(inputs_[0]->Data());
auto src_out = reinterpret_cast<float *>(outputs_[0]->Data());



+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1.h

@@ -34,8 +34,9 @@ namespace mindspore::kernel {
class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
public:
Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {
matmul_param_ = new MatMulParameter();
}
~Convolution1x1CPUKernel();


+ 9
- 0
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.cc

@@ -166,6 +166,10 @@ void Convolution3x3CPUKernel::ConfigInputOutput() {
}

int Convolution3x3CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
@@ -237,6 +241,11 @@ int Convolution3x3Impl(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int Convolution3x3CPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_tensor = inputs_.at(kInputIndex);
auto ori_input_data = input_tensor->Data();
int in_batch = conv_param_->input_batch_;


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_3x3.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
class Convolution3x3CPUKernel : public ConvolutionBaseCPUKernel {
public:
Convolution3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~Convolution3x3CPUKernel() override {
if (transformed_filter_addr_ != nullptr) {
free(transformed_filter_addr_);


+ 15
- 4
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.cc

@@ -25,6 +25,7 @@ using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::lite::RET_INFER_INVALID;
using mindspore::schema::PrimitiveType_DepthwiseConv2D;

namespace mindspore::kernel {
@@ -86,6 +87,10 @@ int ConvolutionDepthwiseCPUKernel::InitBuffer() {
}

int ConvolutionDepthwiseCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
// conv base init
ConvolutionBaseCPUKernel::Init();

@@ -144,6 +149,11 @@ int ConvDwRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionDepthwiseCPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
@@ -164,7 +174,7 @@ int ConvolutionDepthwiseCPUKernel::Run() {
packed_output_ = output_addr;
}

auto ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_);
ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
return RET_ERROR;
@@ -180,11 +190,11 @@ int ConvolutionDepthwiseCPUKernel::Run() {
kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D);
kernel::LiteKernel *kernel;
kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx);
kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx, primitive);
// auto param = reinterpret_cast<ConvParameter *>(opParameter);
// if (param->kernel_h_ == 3 && param->kernel_w_ == 3 && param->stride_h_ == 1 && param->stride_w_ == 1 &&
// param->dilation_h_ == 1 && param->dilation_w_ == 1) {
@@ -192,12 +202,13 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::T
// } else {
// kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx);
// }

if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;
}
auto ret = kernel->Init();
if (ret != RET_OK) {
if (ret != RET_OK && ret != RET_INFER_INVALID) {
delete kernel;
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
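
Note the follow-on fix inside Run(): Prepare() now declares ret at the top of the function, so the later parallel launch must assign to that variable instead of introducing a second auto ret, which would be a redefinition error in the same scope. Condensed from the two hunks above (the channel check and packed-buffer setup in the middle are elided):

int ConvolutionDepthwiseCPUKernel::Run() {
  auto ret = Prepare();  // first and only declaration of ret
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed.";
    return ret;
  }
  // ... channel check and packed buffer setup elided ...
  ret = LiteBackendParallelLaunch(ConvDwRun, this, conv_param_->thread_num_);  // reuse, not `auto ret = ...`
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwRun error: error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}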


+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
public:
ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionDepthwiseCPUKernel() override {
delete sliding_;
free(packed_weight_);
@@ -55,4 +56,3 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
} // namespace mindspore::kernel

#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_DEPTHWISE_H_


+ 10
- 1
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.cc

@@ -100,6 +100,10 @@ int ConvolutionDepthwise3x3CPUKernel::InitBuffer() {
}

int ConvolutionDepthwise3x3CPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
// conv base init
ConvolutionBaseCPUKernel::Init();

@@ -164,6 +168,11 @@ int ConvDw3x3Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
}

int ConvolutionDepthwise3x3CPUKernel::Run() {
auto ret = Prepare();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Prepare failed.";
return ret;
}
if (conv_param_->input_channel_ != conv_param_->output_channel_) {
MS_LOG(ERROR) << "Only support input channel equals output channel.";
return RET_ERROR;
@@ -184,7 +193,7 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
packed_output_ = output_addr;
}

auto ret = LiteBackendParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_);
ret = LiteBackendParallelLaunch(ConvDw3x3Run, this, conv_param_->thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]";
return RET_ERROR;


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h

@@ -26,8 +26,9 @@ namespace mindspore::kernel {
class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel {
public:
ConvolutionDepthwise3x3CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
const lite::Primitive *primitive)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

~ConvolutionDepthwise3x3CPUKernel() override {
free(packed_weight_);


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.cc

@@ -135,11 +135,12 @@ int ConvolutionGradFilterCPUKernel::Run() {
kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc,
const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradFilter);

auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs);
auto *kernel = new (std::nothrow) ConvolutionGradFilterCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);

auto ret = kernel->Init();


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_filter.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class ConvolutionGradFilterCPUKernel : public LiteKernel {
public:
explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionGradFilterCPUKernel() override { delete workspace; }

int Init() override;


+ 3
- 3
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.cc

@@ -23,9 +23,9 @@

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Conv2DGradInput;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Conv2DGradInput;

namespace mindspore::kernel {
int ConvolutionGradInputCPUKernel::Init() {
@@ -115,11 +115,11 @@ int ConvolutionGradInputCPUKernel::Run() {
kernel::LiteKernel *CpuConvGradInputFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, const lite::Context *ctx,
const kernel::KernelKey &desc) {
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DGradInput);

auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs);
auto *kernel = new (std::nothrow) ConvolutionGradInputCPUKernel(opParameter, inputs, outputs, ctx, primitive);
MS_ASSERT(kernel != nullptr);

auto ret = kernel->Init();


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_grad_input.h

@@ -25,8 +25,9 @@ namespace mindspore::kernel {
class ConvolutionGradInputCPUKernel : public LiteKernel {
public:
explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs)
: LiteKernel(parameter, inputs, outputs) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~ConvolutionGradInputCPUKernel() override { delete workspace; }

int Init() override;


+ 9
- 0
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.cc

@@ -247,6 +247,10 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() {
}

int ConvolutionWinogradCPUKernel::Init() {
if (context_->infer_shape_interrupt_ && !context_->running_) {
SetNeedReInit();
return RET_OK;
}
auto ret = ConvolutionBaseCPUKernel::Init();
if (ret != RET_OK) {
MS_LOG(ERROR) << "ConvolutionBase init failed.";
@@ -339,6 +343,11 @@ int ConvolutionWinogradImpl(int task_id, LiteParallelGroupEnv *penv, void *cdata
}

int ConvolutionWinogradCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input_tensor = inputs_.at(kInputIndex);
auto ori_input_data = input_tensor->Data();
int in_batch = conv_param_->input_batch_;


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd.h

@@ -28,8 +28,9 @@ namespace mindspore::kernel {
class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
public:
ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, int output_unit)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), output_unit_(output_unit) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive, int output_unit)
: ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit) {}
~ConvolutionWinogradCPUKernel() override {
if (tmp_data_ != nullptr) {
free(tmp_data_);


+ 7
- 10
mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc

@@ -40,15 +40,7 @@ int CropLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
}
} // namespace

int CropCPUKernel::Init() {
schema::Format input0_format = inputs_[0]->GetFormat();
if (input0_format != schema::Format_NCHW && input0_format != schema::Format_NHWC) {
MS_LOG(ERROR) << "Unsupport format " << input0_format;
return RET_FORMAT_ERR;
}
outputs_[0]->SetFormat(input0_format);
return RET_OK;
}
int CropCPUKernel::Init() { return RET_OK; }

int CropCPUKernel::CropParallelRun(int thread_id) {
auto input = inputs_[0];
@@ -61,6 +53,11 @@ int CropCPUKernel::CropParallelRun(int thread_id) {
}

int CropCPUKernel::Run() {
auto prepare_ret = Prepare();
if (prepare_ret != RET_OK) {
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
return prepare_ret;
}
auto input = inputs_[0];
auto output = outputs_[0];
auto param = reinterpret_cast<CropParameter *>(opParameter);
@@ -71,7 +68,7 @@ int CropCPUKernel::Run() {
return RET_OK;
}

int ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_);
auto ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Crop launch fail!ret: " << ret;
return RET_ERROR;
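
Crop also loses its NCHW/NHWC format check in Init(), presumably because format and shape propagation are now handled when the graph's shapes are inferred, leaving Init() trivial and Run() guarded by the same Prepare() call. For context, the closing braces at the top of this hunk belong to CropLaunch, the callback handed to LiteBackendParallelLaunch; a reconstruction of its full shape, with the body hedged (only the signature, visible in the hunk header, and the closing braces are confirmed by the diff):

namespace {
int CropLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) {
  // cdata is the kernel instance passed as `this` to LiteBackendParallelLaunch.
  auto kernel = reinterpret_cast<CropCPUKernel *>(cdata);
  auto ret = kernel->CropParallelRun(thread_id);  // each worker crops its share of rows
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "CropParallelRun error task_id[" << thread_id << "] error_code[" << ret << "]";
    return RET_ERROR;
  }
  return RET_OK;
}
}  // namespace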


+ 3
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/crop.h

@@ -24,8 +24,9 @@ namespace mindspore::kernel {
class CropCPUKernel : public CropBaseCPUKernel {
public:
CropCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
: CropBaseCPUKernel(parameter, inputs, outputs, ctx) {}
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
const lite::Primitive *primitive)
: CropBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~CropCPUKernel() = default;
int Init() override;
int ReSize() override { return 0; }


Some files were not shown because too many files changed in this diff
