@@ -147,7 +147,7 @@ class LogWriter {
   LogWriter(const LocationInfo &location, MsLogLevel log_level, SubModuleId submodule,
             ExceptionType excp_type = NoExceptionType)
-      : location_(location), log_level_(log_level), submodule_(submodule), exception_type_(excp_type) {}
+      : location_(location), log_level_(log_level), exception_type_(excp_type) {}
   ~LogWriter() = default;
   void operator<(const LogStream &stream) const noexcept __attribute__((visibility("default")));
@@ -161,7 +161,6 @@ class LogWriter {
   LocationInfo location_;
   MsLogLevel log_level_;
-  SubModuleId submodule_;
   ExceptionType exception_type_;
   inline static ExceptionHandler exception_handler_ = nullptr;
@@ -65,22 +65,21 @@ set(CMAKE_VERBOSE_MAKEFILE on)
 add_compile_definitions(USE_ANDROID_LOG)
 add_compile_definitions(NO_DLIB)
 add_compile_options(-fPIC)
-if (NOT PLATFORM_ARM64 AND NOT PLATFORM_ARM32)
-    if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g")
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default")
-    else ()
-        ## enable for binscope for release
-        set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations ${CMAKE_C_FLAGS}")
-        set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations ${CMAKE_CXX_FLAGS}")
-        if (NOT WIN32)
-            set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
-            set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
-        endif()
-        string(REPLACE " -g " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-    endif ()
+if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default")
+else ()
+    ## enable for binscope for release
+    set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_C_FLAGS}")
+    set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}")
+    if (NOT WIN32)
+        set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
+        set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
+    endif()
+    string(REPLACE " -g " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 endif ()
 if (BUILD_DEVICE)
@@ -51,6 +51,8 @@ void TileOneDimension(float *inData, float *outData, int dim, size_t ndim, int *
                       int *outStrides, int *multiple);
 void ComputeStrides(int *shape, int *strides, int ndim);
+void CalcMultiplesAndStrides(ArithmeticParameter *param);
 void TileDimensions(float *data0, float *data1, float *tile_data0, float *tile_data1, ArithmeticParameter *param);
 void TileDimensionsUint8(uint8_t *data0, uint8_t *data1, uint8_t *tile_data0, uint8_t *tile_data1,
                          ArithmeticParameter *param);
@@ -395,7 +395,6 @@ void Conv3x3Fp16(float16_t *input_data, float16_t *transed_weight, const float16
   int input_batch = conv_param->input_batch_;
   for (int batch = 0; batch < input_batch; batch++) {
-    int in_batch_offset = batch * ic4 * C4NUM * conv_param->input_h_ * conv_param->input_w_;
     int tmp_out_batch_offset = batch * oc8 * C8NUM * out_w_block * out_h_block * output_unit * output_unit;
     for (int thread_id = task_id; thread_id < output_tile_count; thread_id += thread_count) {
       int start_index = thread_id * tile_num;
@@ -55,7 +55,6 @@ void Im2ColPackUnitFp16(float16_t *input_data, ConvParameter *conv_param, float1
   int in_w = conv_param->input_w_;
   int out_w = conv_param->output_w_;
   int channel_block = UP_DIV(in_channel, 4);
-  int kernel_plane = kernel_h * kernel_w;
   for (int i = 0; i < real_cal_num; i++) {
     int block_start = block_index + i;
@@ -607,7 +607,7 @@ void WinogradInputTransformFp16(const float16_t *input_data, float16_t *trans_in
       for (int j = 0; j < (interval_x_e - interval_x_s); j++) {
         int src_x_offset = src_y_offset + j * ic8 * C8NUM;
         int dst_x_offset = dst_y_offset + j * C8NUM;
-        float16_t *src_addr = input_data + src_x_offset;
+        const float16_t *src_addr = input_data + src_x_offset;
         float16_t *dst_addr = tmp_data + dst_x_offset;
 #ifdef ENABLE_NEON
         vst1q_f16(dst_addr, vld1q_f16(src_addr));
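
Background on the hunk above, as a minimal sketch (plain `float` stands in for `float16_t`, which needs an ARM toolchain): the source pointer is only read, so holding it through a pointer-to-const keeps the transform compatible with a const-qualified `input_data` and lets the compiler reject accidental writes through `src_addr`.

```cpp
// Const-correctness sketch: read-only view of the input, writable destination.
void CopyUnit(const float *input_data, float *tmp_data, int src_off, int dst_off) {
  const float *src_addr = input_data + src_off;  // reads only
  float *dst_addr = tmp_data + dst_off;          // scratch buffer stays writable
  *dst_addr = *src_addr;                         // the real code uses vld1q/vst1q here
}
```
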
@@ -28,7 +28,7 @@ void IndirectGemmInt8(int8_t *dst, int32_t *tmp_dst, const int8_t *src, const in
   int32_t out_zp = conv_param->conv_quant_arg_.output_quant_args_[0].zp_;
   int32_t act_min = conv_param->conv_quant_arg_.out_act_min_[0];
   int32_t act_max = conv_param->conv_quant_arg_.out_act_max_[0];
-  int oc4 = UP_DIV(output_channel, C4NUM);
 #ifdef ENABLE_ARM64
   size_t asymmetric = conv_param->conv_quant_arg_.asymmetric_ & FILTER_ASYMMETRIC;
   size_t per_channel = conv_param->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;
@@ -36,6 +36,7 @@ void IndirectGemmInt8(int8_t *dst, int32_t *tmp_dst, const int8_t *src, const in
                   output_channel * sizeof(int8_t), input_sum, act_min, act_max, out_zp, out_multiplier,
                   shift_before, shift_after, asymmetric, per_channel);
 #else
+  int oc4 = UP_DIV(output_channel, C4NUM);
   int tile_num = conv_param->tile_num_;
   int plane_c4 = UP_DIV(kernel_plane, C4NUM);
   for (int oc = 0; oc < output_channel; oc++) {
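
A hedged note on the two hunks above: `oc4` is only consumed by the non-ARM64 path, so computing it before the `#ifdef` leaves it unused when the assembly kernel is taken; with `-Wall -Werror` (see the CMake hunk earlier) that would fail the build. Minimal sketch, with `UP_DIV` assumed to be the usual round-up division:

```cpp
#define UP_DIV(x, y) (((x) + (y)-1) / (y))  // assumption: standard round-up division
#define C4NUM 4

// Declare helper values only in the branch that uses them, so the other
// configuration does not trip -Wunused-variable under -Werror.
int BlockCount(int output_channel) {
#ifdef ENABLE_ARM64
  return output_channel;                    // asm path never needs oc4
#else
  int oc4 = UP_DIV(output_channel, C4NUM);  // computed only where it is read
  return oc4;
#endif
}
```
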
@@ -63,16 +63,17 @@ void RowMajor2Row16x4MajorInt8(void *src_ptr, void *dst_ptr, int row, int col) {
   for (int ri = 0; ri < row_4div; ri += C4NUM) {
     for (int ci = 0; ci < col_16div; ci += C16NUM) {
 #ifdef ENABLE_ARM64
+      size_t col_offset = col;
       int8_t *src_c = src_r + ci;
       int8_t *dst_c = dst_r + ci * C4NUM;
       asm volatile(
         "mov x10, %[src_c] \n"
         "mov x11, %[dst_c] \n"
-        "ld1 {v0.16b}, [x10], %[col]\n"
-        "ld1 {v1.16b}, [x10], %[col]\n"
-        "ld1 {v2.16b}, [x10], %[col]\n"
-        "ld1 {v3.16b}, [x10], %[col]\n"
+        "ld1 {v0.16b}, [x10], %[col_offset]\n"
+        "ld1 {v1.16b}, [x10], %[col_offset]\n"
+        "ld1 {v2.16b}, [x10], %[col_offset]\n"
+        "ld1 {v3.16b}, [x10], %[col_offset]\n"
         "st1 {v0.16b}, [x11], #16\n"
         "st1 {v1.16b}, [x11], #16\n"
@@ -80,7 +81,7 @@ void RowMajor2Row16x4MajorInt8(void *src_ptr, void *dst_ptr, int row, int col) {
         "st1 {v3.16b}, [x11], #16\n"
         :
-        : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ col ] "r"(col)
+        : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ col_offset ] "r"(col_offset)
         : "x10", "x11", "v0", "v1", "v2", "v3");
 #else
       MatrixPack4x16UnitInt8(src_r + ci, dst_r + ci * C4NUM, C4NUM, C16NUM, col);
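
A hedged guess at the motivation for `col_offset`: the post-index form `ld1 {Vt.16b}, [Xn], Xm` requires a 64-bit X register for the offset, and binding a 32-bit `int` to an `"r"` constraint can, depending on the compiler, be emitted with its w-register name or carry an undefined upper half; copying `col` into a `size_t` first keeps the operand unambiguously 64-bit. A self-contained sketch of the pattern:

```cpp
#include <cstddef>
#include <cstdint>

// Widen the stride to 64 bits before handing it to the inline-asm "r"
// constraint that is used as a post-index offset register.
void LoadTwoRows(const std::int8_t *src, int col) {
#ifdef __aarch64__
  std::size_t col_offset = col;  // 64-bit copy of the 32-bit stride
  asm volatile(
      "ld1 {v0.16b}, [%[src]], %[off]\n"
      "ld1 {v1.16b}, [%[src]], %[off]\n"
      : [src] "+r"(src)
      : [off] "r"(col_offset)
      : "v0", "v1", "memory");
#else
  (void)src;
  (void)col;
#endif
}
```
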
@@ -1225,9 +1225,9 @@ void Conv3x3Uint8OutputUnit(const int32_t *gemm_out, const int32_t *bias_data, i
     ls = vld1q_s32(left_shift);
     rs = vld1q_s32(right_shift);
   } else {
-    out_multiplier = vdupq_n_s32(quant_multiplier);
-    ls = vdupq_n_s32(left_shift);
-    rs = vdupq_n_s32(right_shift);
+    out_multiplier = vdupq_n_s32(quant_multiplier[0]);
+    ls = vdupq_n_s32(left_shift[0]);
+    rs = vdupq_n_s32(right_shift[0]);
   }
   int32x4_t out_zp = vdupq_n_s32(output_zp);
   int32x4_t output_min = vdupq_n_s32(out_min);
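
For reference on the hunk above: `vld1q_s32` takes a pointer and fills four lanes from memory, while `vdupq_n_s32` takes a single `int32_t` and broadcasts it, so the per-tensor branch has to dereference the first element explicitly; passing the pointer itself would rely on an invalid pointer-to-integer conversion. Minimal sketch (NEON target assumed):

```cpp
#include <arm_neon.h>

// Per-channel: load four multipliers from memory.
// Per-tensor: broadcast the single multiplier into all four lanes.
int32x4_t LoadMultiplier(const int32_t *quant_multiplier, bool per_channel) {
  if (per_channel) {
    return vld1q_s32(quant_multiplier);
  }
  return vdupq_n_s32(quant_multiplier[0]);
}
```
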
@@ -43,7 +43,7 @@ std::vector<size_t> GetGraphInputNodes(const schema::MetaGraph *meta_graph) {
       }
     }
   }
-  return std::move(ret);
+  return ret;
 }
 std::vector<size_t> GetGraphOutputNodes(const schema::MetaGraph *meta_graph) {
@@ -64,7 +64,7 @@ std::vector<size_t> GetGraphOutputNodes(const schema::MetaGraph *meta_graph) {
       }
     }
   }
-  return std::move(ret);
+  return ret;
 }
 // NODE_ID OpNode::ID() { return id; }
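
For reference on the two returns above: returning a local by value already qualifies for NRVO (or an implicit move when elision does not apply), and wrapping it in `std::move` disables copy elision; recent GCC and Clang flag this with `-Wpessimizing-move`. Minimal sketch:

```cpp
#include <cstddef>
#include <vector>

std::vector<std::size_t> MakeIds() {
  std::vector<std::size_t> ret = {1, 2, 3};
  return ret;  // NRVO, or an implicit move if elision does not apply
  // return std::move(ret);  // would block copy elision (-Wpessimizing-move)
}
```
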
@@ -54,7 +54,10 @@ int Resize::InferShape(std::vector<lite::tensor::Tensor *> inputs_, std::vector<
   if (input == nullptr) {
     return 1;
   }
-  MS_ASSERT(input->shape().size() == kInputRank);
+  if (input->shape().size() != kInputRank) {
+    MS_LOG(ERROR) << "Size of input shape is wrong.";
+    return RET_ERROR;
+  }
   auto output = outputs_.front();
   if (output == nullptr) {
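
A hedged note on the hunk above: assert-style macros (MS_ASSERT presumably among them) are typically compiled out of release builds, so a malformed model would pass the rank check unverified; an explicit branch keeps the guard in every build and returns a recoverable error instead of aborting. Generic sketch:

```cpp
#include <cstddef>
#include <vector>

constexpr std::size_t kInputRank = 4;  // assumption: NHWC-style 4-D input

// The check is a real branch, so it survives release builds and reports an
// error code (analogous to RET_ERROR) rather than terminating the process.
int CheckInputRank(const std::vector<int> &shape) {
  if (shape.size() != kInputRank) {
    return -1;
  }
  return 0;
}
```
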
@@ -40,8 +40,14 @@ int PriorBoxCPUKernel::Init() {
     return RET_NULL_PTR;
   }
-  MS_ASSERT(in_tensors_.size() == kInputNum);
-  MS_ASSERT(out_tensors_.size() == kOutputNum);
+  if (in_tensors_.size() != kInputNum) {
+    MS_LOG(ERROR) << "Size of input tensors is wrong.";
+    return RET_ERROR;
+  }
+  if (out_tensors_.size() != kOutputNum) {
+    MS_LOG(ERROR) << "Size of output tensors is wrong.";
+    return RET_ERROR;
+  }
   if (!InferShapeDone()) {
     return RET_OK;
@@ -46,7 +46,6 @@ class ArithmeticFP16CPUKernel : public LiteKernel {
  private:
   void FreeTmpBuffer();
   int break_pos_;
-  int outside_;
   int out_thread_stride_;
   int out_count_;
   float16_t *tile_data0_ = nullptr;
@@ -44,7 +44,6 @@ class ReduceFp16CPUKernel : public ReduceBaseCPUKernel {
  private:
   Reducer reducer_ = nullptr;
   std::vector<float16_t *> data_buffers_;
-  const float *src_data_ = nullptr;
   float *dst_data_ = nullptr;
   float16_t *fp16_input_ = nullptr;
   const float16_t *fp16_src_data_ = nullptr;
@@ -111,8 +111,8 @@ int SplitFp16CPUKernel::Run() {
       context_->allocator->Free(output_ptr_[i]);
       output_ptr_[i] = nullptr;
     }
-    return RET_OK;
   }
+  return RET_OK;
 }
 kernel::LiteKernel *CpuSplitFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
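
A hedged reading of the hunk above: the `output_ptr_[i]` accesses suggest the frees happen inside a loop (or guarded block) over the outputs, so returning from inside it would skip releasing the remaining buffers; hoisting the success return to the end of `Run()` lets the cleanup complete first. Generic sketch:

```cpp
#include <cstdlib>
#include <vector>

int ReleaseAll(std::vector<void *> *buffers) {
  for (auto &ptr : *buffers) {
    std::free(ptr);
    ptr = nullptr;
    // returning here would leave every later buffer un-freed
  }
  return 0;  // success is reported only after the whole loop has run
}
```
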
@@ -30,10 +30,6 @@ using mindspore::lite::RET_OP_EXECUTE_FAILURE;
 using mindspore::schema::PrimitiveType_Transpose;
 namespace mindspore::kernel {
-namespace {
-constexpr int kTransposeInputNum = 1;
-constexpr int kTransposeOutputNum = 1;
-} // namespace
 int TransposeFp16CPUKernel::Init() {
   TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
   num_unit_ = static_cast<int>(in_tensors_[kInputIndex]->shape().at(param->perm_[kNHWC_H]));
@@ -46,7 +46,7 @@ class ArithmeticSelfCPUKernel : public LiteKernel {
   explicit ArithmeticSelfCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                                    const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                                    const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
     switch (parameter->type_) {
       case PrimitiveType_Abs:
         arithmeticSelf_run_ = ElementAbs;
@@ -102,7 +102,6 @@ class ArithmeticSelfCPUKernel : public LiteKernel {
   size_t data_size_;
   ArithmeticSelfParameter *arithmeticSelfParameter_;
   ArithmeticSelfRun arithmeticSelf_run_;
-  const Context *ctx_;
   int thread_count_;
   float *in_ptr_;
   float *out_ptr_;
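
A hedged note covering this and the similar kernel-header hunks below: the constructors still hand `ctx` to the `LiteKernel` base, and the only thing these kernels read from it at construction time is `thread_num_`, so the duplicated `ctx_` member and its mem-initializer go away (assumption: nothing else in the affected kernels dereferences `ctx_`). Self-contained sketch of the pattern:

```cpp
struct Context { int thread_num_; };  // hypothetical stand-in for lite::Context

class BaseKernel {
 public:
  explicit BaseKernel(const Context *ctx) : context_(ctx) {}  // base keeps the pointer
 protected:
  const Context *context_;
};

class DerivedKernel : public BaseKernel {
 public:
  explicit DerivedKernel(const Context *ctx)
      : BaseKernel(ctx), thread_count_(ctx->thread_num_) {}  // no duplicate ctx_ member
  int thread_count() const { return thread_count_; }
 private:
  int thread_count_;
};
```
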
@@ -28,12 +28,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_ConstantOfShape;
 namespace mindspore::kernel {
-namespace {
-constexpr int kInputNum = 1;
-constexpr int kOutputNum = 1;
-} // namespace
 int ConstantOfShapeCPUKernel::Init() { return RET_OK; }
 int ConstantOfShapeCPUKernel::ReSize() { return RET_OK; }
@@ -32,7 +32,7 @@ class ExpandDimsCPUKernel : public LiteKernel {
   ExpandDimsCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                       const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                       const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
   ~ExpandDimsCPUKernel() override = default;
   int Init() override;
@@ -46,7 +46,6 @@ class ExpandDimsCPUKernel : public LiteKernel {
   size_t data_size_;
   float *in_ptr_;
   float *out_ptr_;
-  const Context *ctx_;
   int thread_count_;
 };
 }  // namespace mindspore::kernel
@@ -28,12 +28,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Fill;
 namespace mindspore::kernel {
-namespace {
-constexpr int kInputNum = 1;
-constexpr int kOutputNum = 1;
-} // namespace
 int FillCPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
@@ -30,7 +30,7 @@ class FillCPUKernel : public LiteKernel {
   FillCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                 const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                 const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
   ~FillCPUKernel() override = default;
   int Init() override;
@@ -44,7 +44,6 @@ class FillCPUKernel : public LiteKernel {
   int data_size_;
   float src_data_;
   float *out_ptr_;
-  const Context *ctx_;
   int thread_count_;
 };
 }  // namespace mindspore::kernel
@@ -32,7 +32,7 @@ class GatherNdCPUKernel : public LiteKernel {
   GatherNdCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                     const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                     const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
   ~GatherNdCPUKernel() override;
   int Init() override;
@@ -48,7 +48,6 @@ class GatherNdCPUKernel : public LiteKernel {
   int *in_offset_ = nullptr;
   float *in_ptr_;
   float *out_ptr_;
-  const Context *ctx_;
   int thread_count_;
 };
 }  // namespace mindspore::kernel
@@ -30,7 +30,6 @@ class PowerCPUKernel : public PowerBaseCPUKernel {
                  const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                  const mindspore::lite::PrimitiveC *primitive)
       : PowerBaseCPUKernel(param, inputs, outputs, ctx, primitive),
-        ctx_(ctx),
         thread_count_(ctx->thread_num_),
         power_(reinterpret_cast<PowerParameter *>(op_parameter_)->power_),
         scale_(reinterpret_cast<PowerParameter *>(op_parameter_)->scale_),
@@ -43,7 +42,6 @@ class PowerCPUKernel : public PowerBaseCPUKernel {
   int RunImpl(int task_id);
  private:
-  const lite::Context *ctx_;
   int thread_count_;
   float power_;
   float scale_;
@@ -27,12 +27,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Range;
 namespace mindspore::kernel {
-namespace {
-constexpr int kInputNum = 0;
-constexpr int kOutputNum = 1;
-} // namespace
 int RangeCPUKernel::Init() { return RET_OK; }
 int RangeCPUKernel::ReSize() { return RET_OK; }
@@ -27,12 +27,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Rank;
 namespace mindspore::kernel {
-namespace {
-constexpr int kInputNum = 1;
-constexpr int kOutputNum = 1;
-} // namespace
 int RankCPUKernel::Init() { return RET_OK; }
 int RankCPUKernel::ReSize() { return RET_OK; }
@@ -31,7 +31,7 @@ class ReverseCPUKernel : public LiteKernel {
   ReverseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                    const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                    const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
   ~ReverseCPUKernel() {
     if (tmp_ != nullptr) {
       free(tmp_);
@@ -52,7 +52,6 @@ class ReverseCPUKernel : public LiteKernel {
   int strides_[REVERSE_STRIDE_MAX_SIZE];
   int inCount_[REVERSE_STRIDE_MAX_SIZE];
   int outCount_[REVERSE_STRIDE_MAX_SIZE];
-  const Context *ctx_;
   int thread_count_;
   int *tmp_ = nullptr;
   float *in_ptr_;
@@ -30,8 +30,6 @@ using mindspore::schema::PrimitiveType_ScatterND;
 namespace mindspore::kernel {
 namespace {
-constexpr int kScatterNDInputNum = 3;
-constexpr int kScatterNDOutputNum = 1;
 constexpr int kScatterShapeIndex = 0;
 constexpr int kScatterIndicesIndex = 1;
 constexpr int kScatterUpdateIndex = 2;
@@ -26,10 +26,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Shape;
 namespace mindspore::kernel {
-namespace {
-constexpr int kShapeInputNum = 1;
-constexpr int kShapeOutputNum = 1;
-} // namespace
 int ShapeCPUKernel::Init() { return RET_OK; }
 int ShapeCPUKernel::ReSize() { return RET_OK; }
@@ -27,11 +27,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Squeeze;
 namespace mindspore::kernel {
-namespace {
-constexpr int kSqueezeInputNum = 1;
-constexpr int kSqueezeOutputNum = 1;
-} // namespace
 int SqueezeCPUKernel::Init() { return RET_OK; }
 int SqueezeCPUKernel::ReSize() { return RET_OK; }
@@ -29,10 +29,6 @@ using mindspore::lite::RET_OP_EXECUTE_FAILURE;
 using mindspore::schema::PrimitiveType_Transpose;
 namespace mindspore::kernel {
-namespace {
-constexpr int kTransposeInputNum = 1;
-constexpr int kTransposeOutputNum = 1;
-} // namespace
 int TransposeCPUKernel::Init() {
   TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
   num_unit_ = static_cast<int>(in_tensors_[kInputIndex]->shape().at(param->perm_[kNHWC_H]));
@@ -27,9 +27,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_ZerosLike;
 namespace mindspore::kernel {
-constexpr int kInputNum = 1;
-constexpr int kOutputNum = 1;
 int ZerosLikeCPUKernel::Init() { return RET_OK; }
 int ZerosLikeCPUKernel::Run() {
@@ -92,7 +92,7 @@ int QuantizedAddCPUKernel::Run() {
   input0_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size()));
   input1_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size()));
-  ArithmeticParameter tile_para = {0};
+  ArithmeticParameter tile_para;
   tile_para.ndim_ = out_tensors_.at(0)->shape().size();
   for (size_t i = 0; i < tile_para.ndim_; i++) {
     tile_para.in_shape0_[i] = in_tensors_.at(0)->DimensionSize(i);
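
For reference on this and the matching int8 hunks below: `T x = {0};` zero-initializes every member of an aggregate, whereas a plain `T x;` leaves trivially-typed members indeterminate, so the code now relies on assigning every field it later reads (as the following loop does for `ndim_` and the shape arrays). Minimal sketch with a hypothetical struct:

```cpp
#include <cstddef>

struct TilePara {          // hypothetical stand-in for ArithmeticParameter
  std::size_t ndim_;
  int in_shape0_[4];
};

int main() {
  TilePara zeroed = {0};   // aggregate init: ndim_ and all shape entries are 0
  TilePara plain;          // default init: members indeterminate until written
  plain.ndim_ = 2;         // every field read later must be assigned first
  plain.in_shape0_[0] = 1;
  plain.in_shape0_[1] = 3;
  return static_cast<int>(zeroed.ndim_ + plain.ndim_);
}
```
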
@@ -45,7 +45,7 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel {
   explicit ArithmeticSelfInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                                        const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                                        const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
     switch (parameter->type_) {
       case PrimitiveType_Round:
         arithmeticSelf_run_ = Int8ElementRound;
@@ -98,7 +98,6 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel {
   size_t data_size_;
   ArithmeticSelfParameter *para_;
   ArithmeticSelfInt8Run arithmeticSelf_run_;
-  const Context *ctx_;
   int thread_count_;
   int8_t *in_ptr_;
   int8_t *out_ptr_;
@@ -104,7 +104,7 @@ int DivInt8CPUKernel::Run() {
   }
   if (broadcast_) {
-    ArithmeticParameter tile_para = {0};
+    ArithmeticParameter tile_para;
     tile_para.ndim_ = out_tensors_.at(0)->shape().size();
     for (size_t i = 0; i < tile_para.ndim_; i++) {
       tile_para.in_shape0_[i] = in_tensors_.at(0)->DimensionSize(i);
@@ -77,7 +77,7 @@ int MulInt8CPUKernel::Run() {
   input0_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size()));
   input1_data_ = static_cast<int8_t *>(ctx_->allocator->Malloc(out_tensors_.at(0)->Size()));
-  ArithmeticParameter tile_para = {0};
+  ArithmeticParameter tile_para;
   tile_para.ndim_ = out_tensors_.at(0)->shape().size();
   for (size_t i = 0; i < tile_para.ndim_; i++) {
     tile_para.in_shape0_[i] = in_tensors_.at(0)->DimensionSize(i);
@@ -30,12 +30,6 @@ using mindspore::lite::RET_NULL_PTR;
 using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
-namespace {
-constexpr int kInputNum = 1;
-constexpr int kOutputNum = 1;
-constexpr size_t kRank = 4;
-} // namespace
 int ResizeInt8CPUKernel::Init() {
   auto ret = ResizeBaseCPUKernel::Init();
   if (ret != RET_OK) {
@@ -128,7 +128,7 @@ int SubInt8CPUKernel::Run() {
   }
   if (broadcast_) {
-    ArithmeticParameter tile_para = {0};
+    ArithmeticParameter tile_para;
     tile_para.ndim_ = out_tensors_.at(0)->shape().size();
     for (size_t i = 0; i < tile_para.ndim_; i++) {
       tile_para.in_shape0_[i] = in_tensors_.at(0)->DimensionSize(i);
@@ -30,7 +30,7 @@ class Unsqueezeint8CPUKernel : public LiteKernel {
   Unsqueezeint8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                          const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                          const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), ctx_(ctx), thread_count_(ctx->thread_num_) {
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
     Unsq_para_ = reinterpret_cast<UnSqueezeParameter *>(op_parameter_);
     Unsq_para_->thread_count_ = op_parameter_->thread_num_;
   }
@@ -42,14 +42,12 @@ class Unsqueezeint8CPUKernel : public LiteKernel {
   int DoUnsqueeze(int task_id);
  private:
-  UnSqueezeQuantArg *quant_Unsqueeze_parm_;
   UnSqueezeParameter *Unsq_para_;
   int thread_sz_count_;
   int thread_sz_stride_;
   int data_size_;
   float *in_ptr_;
   float *out_ptr_;
-  const Context *ctx_;
   int thread_count_;
 };
 }  // namespace mindspore::kernel
@@ -57,7 +57,6 @@ class SubGraphOpenCLKernel : public SubGraphKernel {
                          std::vector<std::vector<kernel::LiteKernel *>> *out_kernels, bool is_from);
  private:
-  SubGraphOpenCLParameter *subgraph_ocl_parameter_;
   lite::opencl::OpenCLAllocator *allocator_;
   std::vector<lite::tensor::Tensor *> in_convert_tensors_;
   std::vector<lite::tensor::Tensor *> out_convert_tensors_;
@@ -17,5 +17,10 @@ else()
     target_link_libraries(timeprofile mindspore-lite pthread)
 endif()
-install(TARGETS timeprofile
-        RUNTIME DESTINATION ${MAIN_DIR}/time_profile COMPONENT ${COMPONENT_NAME})
+if (PLATFORM_ARM32 OR PLATFORM_ARM64)
+    install(TARGETS timeprofile
+            RUNTIME DESTINATION ${MAIN_DIR}/time_profile COMPONENT ${COMPONENT_NAME})
+else()
+    install(TARGETS timeprofile
+            RUNTIME DESTINATION ${MAIN_DIR}/time_profile COMPONENT ${RUN_X86_COMPONENT_NAME})
+endif()