Merge pull request !4867 from wangshaocong/lite_cleantags/v0.7.0-beta
| @@ -64,14 +64,25 @@ set(CMAKE_VERBOSE_MAKEFILE on) | |||
| add_compile_definitions(USE_ANDROID_LOG) | |||
| add_compile_definitions(NO_DLIB) | |||
| add_compile_options(-fPIC) | |||
| if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") | |||
| #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") | |||
| string(REPLACE "-g" " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | |||
| endif() | |||
| if (NOT PLATFORM_ARM64 AND NOT PLATFORM_ARM32) | |||
| if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DDebug -g") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDebug -g") | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=default") | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=default") | |||
| else () | |||
| ## enable for binscope for release | |||
| set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations ${CMAKE_C_FLAGS}") | |||
| set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes -Wno-deprecated-declarations ${CMAKE_CXX_FLAGS}") | |||
| set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}") | |||
| set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}") | |||
| string(REPLACE " -g " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | |||
| endif () | |||
| endif () | |||
| if (BUILD_DEVICE) | |||
| add_compile_definitions(BUILD_DEVICE) | |||
| endif() | |||
| endif () | |||
| if (SUPPORT_TRAIN) | |||
| add_compile_definitions(SUPPORT_TRAIN) | |||
| endif() | |||
| @@ -86,17 +97,17 @@ if (SUPPORT_GPU) | |||
| add_definitions(-DMS_OPENCL_PROFILE=false) | |||
| add_definitions(-DCL_HPP_TARGET_OPENCL_VERSION=200) | |||
| add_compile_definitions(SUPPORT_GPU) | |||
| if(OFFLINE_COMPILE) | |||
| if (OFFLINE_COMPILE) | |||
| add_compile_definitions(PROGRAM_WITH_IL) | |||
| endif() | |||
| endif () | |||
| include_directories(${TOP_DIR}/third_party/OpenCL-Headers) | |||
| include_directories(${TOP_DIR}/third_party/OpenCL-CLHPP/include) | |||
| endif() | |||
| endif () | |||
| if (WIN32) | |||
| add_compile_definitions(LITE_EXPORTS) | |||
| add_compile_definitions(BUILDING_DLL) | |||
| endif() | |||
| endif () | |||
| set(ANF_SRC | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../core/ir/meta_tensor.cc | |||
| @@ -110,26 +121,26 @@ if (BUILD_CONVERTER) | |||
| MESSAGE(FATAL_ERROR "Cannot build converter in arm platform") | |||
| endif() | |||
| find_package(Python3 3.7 COMPONENTS Interpreter Development) | |||
| if(Python3_FOUND) | |||
| if (Python3_FOUND) | |||
| set(PYTHON_INCLUDE_DIRS "${Python3_INCLUDE_DIRS}") | |||
| set(PYTHON_LIBRARIES "${Python3_LIBRARIES}") | |||
| if (WIN32) | |||
| if (Python3_DIR) | |||
| message("Python3_DIR set already: " ${Python3_DIR}) | |||
| else() | |||
| else () | |||
| string(LENGTH ${PYTHON_LIBRARIES} PYTHON_LIBRARIES_LEN) | |||
| string(LENGTH "libpythonxx.a" Python3_NAME_LEN) | |||
| math(EXPR Python3_DIR_LEN ${PYTHON_LIBRARIES_LEN}-${Python3_NAME_LEN}) | |||
| string(SUBSTRING ${Python3_LIBRARIES} 0 ${Python3_DIR_LEN} Python3_DIR) | |||
| message("Python3_DIR: " ${Python3_DIR}) | |||
| endif() | |||
| endif () | |||
| link_directories(${Python3_DIR}) | |||
| endif() | |||
| else() | |||
| endif () | |||
| else () | |||
| find_python_package(py_inc py_lib) | |||
| set(PYTHON_INCLUDE_DIRS "${py_inc}") | |||
| set(PYTHON_LIBRARIES "${py_lib}") | |||
| endif() | |||
| endif () | |||
| include_directories(${PYTHON_INCLUDE_DIRS}) | |||
| include(${TOP_DIR}/cmake/external_libs/json.cmake) | |||
| include(${TOP_DIR}/cmake/external_libs/pybind11.cmake) | |||
| @@ -137,27 +148,27 @@ if (BUILD_CONVERTER) | |||
| include_directories(${TOP_DIR}/third_party/protobuf/build/include) | |||
| link_directories(${TOP_DIR}/third_party/protobuf/build/lib) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/converter) | |||
| endif() | |||
| endif () | |||
| if (BUILD_DEVICE) | |||
| if (PLATFORM_ARM32 OR PLATFORM_ARM64) | |||
| if (NOT DEFINED ENV{ANDROID_NDK}) | |||
| message(FATAL_ERROR "env ANDROID_NDK should be setted for ARM compile") | |||
| endif() | |||
| endif () | |||
| add_compile_definitions(ENABLE_ARM) | |||
| endif() | |||
| endif () | |||
| if (PLATFORM_ARM32) | |||
| add_definitions(-mfloat-abi=softfp -mfpu=neon) | |||
| add_compile_definitions(ENABLE_ARM32) | |||
| endif() | |||
| endif () | |||
| if (PLATFORM_ARM64) | |||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16") | |||
| add_compile_definitions(ENABLE_ARM64) | |||
| if (ENABLE_FP16) | |||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16") | |||
| endif () | |||
| endif() | |||
| endif() | |||
| endif () | |||
| endif () | |||
| if (BUILD_MINDDATA) | |||
| # opencv | |||
| @@ -167,7 +178,7 @@ if (BUILD_MINDDATA) | |||
| # json | |||
| if (NOT BUILD_CONVERTER) | |||
| include(${TOP_DIR}/cmake/external_libs/json.cmake) | |||
| endif() | |||
| endif () | |||
| # eigen | |||
| include_directories(${TOP_DIR}/third_party/eigen/) | |||
| # jpeg-turbo | |||
| @@ -183,7 +194,7 @@ if (BUILD_MINDDATA) | |||
| add_compile_definitions(ENABLE_ANDROID) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata) | |||
| endif() | |||
| endif () | |||
| if (BUILD_DEVICE) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src) | |||
| @@ -191,7 +202,7 @@ if (BUILD_DEVICE) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test) | |||
| add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/time_profile) | |||
| endif() | |||
| endif() | |||
| endif () | |||
| endif () | |||
| include(${TOP_DIR}/cmake/package_lite.cmake) | |||
| @@ -37,11 +37,6 @@ static constexpr int kNHWCDimNumber = 4; | |||
| static constexpr int TENSOR_MAX_REFCOUNT = 999; | |||
| static const char *DELIM_COLON = ":"; | |||
| static const char *DELIM_COMMA = ","; | |||
| static const char *DELIM_SLASH = "/"; | |||
| static const char *DELIM_DOUBLE_BACKSLASH = "\\"; | |||
| // quantization relative | |||
| static const char QUANTIZED_UINT8[] = "QUANTIZED_UINT8"; | |||
| static const char QUANTIZED_INT8[] = "QUANTIZED_INT8"; | |||
| @@ -103,7 +103,7 @@ int WriteToBin(const std::string &file_path, void *data, size_t size) { | |||
| int CompareOutputData(float *output_data, float *correct_data, int data_size) { | |||
| float error = 0; | |||
| for (size_t i = 0; i < data_size; i++) { | |||
| for (int i = 0; i < data_size; i++) { | |||
| float abs = fabs(output_data[i] - correct_data[i]); | |||
| if (abs > 0.00001) { | |||
| error += abs; | |||
| @@ -237,7 +237,7 @@ std::string Tensor::ToString() const { | |||
| if (data == nullptr) { | |||
| return "Data of tensor is nullptr"; | |||
| } else { | |||
| for (size_t i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| for (int i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| oss << " " << data[i]; | |||
| } | |||
| } | |||
| @@ -247,7 +247,7 @@ std::string Tensor::ToString() const { | |||
| if (data == nullptr) { | |||
| return "Data of tensor is nullptr"; | |||
| } else { | |||
| for (size_t i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| for (int i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| oss << " " << data[i]; | |||
| } | |||
| } | |||
| @@ -187,8 +187,8 @@ class Tensor : public mindspore::tensor::MetaTensor { | |||
| protected: | |||
| void *data_ = nullptr; | |||
| void *device_data_ = nullptr; | |||
| schema::NodeType tensorType; | |||
| schema::Format format_; | |||
| schema::NodeType tensorType; | |||
| size_t refCount = 0; | |||
| std::vector<tensor::QuantArg> quant_params_; | |||
| mindspore::lite::Allocator *allocator_ = nullptr; | |||
| @@ -154,11 +154,11 @@ class LiteKernel { | |||
| KernelKey desc_; | |||
| std::string name_; | |||
| OpParameter *op_parameter_ = nullptr; | |||
| const mindspore::lite::PrimitiveC *primitive_ = nullptr; | |||
| const lite::Context *context_ = nullptr; | |||
| // tensor will free in ~lite_session() | |||
| std::vector<lite::tensor::Tensor *> in_tensors_; | |||
| std::vector<lite::tensor::Tensor *> out_tensors_; | |||
| const mindspore::lite::PrimitiveC *primitive_ = nullptr; | |||
| const lite::Context *context_ = nullptr; | |||
| std::vector<LiteKernel *> in_kernels_; | |||
| std::vector<LiteKernel *> out_kernels_; | |||
| bool train_mode_ = false; | |||
| @@ -66,7 +66,7 @@ int LiteSession::ConvertTensors(const lite::Model *model) { | |||
| } | |||
| auto quant_params = srcTensor->quantParams(); | |||
| if (quant_params != nullptr) { | |||
| for (int j = 0; j < quant_params->size(); j++) { | |||
| for (size_t j = 0; j < quant_params->size(); j++) { | |||
| tensor::QuantArg quant_arg{}; | |||
| quant_arg.scale = quant_params->Get(j)->scale(); | |||
| quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint(); | |||
| @@ -93,9 +93,7 @@ void LiteSession::InitGraphInputTensors(const lite::Model *model) { | |||
| } | |||
| void LiteSession::InitGraphInputMSTensors(const lite::Model *model) { | |||
| auto meta_graph = model->GetMetaGraph(); | |||
| MS_ASSERT(this->input_vec_.empty()); | |||
| MS_ASSERT(meta_graph != nullptr); | |||
| for (auto &input_tensor : this->inputs_) { | |||
| MS_ASSERT(input_tensor != nullptr); | |||
| this->input_vec_.emplace_back(new lite::tensor::LiteTensor(input_tensor)); | |||
| @@ -52,10 +52,9 @@ int Fill::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor:: | |||
| } | |||
| std::vector<int> output_shape; | |||
| for (int i = 0; i < GetDims().size(); i++) { | |||
| for (size_t i = 0; i < GetDims().size(); i++) { | |||
| output_shape.push_back(GetDims()[i]); | |||
| } | |||
| // (void)output_shape.insert(output_shape.begin(), GetDims().begin(), GetDims().end()); | |||
| output->set_shape(output_shape); | |||
| return RET_OK; | |||
| } | |||
| @@ -64,7 +64,7 @@ int FullConnection::InferShape(std::vector<lite::tensor::Tensor *> inputs_, | |||
| } | |||
| int new_k = 1; | |||
| if (GetUseAxis()) { | |||
| for (int i = GetAxis(); i < input0->shape().size(); ++i) { | |||
| for (size_t i = GetAxis(); i < input0->shape().size(); ++i) { | |||
| new_k *= input0->shape()[i]; | |||
| } | |||
| if (new_k != input1->shape()[1]) { | |||
| @@ -86,7 +86,7 @@ int FullConnection::InferShape(std::vector<lite::tensor::Tensor *> inputs_, | |||
| out_shape[GetAxis()] = input1->shape()[0]; | |||
| } else { | |||
| int total = 1; | |||
| for (int i = 0; i < input0->shape().size(); ++i) { | |||
| for (size_t i = 0; i < input0->shape().size(); ++i) { | |||
| total *= input0->shape()[i]; | |||
| } | |||
| out_shape.resize(2); | |||
| @@ -43,7 +43,6 @@ void Pad::SetPaddingMode(int padding_mode) {} | |||
| void Pad::SetConstantValue(float constant_value) {} | |||
| #endif | |||
| namespace { | |||
| const size_t kPaddingsSize = 8; | |||
| const size_t kInputRank = 4; | |||
| } // namespace | |||
| int Pad::InferShape(std::vector<tensor::Tensor *> inputs, std::vector<tensor::Tensor *> outputs) { | |||
| @@ -145,10 +145,9 @@ int Reshape::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tenso | |||
| } | |||
| } | |||
| } else if (inputs_.size() == kSingleNum) { | |||
| for (int i = 0; i < GetShape().size(); ++i) { | |||
| for (size_t i = 0; i < GetShape().size(); ++i) { | |||
| out_shape.push_back(GetShape()[i]); | |||
| } | |||
| // std::copy(GetShape().begin(), GetShape().end(), std::back_inserter(out_shape)); | |||
| } else { | |||
| MS_LOG(ERROR) << "inputs tensor size invalid."; | |||
| return RET_INFER_ERR; | |||
| @@ -75,7 +75,7 @@ int Split::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor: | |||
| int split_dim = GetSplitDim(); | |||
| std::vector<int> input_shape = input->shape(); | |||
| std::vector<int> size_split; | |||
| for (int i = 0; i < GetSizeSplits().size(); ++i) { | |||
| for (size_t i = 0; i < GetSizeSplits().size(); ++i) { | |||
| size_split.push_back(GetSizeSplits()[i]); | |||
| } | |||
| for (int i = 0; i < number_split; ++i) { | |||
| @@ -60,10 +60,9 @@ int Tile::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor:: | |||
| MS_ASSERT(tile_prim != nullptr); | |||
| std::vector<int> out_shape; | |||
| std::vector<int> multiples; | |||
| for (int i = 0; i < GetMultiples().size(); ++i) { | |||
| for (size_t i = 0; i < GetMultiples().size(); ++i) { | |||
| multiples.push_back(GetMultiples()[i]); | |||
| } | |||
| // std::copy(GetMultiples().begin(), GetMultiples().end(), std::back_inserter(multiples)); | |||
| for (size_t i = 0; i < input->shape().size(); ++i) { | |||
| int tmp = input->shape()[i] * multiples[i]; | |||
| out_shape.push_back(tmp); | |||
| @@ -59,10 +59,9 @@ int Transpose::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<ten | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<int> perm; | |||
| for (int i = 0; i < GetPerm().size(); i++) { | |||
| for (size_t i = 0; i < GetPerm().size(); i++) { | |||
| perm.push_back(GetPerm()[i]); | |||
| } | |||
| // perm.insert(perm.begin(), GetPerm().begin(), GetPerm().end()); | |||
| std::vector<int> in_shape = input->shape(); | |||
| std::vector<int> out_shape; | |||
| out_shape.resize(perm.size()); | |||
| @@ -246,7 +246,7 @@ OpParameter *PopulatePreluParameter(const mindspore::lite::PrimitiveC *primitive | |||
| } | |||
| prelu_param->op_parameter_.type_ = primitive->Type(); | |||
| auto temp = param->GetSlope(); | |||
| for (int i = 0; i < temp.size(); i++) { | |||
| for (size_t i = 0; i < temp.size(); i++) { | |||
| prelu_param->slope_[i] = temp[i]; | |||
| } | |||
| return reinterpret_cast<OpParameter *>(prelu_param); | |||
| @@ -404,7 +404,6 @@ OpParameter *PopulateConvDwParameter(const mindspore::lite::PrimitiveC *primitiv | |||
| conv_param->stride_h_ = conv_primitive->GetStrideH(); | |||
| conv_param->stride_w_ = conv_primitive->GetStrideW(); | |||
| auto pad_mode = conv_primitive->GetPadMode(); | |||
| auto convdw_lite_primitive = (lite::DepthwiseConv2D *)primitive; | |||
| MS_ASSERT(nullptr != convdw_lite_primitive); | |||
| conv_param->pad_u_ = convdw_lite_primitive->PadUp(); | |||
| @@ -828,7 +827,7 @@ OpParameter *PopulateTileParameter(const mindspore::lite::PrimitiveC *primitive) | |||
| auto param = dynamic_cast<const mindspore::lite::Tile *>(primitive); | |||
| auto multiples = param->GetMultiples(); | |||
| tile_param->in_dim_ = multiples.size(); | |||
| for (size_t i = 0; i < tile_param->in_dim_; ++i) { | |||
| for (int i = 0; i < tile_param->in_dim_; ++i) { | |||
| tile_param->multiples_[i] = multiples[i]; | |||
| } | |||
| return reinterpret_cast<OpParameter *>(tile_param); | |||
| @@ -1231,7 +1230,7 @@ OpParameter *PopulateCropParameter(const mindspore::lite::PrimitiveC *primitive) | |||
| crop_param->op_parameter_.type_ = primitive->Type(); | |||
| crop_param->axis_ = param->GetAxis(); | |||
| crop_param->offset_size_ = param_offset.size(); | |||
| for (int i = 0; i < param_offset.size(); ++i) { | |||
| for (size_t i = 0; i < param_offset.size(); ++i) { | |||
| crop_param->offset_[i] = param_offset[i]; | |||
| } | |||
| return reinterpret_cast<OpParameter *>(crop_param); | |||
| @@ -43,8 +43,8 @@ class CaffePreluBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| CaffePreluParameter *prelu_param_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -43,9 +43,9 @@ class ConcatBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| int thread_count_; | |||
| int axis_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| ConcatParameter *concat_param_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -121,7 +121,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() { | |||
| uint8_t per_channel = 0b0; | |||
| if (conv_quant_arg_->input_arg_num_ != kPerTensor) { | |||
| int in_channel = conv_param_->input_channel_; | |||
| if (conv_quant_arg_->input_arg_num_ != in_channel) { | |||
| if (static_cast<int>(conv_quant_arg_->input_arg_num_) != in_channel) { | |||
| MS_LOG(ERROR) << "input per channel quant param length is not equal to input channel."; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -130,7 +130,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() { | |||
| if (conv_quant_arg_->filter_arg_num_ != kPerTensor) { | |||
| int filter_num = conv_param_->output_channel_; | |||
| if (conv_quant_arg_->filter_arg_num_ != filter_num) { | |||
| if (static_cast<int>(conv_quant_arg_->filter_arg_num_) != filter_num) { | |||
| MS_LOG(ERROR) << "weight per channel quant param length is not equal to filter num."; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -139,7 +139,7 @@ int ConvolutionBaseCPUKernel::SetIfPerChannel() { | |||
| if (conv_quant_arg_->output_arg_num_ != kPerTensor) { | |||
| int out_channel = conv_param_->output_channel_; | |||
| if (conv_quant_arg_->output_arg_num_ != out_channel) { | |||
| if (static_cast<int>(conv_quant_arg_->output_arg_num_) != out_channel) { | |||
| MS_LOG(ERROR) << "output per channel quant param length is not equal to output channel."; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -218,11 +218,6 @@ int ConvolutionBaseCPUKernel::SetInputTensorQuantParam() { | |||
| // per channel | |||
| MS_LOG(ERROR) << "Not Support Per Channel for input now."; | |||
| return RET_ERROR; | |||
| // auto input_quant_arg = input_tensor->GetQuantParams(); | |||
| // for (int i = 0; i < in_arg_num; ++i) { | |||
| // conv_quant_arg_->input_quant_args_[i].zp_ = input_quant_arg[i].zeroPoint; | |||
| // conv_quant_arg_->input_quant_args_[i].scale_ = input_quant_arg[i].scale; | |||
| // } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -236,7 +231,7 @@ int ConvolutionBaseCPUKernel::SetFilterTensorQuantParam() { | |||
| conv_quant_arg_->filter_quant_args_[0].scale_ = weight_quant_arg.scale; | |||
| } else { | |||
| auto weight_quant_arg = weight_tensor->GetQuantParams(); | |||
| for (int i = 0; i < weight_arg_num; ++i) { | |||
| for (size_t i = 0; i < weight_arg_num; ++i) { | |||
| conv_quant_arg_->filter_quant_args_[i].zp_ = weight_quant_arg[i].zeroPoint; | |||
| conv_quant_arg_->filter_quant_args_[i].scale_ = weight_quant_arg[i].scale; | |||
| } | |||
| @@ -62,11 +62,11 @@ class ConvolutionBaseCPUKernel : public LiteKernel { | |||
| void FreeQuantParam(); | |||
| protected: | |||
| int thread_count_; | |||
| int tile_num_; | |||
| void *bias_data_ = nullptr; | |||
| void *nhwc4_input_ = nullptr; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| ConvParameter *conv_param_; | |||
| ConvQuantArg *conv_quant_arg_; | |||
| LayoutConvertor convert_func_; | |||
| @@ -41,9 +41,9 @@ class FullconnectionBaseCPUKernel : public LiteKernel { | |||
| protected: | |||
| MatMulParameter *fc_param_; | |||
| int thread_count_; | |||
| int thread_stride_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -41,9 +41,9 @@ class MatmulBaseCPUKernel : public LiteKernel { | |||
| protected: | |||
| MatMulParameter *params_; | |||
| int thread_count_; | |||
| int thread_stride_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -43,8 +43,8 @@ class PoolingBaseCPUKernel : public LiteKernel { | |||
| void FreeQuantParam(); | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| PoolingParameter *pooling_param_; | |||
| QuantArg **pooling_quant_arg_ = nullptr; | |||
| }; | |||
| @@ -41,8 +41,8 @@ class PriorBoxCPUKernel : public LiteKernel { | |||
| int PriorBoxImpl(int task_id); | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| private: | |||
| std::vector<float> output_; | |||
| @@ -76,7 +76,7 @@ int ReduceBaseCPUKernel::CheckParameters() { | |||
| } | |||
| if (num_axes_ == 0) { | |||
| for (int i = 0; i < input_rank; i++) { | |||
| for (size_t i = 0; i < input_rank; i++) { | |||
| axes_[i] = i; | |||
| } | |||
| num_axes_ = static_cast<int>(input_rank); | |||
| @@ -45,7 +45,7 @@ int SliceBaseCPUKernel::ReSize() { | |||
| param_->begin_[DIMENSION_4D - j] = param_->begin_[i]; | |||
| param_->size_[DIMENSION_4D - j] = param_->size_[i]; | |||
| } | |||
| for (size_t i = 0; i < DIMENSION_4D - param_->param_length_; i++) { | |||
| for (int i = 0; i < DIMENSION_4D - param_->param_length_; i++) { | |||
| param_->begin_[i] = 0; | |||
| param_->size_[i] = 1; | |||
| } | |||
| @@ -37,8 +37,8 @@ class SoftmaxBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| int thread_count_; | |||
| const lite::Context *ctx_; | |||
| int thread_count_; | |||
| SoftmaxParameter *softmax_param_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -39,8 +39,8 @@ class SplitBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| int thread_n_stride_; | |||
| int thread_n_num_; | |||
| int num_unit_; | |||
| @@ -41,9 +41,9 @@ class SqueezeBaseCPUKernel : public LiteKernel { | |||
| int Run() override { return 0; } | |||
| protected: | |||
| int thread_count_; | |||
| int *axis_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -44,7 +44,7 @@ int AddNCPUKernel::ReSize() { return RET_OK; } | |||
| int AddNCPUKernel::AddNParallelRun(int thread_id) { | |||
| int count_per_thread = UP_DIV(elements_num_, op_parameter_->thread_num_); | |||
| int count = MSMIN(count_per_thread, elements_num_ - thread_id * count_per_thread); | |||
| int count = MSMIN(count_per_thread, static_cast<int>(elements_num_ - thread_id * count_per_thread)); | |||
| auto stride = count_per_thread * thread_id; | |||
| auto ret = ElementAdd(in1_addr_ + stride, in2_addr_ + stride, out_addr_ + stride, count); | |||
| if (ret != NNACL_OK) { | |||
| @@ -64,9 +64,9 @@ int AddNCPUKernel::Run() { | |||
| auto input0_data = reinterpret_cast<float *>(in_tensors_[0]->Data()); | |||
| auto input1_data = reinterpret_cast<float *>(in_tensors_[1]->Data()); | |||
| auto output_data = reinterpret_cast<float *>(out_tensors_[0]->Data()); | |||
| if (elements_num_ < op_parameter_->thread_num_) { | |||
| if (static_cast<int>(elements_num_) < op_parameter_->thread_num_) { | |||
| ElementAdd(input0_data, input1_data, output_data, elements_num_); | |||
| for (int i = 2; i < in_tensors_.size(); ++i) { | |||
| for (size_t i = 2; i < in_tensors_.size(); ++i) { | |||
| ElementAdd(reinterpret_cast<float *>(in_tensors_[i]->Data()), output_data, output_data, elements_num_); | |||
| } | |||
| return RET_OK; | |||
| @@ -36,7 +36,7 @@ int ArithmeticSelfCPUKernel::Init() { | |||
| int ArithmeticSelfCPUKernel::ReSize() { | |||
| data_size_ = in_tensors_[0]->ElementsNum(); | |||
| thread_sz_count_ = MSMIN(thread_count_, data_size_); | |||
| thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_)); | |||
| thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_); | |||
| return RET_OK; | |||
| } | |||
| @@ -52,7 +52,7 @@ int ArithmeticSelfRuns(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| } | |||
| int ArithmeticSelfCPUKernel::DoArithmeticSelf(int task_id) { | |||
| int size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_); | |||
| int size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_)); | |||
| if (size <= 0) { | |||
| return RET_OK; | |||
| } | |||
| @@ -97,13 +97,13 @@ class ArithmeticSelfCPUKernel : public LiteKernel { | |||
| int DoArithmeticSelf(int task_id); | |||
| private: | |||
| int thread_count_; | |||
| int thread_sz_count_; | |||
| int thread_sz_stride_; | |||
| size_t data_size_; | |||
| ArithmeticSelfParameter *arithmeticSelfParameter_; | |||
| ArithmeticSelfRun arithmeticSelf_run_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| float *in_ptr_; | |||
| float *out_ptr_; | |||
| }; | |||
| @@ -79,7 +79,7 @@ int BatchnormCPUKernel::ReSize() { | |||
| auto n_dim = input_shapes.size(); | |||
| batchnorm_param_->channel_ = input_shapes[n_dim - 1]; | |||
| batchnorm_param_->unit_ = 1; | |||
| for (int i = 0; i < n_dim - 1; i++) { | |||
| for (size_t i = 0; i < n_dim - 1; i++) { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| } | |||
| batchnorm_param_->op_parameter_.thread_num_ = | |||
| @@ -32,7 +32,7 @@ int BiasCPUKernel::ReSize() { | |||
| auto dims = in_tensors_[0]->shape(); | |||
| MS_ASSERT(dims.size() <= 5); | |||
| bias_param_->ndim_ = dims.size(); | |||
| for (int i = 0; i < bias_param_->ndim_; i++) { | |||
| for (size_t i = 0; i < bias_param_->ndim_; i++) { | |||
| bias_param_->in_shape0_[i] = dims[i]; | |||
| bias_param_->in_shape1_[i] = 1; | |||
| bias_param_->out_shape_[i] = dims[i]; | |||
| @@ -43,8 +43,8 @@ class CaffePReluCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| CaffePReluParameter *prelu_param_; | |||
| private: | |||
| @@ -52,7 +52,7 @@ int CastCPUKernel::ReSize() { | |||
| if (data_num_ == 0) { | |||
| return RET_OK; | |||
| } | |||
| op_parameter_->thread_num_ = MSMIN(op_parameter_->thread_num_, data_num_); | |||
| op_parameter_->thread_num_ = MSMIN(op_parameter_->thread_num_, static_cast<int>(data_num_)); | |||
| stride_ = UP_DIV(data_num_, op_parameter_->thread_num_); | |||
| return RET_OK; | |||
| } | |||
| @@ -41,7 +41,10 @@ int EluCPUKernel::ReSize() { | |||
| return RET_OK; | |||
| } | |||
| int EluCPUKernel::DoExcute(int task_id) { Elu(input_addr, output_addr, elu_parameter_, task_id); } | |||
| int EluCPUKernel::DoExcute(int task_id) { | |||
| Elu(input_addr, output_addr, elu_parameter_, task_id); | |||
| return RET_OK; | |||
| } | |||
| int EluRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto EluData = reinterpret_cast<EluCPUKernel *>(cdata); | |||
| @@ -36,8 +36,8 @@ class EluCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| int thread_count_; | |||
| const lite::Context *ctx_; | |||
| int thread_count_; | |||
| EluParameter *elu_parameter_; | |||
| private: | |||
| @@ -40,12 +40,12 @@ int EmbeddingLookupCPUKernel::ReSize() { | |||
| embedding_lookup_parameter_->layer_size_ = 1; | |||
| auto in_shape = in_tensors_.front()->shape(); | |||
| for (int i = 1; i < in_shape.size(); ++i) { | |||
| for (size_t i = 1; i < in_shape.size(); ++i) { | |||
| embedding_lookup_parameter_->layer_size_ *= in_shape[i]; | |||
| } | |||
| embedding_lookup_parameter_->layer_num_ = 0; | |||
| for (int i = 0; i < in_tensors_.size() - 1; ++i) { | |||
| for (size_t i = 0; i < in_tensors_.size() - 1; ++i) { | |||
| embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0]; | |||
| } | |||
| @@ -94,7 +94,7 @@ int EmbeddingLookupCPUKernel::Run() { | |||
| } | |||
| int dest_loc = 0; | |||
| for (int i = 0; i < in_tensors_.size() - 1; i++) { | |||
| for (size_t i = 0; i < in_tensors_.size() - 1; i++) { | |||
| auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->Data()); | |||
| memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum()); | |||
| dest_loc += in_tensors_.at(i)->ElementsNum(); | |||
| @@ -43,8 +43,8 @@ class EmbeddingLookupCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| int thread_count_; | |||
| const lite::Context *ctx_; | |||
| int thread_count_; | |||
| EmbeddingLookupParameter *embedding_lookup_parameter_; | |||
| private: | |||
| @@ -37,13 +37,13 @@ int ExpandDimsCPUKernel::Init() { | |||
| int ExpandDimsCPUKernel::ReSize() { | |||
| data_size_ = in_tensors_.at(0)->ElementsNum(); | |||
| thread_sz_count_ = MSMIN(thread_count_, data_size_); | |||
| thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_)); | |||
| thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_); | |||
| return RET_OK; | |||
| } | |||
| int ExpandDimsCPUKernel::DoExpandDims(int task_id) { | |||
| size_t size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_); | |||
| size_t size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_)); | |||
| if (size == 0) { | |||
| return RET_OK; | |||
| } | |||
| @@ -41,13 +41,13 @@ class ExpandDimsCPUKernel : public LiteKernel { | |||
| int DoExpandDims(int task_id); | |||
| private: | |||
| int thread_count_; | |||
| int thread_sz_count_; | |||
| int thread_sz_stride_; | |||
| size_t data_size_; | |||
| float *in_ptr_; | |||
| float *out_ptr_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -39,13 +39,13 @@ class FillCPUKernel : public LiteKernel { | |||
| int DoFill(int task_id); | |||
| private: | |||
| int thread_count_; | |||
| int thread_sz_count_; | |||
| int thread_sz_stride_; | |||
| int data_size_; | |||
| float src_data_; | |||
| float *out_ptr_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -37,7 +37,7 @@ int FlattenCPUKernel::Init() { | |||
| int FlattenCPUKernel::ReSize() { | |||
| auto output_shape = out_tensors_[0]->shape(); | |||
| flatten_param_->size = sizeof(float); | |||
| for (int i = 0; i < output_shape.size(); i++) { | |||
| for (size_t i = 0; i < output_shape.size(); i++) { | |||
| flatten_param_->size *= output_shape[i]; | |||
| } | |||
| return RET_OK; | |||
| @@ -98,7 +98,7 @@ int FusedBatchnormCPUKernel::ReSize() { | |||
| auto n_dim = input_shapes.size(); | |||
| batchnorm_param_->channel_ = input_shapes[n_dim - 1]; | |||
| batchnorm_param_->unit_ = 1; | |||
| for (int i = 0; i < n_dim - 1; i++) { | |||
| for (size_t i = 0; i < n_dim - 1; i++) { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| } | |||
| batchnorm_param_->op_parameter_.thread_num_ = | |||
| @@ -57,7 +57,7 @@ int GatherCPUKernel::DoGather(int task_id) { | |||
| int indices_element_size = indices_tensor->ElementsNum(); | |||
| const int limit = in_shape[axis_]; | |||
| for (size_t i = 0; i < indices_element_size; ++i) { | |||
| for (int i = 0; i < indices_element_size; ++i) { | |||
| if (indices_ptr[i] >= limit) { | |||
| MS_LOG(ERROR) << " indice data: " << indices_ptr[i] << " is not in [ 0, " << limit - 1 << " ]"; | |||
| return RET_ERROR; | |||
| @@ -41,7 +41,6 @@ class GatherNdCPUKernel : public LiteKernel { | |||
| int DoGatherNd(int task_id); | |||
| private: | |||
| int thread_count_; | |||
| int thread_sz_count_; | |||
| int thread_sz_stride_; | |||
| int count_; | |||
| @@ -50,6 +49,7 @@ class GatherNdCPUKernel : public LiteKernel { | |||
| float *in_ptr_; | |||
| float *out_ptr_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -59,7 +59,7 @@ int MatmulCPUKernel::ReSize() { | |||
| } | |||
| } | |||
| for (int i = 0; i < a_shape.size() - 2; ++i) { | |||
| for (size_t i = 0; i < a_shape.size() - 2; ++i) { | |||
| batch *= a_shape[i]; | |||
| } | |||
| params_->batch = batch; | |||
| @@ -62,7 +62,7 @@ int PadCPUKernel::ReSize() { | |||
| return RET_ERROR; | |||
| } | |||
| for (int i = 0; i < rank; i++) { | |||
| for (size_t i = 0; i < rank; i++) { | |||
| in_[DEFAULT_PAD_NDIMS - rank + i] = input->shape()[i]; | |||
| out_[DEFAULT_PAD_NDIMS - rank + i] = output->shape()[i]; | |||
| } | |||
| @@ -43,8 +43,8 @@ class PReluCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| PReluParameter *prelu_param_; | |||
| private: | |||
| @@ -113,7 +113,7 @@ int ReduceCPUKernel::Run() { | |||
| } | |||
| tmp_shape_ = in_tensors_.at(0)->shape(); | |||
| src_data_ = static_cast<float *>(in_tensors_.at(0)->Data()); | |||
| for (int i = 0; i < data_buffers_.size(); ++i) { | |||
| for (size_t i = 0; i < data_buffers_.size(); ++i) { | |||
| dst_data_ = data_buffers_[i]; | |||
| int axis = axes_[i]; | |||
| outer_size_ = 1; | |||
| @@ -167,8 +167,8 @@ int ReduceCPUKernel::MallocTmpBuffer() { | |||
| for (auto i = 0; i < num_axes_ - 1; i++) { | |||
| int axis = axes_[i]; | |||
| size_t size = 1; | |||
| for (auto j = 0; j < input_shape.size(); j++) { | |||
| if (static_cast<size_t>(axis) != j) { | |||
| for (size_t j = 0; j < input_shape.size(); j++) { | |||
| if (axis != static_cast<int>(j)) { | |||
| size *= input_shape[j]; | |||
| } | |||
| } | |||
| @@ -36,7 +36,7 @@ class ReduceCPUKernel : public ReduceBaseCPUKernel { | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : ReduceBaseCPUKernel(param, inputs, outputs, ctx, primitive) {} | |||
| ~ReduceCPUKernel() { | |||
| for (auto i = 0; i < data_buffers_.size(); i++) { | |||
| for (size_t i = 0; i < data_buffers_.size(); i++) { | |||
| float *buffer = data_buffers_[i]; | |||
| if (buffer != nullptr) { | |||
| free(buffer); | |||
| @@ -30,8 +30,8 @@ using mindspore::schema::PrimitiveType_Reverse; | |||
| namespace mindspore::kernel { | |||
| int ReverseCPUKernel::Stride(int index) { | |||
| int i, stride = 1; | |||
| for (i = index + 1; i < in_tensors_[0]->shape().size(); ++i) { | |||
| int stride = 1; | |||
| for (size_t i = index + 1; i < in_tensors_[0]->shape().size(); ++i) { | |||
| stride *= in_tensors_[0]->shape()[i]; | |||
| } | |||
| return stride; | |||
| @@ -44,7 +44,7 @@ int ReverseCPUKernel::ReSize() { | |||
| auto *param = reinterpret_cast<ReverseParameter *>(op_parameter_); | |||
| auto input_shape = in_tensors_[0]->shape(); | |||
| if (param->num_axis_ > input_shape.size()) { | |||
| if (param->num_axis_ > static_cast<int>(input_shape.size())) { | |||
| MS_LOG(ERROR) << "Reverse dims : " << param->num_axis_ | |||
| << "is greater than input shape size :" << input_shape.size(); | |||
| return RET_ERROR; | |||
| @@ -46,7 +46,6 @@ class ReverseCPUKernel : public LiteKernel { | |||
| int DoReverse(int task_id); | |||
| private: | |||
| int thread_count_; | |||
| int thread_sz_count_; | |||
| int thread_sz_stride_; | |||
| int data_size_; | |||
| @@ -54,6 +53,7 @@ class ReverseCPUKernel : public LiteKernel { | |||
| int inCount_[REVERSE_STRIDE_MAX_SIZE]; | |||
| int outCount_[REVERSE_STRIDE_MAX_SIZE]; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| int *tmp_ = nullptr; | |||
| float *in_ptr_; | |||
| float *out_ptr_; | |||
| @@ -45,7 +45,7 @@ int ReverseSequenceCPUKernel::CalcCountPreAxis(const std::vector<int> shape, int | |||
| } | |||
| int ReverseSequenceCPUKernel::CalcCountAfterAxis(const std::vector<int> shape, int axis) { | |||
| int count = 1; | |||
| for (int i = axis + 1; i < shape.size(); ++i) { | |||
| for (size_t i = axis + 1; i < shape.size(); ++i) { | |||
| count *= shape[i]; | |||
| } | |||
| return count; | |||
| @@ -53,10 +53,8 @@ int ReverseSequenceCPUKernel::CalcCountAfterAxis(const std::vector<int> shape, i | |||
| int ReverseSequenceCPUKernel::ReSize() { | |||
| auto input0 = in_tensors_.at(0); | |||
| auto input1 = in_tensors_.at(1); | |||
| auto output = out_tensors_.at(0); | |||
| MS_ASSERT(input0 != nullptr); | |||
| MS_ASSERT(input1 != nullptr); | |||
| MS_ASSERT(output != nullptr); | |||
| auto para = reinterpret_cast<ReverseSequenceParameter *>(op_parameter_); | |||
| @@ -91,14 +91,14 @@ int ScaleCPUKernel::InitParameter() { | |||
| for (int i = 0; i < scale_param_->axis_; i++) { | |||
| scale_param_->outer_size_ *= in_shape[i]; | |||
| } | |||
| for (int i = 0; i < scale_shape.size(); i++) { | |||
| for (size_t i = 0; i < scale_shape.size(); i++) { | |||
| if (in_shape[i + scale_param_->axis_] != scale_shape[i]) { | |||
| MS_LOG(ERROR) << "Scale tensor shape is incorrect."; | |||
| return RET_ERROR; | |||
| } | |||
| scale_param_->axis_size_ *= in_shape[i + scale_param_->axis_]; | |||
| } | |||
| for (int i = scale_param_->axis_ + scale_shape.size(); i < in_shape.size(); i++) { | |||
| for (size_t i = scale_param_->axis_ + scale_shape.size(); i < in_shape.size(); i++) { | |||
| scale_param_->inner_size_ *= in_shape[i]; | |||
| } | |||
| return RET_OK; | |||
| @@ -68,7 +68,7 @@ int ScatterNDCPUKernel::ReSize() { | |||
| // check consistency of the shape indices and shape | |||
| auto update_rank = static_cast<int>(update->shape().size()); | |||
| auto indices_shape = indices->shape(); | |||
| if (update_rank != indices->shape().size() - 1 + shape_rank - indice_unit_rank) { | |||
| if (update_rank != static_cast<int>(indices->shape().size() - 1 + shape_rank - indice_unit_rank)) { | |||
| MS_LOG(ERROR) << "Update, shape rank and indices rank inconsistent."; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -51,7 +51,7 @@ int ShapeCPUKernel::Run() { | |||
| return RET_ERROR; | |||
| } | |||
| for (int i = 0; i < in_tensor->shape().size(); i++) { | |||
| for (size_t i = 0; i < in_tensor->shape().size(); i++) { | |||
| reinterpret_cast<int *>(out_tensor->Data())[i] = in_tensor->shape()[i]; | |||
| } | |||
| @@ -42,7 +42,7 @@ int SliceLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| int SliceCPUKernel::ReSize() { | |||
| auto *param = reinterpret_cast<SliceParameter *>(op_parameter_); | |||
| auto input_shape = in_tensors_[0]->shape(); | |||
| if (input_shape.size() != param->param_length_) { | |||
| if (static_cast<int>(input_shape.size()) != param->param_length_) { | |||
| MS_LOG(ERROR) << "Input begin's lenth " << param->param_length_ << "is not equal to input shape size " | |||
| << input_shape.size(); | |||
| return RET_ERROR; | |||
| @@ -42,8 +42,8 @@ class SparseToDenseCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| SparseToDenseParameter *s2d_param_; | |||
| private: | |||
| @@ -38,7 +38,7 @@ int TopKCPUKernel::ReSize() { | |||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | |||
| parameter->last_dim_size_ = input->shape()[input->shape().size() - 1]; | |||
| parameter->loop_num_ = 1; | |||
| for (int i = 0; i < input->shape().size() - 1; ++i) { | |||
| for (size_t i = 0; i < input->shape().size() - 1; ++i) { | |||
| parameter->loop_num_ *= input->shape()[i]; | |||
| } | |||
| return RET_OK; | |||
| @@ -42,10 +42,10 @@ int UnstackCPUKernel::ReSize() { | |||
| if (para->axis_ < 0) { | |||
| para->axis_ += shape_size; | |||
| } | |||
| for (size_t i = 0; i < shape_size; i++) { | |||
| if (i < para->axis_) { | |||
| for (size_t i = 0; i < static_cast<size_t>(shape_size); i++) { | |||
| if (static_cast<int>(i) < para->axis_) { | |||
| para->pre_dims_ *= input->DimensionSize(i); | |||
| } else if (i > para->axis_) { | |||
| } else if (static_cast<int>(i) > para->axis_) { | |||
| para->after_dims_ *= input->DimensionSize(i); | |||
| } else { | |||
| para->axis_dim_ = input->DimensionSize(i); | |||
| @@ -42,8 +42,8 @@ class WhereCPUKernel : public LiteKernel { | |||
| int DoExcute(int task_id); | |||
| protected: | |||
| int thread_count_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| WhereParameter *where_param_; | |||
| private: | |||
| @@ -60,7 +60,7 @@ int ArithmeticSelfInt8CPUKernel::Init() { | |||
| int ArithmeticSelfInt8CPUKernel::ReSize() { | |||
| data_size_ = in_tensors_[0]->ElementsNum(); | |||
| thread_sz_count_ = MSMIN(thread_count_, data_size_); | |||
| thread_sz_count_ = MSMIN(thread_count_, static_cast<int>(data_size_)); | |||
| thread_sz_stride_ = UP_DIV(data_size_, thread_sz_count_); | |||
| return RET_OK; | |||
| } | |||
| @@ -76,7 +76,7 @@ int ArithmeticSelfInt8Runs(int task_id, LiteParallelGroupEnv *penv, void *cdata) | |||
| } | |||
| int ArithmeticSelfInt8CPUKernel::DoArithmeticSelf(int task_id) { | |||
| int size = MSMIN(thread_sz_stride_, data_size_ - task_id * thread_sz_stride_); | |||
| int size = MSMIN(thread_sz_stride_, static_cast<int>(data_size_ - task_id * thread_sz_stride_)); | |||
| if (size <= 0) { | |||
| return RET_OK; | |||
| } | |||
| @@ -93,13 +93,13 @@ class ArithmeticSelfInt8CPUKernel : public LiteKernel { | |||
| int DoArithmeticSelf(int task_id); | |||
| private: | |||
| int thread_count_; | |||
| int thread_sz_count_; | |||
| int thread_sz_stride_; | |||
| size_t data_size_; | |||
| ArithmeticSelfParameter *para_; | |||
| ArithmeticSelfInt8Run arithmeticSelf_run_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| int8_t *in_ptr_; | |||
| int8_t *out_ptr_; | |||
| }; | |||
| @@ -143,7 +143,7 @@ int BatchnormInt8CPUKernel::Init() { | |||
| auto n_dim = input_shapes.size(); | |||
| batchnorm_param_->channel_ = input_shapes[n_dim - 1]; | |||
| batchnorm_param_->units_ = 1; | |||
| for (int i = 0; i < n_dim - 1; i++) { | |||
| for (size_t i = 0; i < n_dim - 1; i++) { | |||
| batchnorm_param_->units_ *= input_shapes[i]; | |||
| } | |||
| batchnorm_param_->op_parameter_.thread_num_ = | |||
| @@ -169,7 +169,7 @@ int BatchnormInt8CPUKernel::Init() { | |||
| int BatchnormInt8CPUKernel::ReSize() { | |||
| auto input_shapes = in_tensors_[0]->shape(); | |||
| batchnorm_param_->unit_ = 1; | |||
| for (int i = 0; i < input_shapes.size() - 1; i++) { | |||
| for (size_t i = 0; i < input_shapes.size() - 1; i++) { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| } | |||
| return RET_OK; | |||
| @@ -36,7 +36,7 @@ int BiasAddInt8CPUKernel::ReSize() { | |||
| auto bias_param = reinterpret_cast<ArithmeticParameter *>(op_parameter_); | |||
| auto dims = in_tensors_[0]->shape(); | |||
| bias_param->ndim_ = dims.size(); | |||
| for (int i = 0; i < bias_param->ndim_; i++) { | |||
| for (size_t i = 0; i < bias_param->ndim_; i++) { | |||
| bias_param->in_shape0_[i] = dims[i]; | |||
| bias_param->in_shape1_[i] = 1; | |||
| bias_param->out_shape_[i] = dims[i]; | |||
| @@ -65,9 +65,6 @@ int ConcatInt8CPUKernel::ReSize() { | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| if (concat_param_->input_shapes_ != nullptr) { | |||
| // free(concat_param_->input_shapes_); | |||
| } | |||
| auto input_num = in_tensors_.size(); | |||
| concat_param_->input_num_ = input_num; | |||
| concat_param_->input_shapes_ = reinterpret_cast<const int **>(malloc(sizeof(int *) * input_num)); | |||
| @@ -82,7 +79,7 @@ int ConcatInt8CPUKernel::ReSize() { | |||
| int64_t after_axis_size = 1; | |||
| auto output_tensor = out_tensors_.at(kOutputIndex); | |||
| int output_dim = output_tensor->shape().size(); | |||
| size_t output_dim = output_tensor->shape().size(); | |||
| concat_param_->output_shapes_ = output_tensor->shape().data(); | |||
| for (size_t i = axis_ + 1; i < output_dim; i++) { | |||
| after_axis_size *= concat_param_->output_shapes_[i]; | |||
| @@ -102,7 +99,7 @@ int ConcatInt8CPUKernel::Run() { | |||
| count_unit_ = thread_count_ > 1 ? UP_DIV(before_axis_size, thread_count_) : before_axis_size; | |||
| concat_param_->count_unit_ = count_unit_; | |||
| for (size_t i = 0; i < input_num; i++) { | |||
| for (int i = 0; i < input_num; i++) { | |||
| input_data_[i] = static_cast<int8_t *>(in_tensors_.at(i)->Data()); | |||
| } | |||
| output_data_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data()); | |||
| @@ -122,7 +122,7 @@ int DeConvInt8CPUKernel::InitParam() { | |||
| /* optimize normal -> same data layout */ | |||
| input_trans_func_ = RowMajor2Row16x4MajorInt8; | |||
| size_t oc4 = UP_DIV(conv_param_->output_channel_, C4NUM); | |||
| int oc4 = UP_DIV(conv_param_->output_channel_, C4NUM); | |||
| thread_count_ = MSMIN(op_parameter_->thread_num_, oc4); | |||
| thread_stride_ = UP_DIV(oc4, thread_count_); | |||
| return RET_OK; | |||
| @@ -38,7 +38,7 @@ int MatmulInt8CPUKernel::ReSize() { | |||
| int batch = 1; | |||
| auto x_shape = in_tensors_[0]->shape(); | |||
| auto o_shape = out_tensors_[0]->shape(); | |||
| for (int i = 0; i < x_shape.size() - 2; ++i) { | |||
| for (size_t i = 0; i < x_shape.size() - 2; ++i) { | |||
| batch *= x_shape[i]; | |||
| } | |||
| params_->batch = batch; | |||
| @@ -57,6 +57,7 @@ int PreluInt8CPUKernel::ReSize() { | |||
| quant_prelu_parm_->element_num = in_tensors_[0]->Size(); | |||
| quant_prelu_parm_->in_shape_ = input_tensor->shape().data(); | |||
| quant_prelu_parm_->out_shape_ = out_tensor->shape().data(); | |||
| return RET_OK; | |||
| } | |||
| int PreluInt8CPUKernel::Run() { | |||
| @@ -184,8 +184,8 @@ int ReduceInt8CPUKernel::MallocTmpBuffer() { | |||
| for (auto i = 0; i < num_axes_ - 1; i++) { | |||
| int axis = axes_[i]; | |||
| size_t size = 1; | |||
| for (auto j = 0; j < input_shape.size(); j++) { | |||
| if (static_cast<size_t>(axis) != j) { | |||
| for (size_t j = 0; j < input_shape.size(); j++) { | |||
| if (axis != static_cast<int>(j)) { | |||
| size *= input_shape[j]; | |||
| } | |||
| } | |||
| @@ -258,7 +258,7 @@ int ReduceInt8CPUKernel::Run() { | |||
| tmp_shape_ = in_tensors_.at(0)->shape(); | |||
| src_data_ = begin_src_data_; | |||
| for (int i = 0; i < data_buffers_.size(); ++i) { | |||
| for (size_t i = 0; i < data_buffers_.size(); ++i) { | |||
| if (mode_ == static_cast<int>(schema::ReduceMode_ReduceMean)) { | |||
| quant_arg_.mean_multiplier_ = mean_multipliers_[i]->multiplier_; | |||
| quant_arg_.mean_left_shift_ = mean_multipliers_[i]->left_shift_; | |||
| @@ -133,7 +133,7 @@ int SqueezeInt8CPUKernel::Run() { | |||
| auto input_type = in_tensors_[i]->data_type(); | |||
| if (input_type == kNumberTypeUInt8) { | |||
| uint8_t *input_tmp = reinterpret_cast<uint8_t *>(in_tensors_[i]->Data()); | |||
| for (size_t j = 0; j < input_size; j++) { | |||
| for (int j = 0; j < input_size; j++) { | |||
| inputs_array[i][j] = (int8_t)(input_tmp[j] - 128); | |||
| } | |||
| for (size_t j = 0; j < input_dim; j++) { | |||
| @@ -148,12 +148,12 @@ int SqueezeInt8CPUKernel::Run() { | |||
| auto output_type = out_tensors_[0]->data_type(); | |||
| if (output_type == kNumberTypeUInt8) { | |||
| auto output_size = quant_Squeeze_parm_->output_size_; | |||
| for (size_t i = 0; i < output_size; i++) { | |||
| for (int i = 0; i < output_size; i++) { | |||
| output_addr[i] = (uint8_t)(output_addr[i] + 128); | |||
| } | |||
| } | |||
| for (int i = 0; i < input_dim; i++) { | |||
| for (size_t i = 0; i < input_dim; i++) { | |||
| free(*(inputs_array + i)); | |||
| } | |||
| @@ -40,7 +40,7 @@ int TopKInt8CPUKernel::ReSize() { | |||
| lite::tensor::Tensor *input = in_tensors_.at(0); | |||
| parameter->last_dim_size_ = input->shape()[input->shape().size() - 1]; | |||
| parameter->loop_num_ = 1; | |||
| for (int i = 0; i < input->shape().size() - 1; ++i) { | |||
| for (size_t i = 0; i < input->shape().size() - 1; ++i) { | |||
| parameter->loop_num_ *= input->shape()[i]; | |||
| } | |||
| return RET_OK; | |||
| @@ -44,13 +44,13 @@ class Unsqueezeint8CPUKernel : public LiteKernel { | |||
| private: | |||
| UnSqueezeQuantArg *quant_Unsqueeze_parm_; | |||
| UnSqueezeParameter *Unsq_para_; | |||
| int thread_count_; | |||
| int thread_sz_count_; | |||
| int thread_sz_stride_; | |||
| int data_size_; | |||
| float *in_ptr_; | |||
| float *out_ptr_; | |||
| const Context *ctx_; | |||
| int thread_count_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -61,7 +61,7 @@ void ComputeStrides(int *shape, int *strides, int ndim) { | |||
| } | |||
| void CalcMultiplesAndStrides(ArithmeticParameter *param) { | |||
| for (auto i = 0; i < param->ndim_; i++) { | |||
| for (size_t i = 0; i < param->ndim_; i++) { | |||
| param->multiples0_[i] = param->out_shape_[i] / param->in_shape0_[i]; | |||
| param->multiples1_[i] = param->out_shape_[i] / param->in_shape1_[i]; | |||
| } | |||
| @@ -50,7 +50,6 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i | |||
| const int *crops, int data_size) { | |||
| int block_h = block[0]; | |||
| int block_w = block[1]; | |||
| int in_n = in_shape[0]; | |||
| int in_h = in_shape[1]; | |||
| int in_w = in_shape[2]; | |||
| int in_c = in_shape[3]; | |||
| @@ -69,7 +69,7 @@ int DeConvPostFp32C8x8(const float *src, float *tmp, const float *bias, float *d | |||
| int src_index = ih * src_ih_stride + iw * src_iw_stride + kh * src_kh_stride + kw * src_kw_stride; | |||
| int dst_index = oh * dst_oh_stride + ow * dst_ow_stride + kh * dst_kh_stride + kw * dst_kw_stride; | |||
| float *tmp_dst = dst_ptr + dst_index; | |||
| float *tmp_src = src_ptr + src_index; | |||
| const float *tmp_src = src_ptr + src_index; | |||
| #ifdef ENABLE_ARM64 | |||
| asm volatile( | |||
| "mov x0, %[tmp_src] \n" | |||
| @@ -35,8 +35,6 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar | |||
| int scale = param->scale_; | |||
| int pooled_height = param->pooledH_; | |||
| int pooled_width = param->pooledW_; | |||
| int *in_strides = &(param->in_strides_); | |||
| int *out_strides = &(param->out_strides_); | |||
| int roi_stride = 5; | |||
| int roi_ind_st = roi_st * roi_stride; | |||
| float *max_c = malloc(channels_ * sizeof(float)); | |||
| @@ -55,9 +53,8 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar | |||
| float bin_size_h = (float)roi_height / (float)pooled_height; | |||
| float bin_size_w = (float)roi_width / (float)pooled_width; | |||
| float *batch_data = in_ptr + in_strides[kNHWC_N] * roi_batch_ind; | |||
| float *batch_data = in_ptr + param->in_strides_[kNHWC_N] * roi_batch_ind; | |||
| int out_ind = i * out_strides[0]; | |||
| for (int ph = 0; ph < pooled_height; ++ph) { | |||
| for (int pw = 0; pw < pooled_width; ++pw) { | |||
| int hstart = (int)floorf(ph * bin_size_h); // block xi_1 | |||
| @@ -76,17 +73,17 @@ int ROIPooling(float *in_ptr, float *out_ptr, float *roi, int tid, ROIPoolingPar | |||
| max_c[j] = 0; | |||
| } | |||
| } | |||
| int pooled_index = i * out_strides[0] + ph * out_strides[1] + pw * out_strides[2]; | |||
| int bd_index = hstart * in_strides[1]; | |||
| int pooled_index = i * param->out_strides_[0] + ph * param->out_strides_[1] + pw * param->out_strides_[2]; | |||
| int bd_index = hstart * param->in_strides_[1]; | |||
| for (int h = hstart; h < hend; ++h) { | |||
| int wi = bd_index + wstart * in_strides[2]; | |||
| int wi = bd_index + wstart * param->in_strides_[2]; | |||
| for (int w = wstart; w < wend; ++w) { | |||
| for (int c = 0; c < channels_; ++c) { | |||
| max_c[c] = MSMAX(batch_data[wi + c], max_c[c]); | |||
| } | |||
| wi += in_strides[2]; | |||
| wi += param->in_strides_[2]; | |||
| } // in_w end; | |||
| bd_index += in_strides[1]; | |||
| bd_index += param->in_strides_[1]; | |||
| } // in_h end | |||
| for (int j = 0; j < channels_; ++j) { | |||
| out_ptr[pooled_index + j] = max_c[j]; | |||
| @@ -141,7 +141,7 @@ int SpaceToBatch(const float *input, float *output, SpaceToBatchParameter param, | |||
| if (input == NULL || output == NULL) { | |||
| return NNACL_NULL_PTR; | |||
| } | |||
| auto ret = | |||
| int ret = | |||
| SpaceToBatchForNHWC(input, output, param.padded_in_shape_, param.n_dims_, param.block_sizes_, h_start, h_end); | |||
| return ret; | |||
| } | |||
| @@ -58,7 +58,6 @@ void BatchToSpaceForNHWCInt8(const int8_t *input, int8_t *output, const int *in_ | |||
| const int *crops, QuantArg *in_quant_arg, QuantArg *out_quant_arg) { | |||
| int block_h = block[0]; | |||
| int block_w = block[1]; | |||
| int in_n = in_shape[0]; | |||
| int in_h = in_shape[1]; | |||
| int in_w = in_shape[2]; | |||
| int in_c = in_shape[3]; | |||
| @@ -31,7 +31,7 @@ void Crop(const int8_t *input, int8_t *output, int task_id, CropParameter *para) | |||
| Crop3D(input, output, task_id, para); | |||
| break; | |||
| case 4: | |||
| Crop4D(input, output, task_id, para); | |||
| Int8Crop4D(input, output, task_id, para); | |||
| break; | |||
| } | |||
| } | |||
| @@ -266,7 +266,7 @@ int ReduceMinLastAxis(const int outer_size, const int inner_size, const int axis | |||
| } | |||
| int32_t tmp_scaled = | |||
| RoundingDivideByPOT(SaturatingRoundingDoublingHighMul( | |||
| (tmp - quant->in_zp_) * (1 << (unsigned int)quant->in_out_left_shift_ + base_offset), | |||
| (tmp - quant->in_zp_) * (1 << ((unsigned int)quant->in_out_left_shift_ + base_offset)), | |||
| quant->in_out_multiplier_), | |||
| quant->in_out_right_shift_ + base_offset); | |||
| if (isAddOverflow(tmp_scaled, quant->out_zp_)) { | |||
| @@ -33,14 +33,13 @@ int ResizeBilinearInt8(const int8_t *input_data, int8_t *output_data, const int | |||
| int32_t new_height = output_shape[1]; | |||
| int32_t new_width = output_shape[2]; | |||
| int32_t height_scale, width_scale; | |||
| int32_t height_scale = 0, width_scale = 0; | |||
| ComputeScale(in_h, new_height, align_corners, &height_scale); | |||
| ComputeScale(in_w, new_width, align_corners, &width_scale); | |||
| int n, h, w, c; | |||
| for (n = 0; n < in_n; n++) { | |||
| for (h = tid; h < new_height; h += thread_num) { | |||
| // float actual_y = (float)h * height_scale; | |||
| const int base_offset = 20; | |||
| int scaled_actual_y; | |||
| int bottom, top; | |||
| @@ -99,10 +98,10 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat | |||
| for (batch = 0; batch < output_shape[0]; batch++) { | |||
| for (y = tid; y < output_shape[1]; y += thread_num) { | |||
| int input_y; | |||
| int input_y = 0; | |||
| ComputeNearestNeighborInt(y, in_h, new_height, align_corners, &input_y); | |||
| for (x = 0; x < output_shape[2]; x++) { | |||
| int input_x; | |||
| int input_x = 0; | |||
| ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x); | |||
| int in_offset = offset(input_shape, batch, input_y, input_x, 0); | |||
| int out_offset = offset(output_shape, batch, y, x, 0); | |||
| @@ -159,10 +158,10 @@ int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, con | |||
| for (batch = 0; batch < output_shape[0]; batch++) { | |||
| for (y = tid; y < output_shape[1]; y += thread_num) { | |||
| int input_y; | |||
| int input_y = 0; | |||
| ComputeNearestNeighborInt(y, in_h, new_height, align_corners, &input_y); | |||
| for (x = 0; x < output_shape[2]; x++) { | |||
| int input_x; | |||
| int input_x = 0; | |||
| ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x); | |||
| for (c = 0; c < output_shape[3]; c++) { | |||
| int in_offset = offset(input_shape, batch, input_y, input_x, c); | |||
| @@ -961,7 +961,7 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int | |||
| #endif | |||
| } | |||
| for (; c < channel; c++) { | |||
| float *src_ptr = src_batch + hw * channel + c; | |||
| const float *src_ptr = src_batch + hw * channel + c; | |||
| float *dst_ptr = dst_batch + c * plane + hw; | |||
| for (size_t i = 0; i < C8NUM; i++) { | |||
| dst_ptr[i] = src_ptr[i * channel]; | |||
| @@ -969,7 +969,7 @@ void PackNHWCToNCHWFp32(const void *src, void *dst, int batches, int plane, int | |||
| } | |||
| } | |||
| for (; hw < plane; hw++) { | |||
| float *src_ptr = src_batch + hw * channel; | |||
| const float *src_ptr = src_batch + hw * channel; | |||
| float *dst_ptr = dst_batch + hw; | |||
| for (size_t i = 0; i < channel; i++) { | |||
| dst_ptr[i * plane] = src_ptr[i]; | |||
| @@ -1023,10 +1023,10 @@ void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter | |||
| int unit = conv_param->input_h_ * conv_param->input_w_; | |||
| for (int b = 0; b < conv_param->input_batch_; b++) { | |||
| int8_t *src_b = src + b * unit * conv_param->input_channel_; | |||
| const int8_t *src_b = src + b * unit * conv_param->input_channel_; | |||
| int16_t *dst_b = dst + b * unit * ic4 * C4NUM; | |||
| for (int k = 0; k < unit; k++) { | |||
| int8_t *src_k = src_b + k * conv_param->input_channel_; | |||
| const int8_t *src_k = src_b + k * conv_param->input_channel_; | |||
| int16_t *dst_k = dst_b + k * ic4 * C4NUM; | |||
| for (int c = 0; c < conv_param->input_channel_; c++) { | |||
| dst_k[c] = (int16_t)(src_k[c] - input_zp); | |||
| @@ -1044,10 +1044,10 @@ void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight | |||
| } | |||
| int c4_block_num = c / C4NUM; | |||
| int c4_block_rem = c % C4NUM; | |||
| int8_t *src_c = origin_weight + c * unit; | |||
| const int8_t *src_c = origin_weight + c * unit; | |||
| int16_t *dst_c = packed_weight_ + c4_block_num * unit * C4NUM; | |||
| for (int k = 0; k < unit; k++) { | |||
| int8_t *src_kernel = src_c + k; | |||
| const int8_t *src_kernel = src_c + k; | |||
| int16_t *dst_kernel = dst_c + C4NUM * k + c4_block_rem; | |||
| *dst_kernel = (int16_t)(src_kernel[0] - weight_zp); | |||
| } | |||
| @@ -30,14 +30,14 @@ void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantiz | |||
| if (quantized_multiplier == NULL || right_shift == NULL) { | |||
| return; | |||
| } | |||
| int shift; | |||
| int shift = 0; | |||
| QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift); | |||
| *right_shift = -shift; | |||
| } | |||
| void QuantizeRoundParameter(double double_multiplier, int32_t *quantized_multiplier, int *left_shift, | |||
| int *right_shift) { | |||
| int shift; | |||
| int shift = 0; | |||
| QuantizeMultiplierSmallerThanOne(double_multiplier, quantized_multiplier, &shift); | |||
| shift = -shift; | |||
| if (shift < 0) { | |||
| @@ -913,7 +913,7 @@ void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weigh | |||
| int src_oc_offset = o * iC8 * C8NUM * kernel_plane; | |||
| int dst_oc_offset = oc4_block_num * C4NUM * iC8 * C8NUM * input_unit * input_unit + oc4_block_rem; | |||
| for (int i = 0; i < iC8; i++) { | |||
| int16_t *src_ic8_ptr = weight_data + src_oc_offset + i * kernel_plane * C8NUM; | |||
| const int16_t *src_ic8_ptr = weight_data + src_oc_offset + i * kernel_plane * C8NUM; | |||
| int16_t *dst_ic8_ptr = trans_weight + dst_oc_offset + i * C4NUM * C8NUM; | |||
| #ifdef ENABLE_ARM | |||
| int16x8_t g00 = vld1q_s16(src_ic8_ptr); | |||
| @@ -1107,7 +1107,7 @@ void Conv3x3Int8FilterTransform(const int16_t *weight_data, int16_t *trans_weigh | |||
| dst_ic8_ptr[28 + 15 * dst_step] = m33[7]; | |||
| #else | |||
| for (int j = 0; j < C8NUM; j++) { | |||
| int16_t *local_ptr = src_ic8_ptr + j; | |||
| const int16_t *local_ptr = src_ic8_ptr + j; | |||
| int16_t dst00 = local_ptr[0] * 2; | |||
| int16_t dst01 = (local_ptr + 8)[0] * 2; | |||
| int16_t dst02 = (local_ptr + 16)[0] * 2; | |||
| @@ -29,6 +29,7 @@ int ParallelExecutor::Prepare(std::vector<mindspore::kernel::LiteKernel *> &kern | |||
| for (mindspore::kernel::LiteKernel *kernel : kernels) { | |||
| refCount[kernel] = kernel->out_kernels().size(); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| void ParallelExecutor::PrepareReadyKernels(const std::vector<mindspore::kernel::LiteKernel *> &kernels) { | |||
| @@ -235,17 +235,17 @@ bool ThreadPool::SetThreadPool() { | |||
| } else if (localMaxThreadNums > kDefaultMaxThreadNums) { | |||
| localMaxThreadNums = kDefaultMaxThreadNums; | |||
| } | |||
| if (configThreadNums > kDefaultMaxThreadNums) { | |||
| if (configThreadNums > static_cast<int>(kDefaultMaxThreadNums)) { | |||
| configThreadNums = kDefaultMaxThreadNums; | |||
| } | |||
| int addNum = 0; | |||
| if (configThreadNums > kDefaultMaxThreadNums) { | |||
| if (configThreadNums > static_cast<int>(kDefaultMaxThreadNums)) { | |||
| addNum = configThreadNums - curThreadRunNums; | |||
| } else if (localMaxThreadNums > curThreadNums) { | |||
| } else if (static_cast<int>(localMaxThreadNums) > curThreadNums) { | |||
| addNum = localMaxThreadNums - curThreadNums; | |||
| } | |||
| AddNewThread(addNum); | |||
| if (curThreadRunNums > localMaxThreadNums) { | |||
| if (curThreadRunNums > static_cast<int>(localMaxThreadNums)) { | |||
| SubRunThread(localMaxThreadNums); | |||
| } else { | |||
| AddRunThread(localMaxThreadNums); | |||
| @@ -376,7 +376,7 @@ bool ThreadPool::DistributeTask(ThreadPoolTask *task, int numTask) { | |||
| void ThreadPool::AddRunThread(int num) { | |||
| int activeNums = num - curThreadRunNums; | |||
| if (activeNums <= 0 || activateList.size() < activeNums) { | |||
| if (activeNums <= 0 || static_cast<int>(activateList.size()) < activeNums) { | |||
| return; | |||
| } | |||
| for (int i = curThreadRunNums - 1, j = 0; j < activeNums; ++i, ++j) { | |||
| @@ -6,6 +6,10 @@ include_directories(${TOP_DIR}) | |||
| include_directories(${TEST_DIR}) | |||
| include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/dependency_gtest.cmake) | |||
| string(REPLACE " -Werror " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") | |||
| string(REPLACE " -Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | |||
| STRING(REPLACE " fvisibility=hidden " " -fvisibility=default " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") | |||
| STRING(REPLACE " fvisibility=hidden " " -fvisibility=default " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | |||
| ### anf src | |||
| set(ANF_SRC | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../../core/ir/meta_tensor.cc | |||
| @@ -28,6 +28,10 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| static const char *DELIM_COLON = ":"; | |||
| static const char *DELIM_COMMA = ","; | |||
| static const char *DELIM_SLASH = "/"; | |||
| int Benchmark::GenerateRandomData(size_t size, void *data) { | |||
| MS_ASSERT(data != nullptr); | |||
| char *castedData = static_cast<char *>(data); | |||
| @@ -85,7 +89,7 @@ int Benchmark::ReadInputFile() { | |||
| MS_LOG(ERROR) << "Not supported image input"; | |||
| return RET_ERROR; | |||
| } else { | |||
| for (auto i = 0; i < _flags->input_data_list.size(); i++) { | |||
| for (size_t i = 0; i < _flags->input_data_list.size(); i++) { | |||
| auto cur_tensor = msInputs.at(i); | |||
| MS_ASSERT(cur_tensor != nullptr); | |||
| size_t size; | |||
| @@ -35,16 +35,6 @@ OpDefCopyer GetSimpleOpCopyer() { | |||
| newCNode->quantType = inCNode->quantType; | |||
| newCNode->primitive = std::make_unique<schema::PrimitiveT>(); | |||
| newCNode->primitive->value.type = inCNode->primitive->value.type; | |||
| // newCNode->quantParam.clear(); | |||
| // for (size_t i = 0; i < inCNode->quantParam.size(); i++) { | |||
| // auto &quantParam = inCNode->quantParam.at(i); | |||
| // auto quantParamCopy = CopyQuantParamArrayT(quantParam); | |||
| // if (quantParamCopy == nullptr) { | |||
| // //MS_LOG(ERROR)("CopyQuantParamArray return nullptr, node: %s", inOpDef->name.c_str()); | |||
| // return nullptr; | |||
| // } | |||
| // newCNode->quantParam.emplace_back(std::move(quantParamCopy)); | |||
| // } | |||
| return std::move(newCNode); | |||
| }; | |||
| } | |||
| @@ -139,20 +129,18 @@ STATUS IsolateNode(schema::MetaGraphT *graphT, CNodeT *node) { | |||
| auto inputTensorIdxes = node->inputIndex; | |||
| auto outputTensorIdxes = node->outputIndex; | |||
| if (inputTensorIdxes.empty()) { | |||
| // MS_LOG(ERROR)("Node %s should has no inputs", node->name.c_str()); | |||
| MS_LOG(ERROR) << "Node " << node->name.c_str() << "should has no inputs"; | |||
| return RET_ERROR; | |||
| } | |||
| if (outputTensorIdxes.size() != 1) { | |||
| // MS_LOG(ERROR)("FakeQuantNode %s should has 1 output, in fact: %zu", node->name.c_str(), | |||
| // outputTensorIdxes.size()); | |||
| MS_LOG(ERROR) << "FakeQuantNode " << node->name.c_str() \ | |||
| << "should has 1 output, in fact: " << outputTensorIdxes.size(); | |||
| return RET_ERROR; | |||
| } | |||
| auto inDataTensorIdx = inputTensorIdxes.front(); | |||
| auto outDataTensorIdx = outputTensorIdxes.front(); | |||
| MS_ASSERT(graphT->allTensors.size() > inDataTensorIdx); | |||
| const auto &inDataTensor = graphT->allTensors.at(inDataTensorIdx); | |||
| MS_ASSERT(inDataTensor != nullptr); | |||
| auto &gOutTensorIdx = graphT->outputIndex; | |||
| for (auto iter = gOutTensorIdx.begin(); iter != gOutTensorIdx.end(); iter++) { | |||
| if (*iter == outDataTensorIdx) { | |||
| @@ -186,20 +174,13 @@ STATUS IsolateNode(schema::MetaGraphT *graphT, CNodeT *node) { | |||
| STATUS IsolateOneWayNode(schema::MetaGraphT *graph, size_t subGraphIdx, size_t nodeIdx, bool removeTensor) { | |||
| MS_ASSERT(graph != nullptr); | |||
| /* | |||
| if (graph->subgraphs.size() <= subGraphIdx) { | |||
| //MS_LOG(ERROR)("subGraphIdx out of range: %zu", subGraphIdx); | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| */ | |||
| // return IsolateOneWayNode(graph->subgraphs.at(subGraphIdx).get(), nodeIdx, removeTensor); | |||
| return IsolateOneWayNode(graph, nodeIdx, removeTensor); | |||
| } | |||
| STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool removeTensor) { | |||
| MS_ASSERT(graphT != nullptr); | |||
| if (graphT->nodes.size() <= nodeIdx) { | |||
| // MS_LOG(ERROR)("nodeIdx out of range: %zu", nodeIdx); | |||
| MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| @@ -208,11 +189,11 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool remove | |||
| auto outputTensorIdxes = node->outputIndex; | |||
| auto preNodeIdxes = GetInputNodeIdx(*graphT, nodeIdx); | |||
| if (preNodeIdxes.size() > 1 || outputTensorIdxes.size() > 1) { | |||
| // MS_LOG(ERROR)("Only support node who has no more than one input and one output"); | |||
| MS_LOG(ERROR) << "Only support node who has no more than one input and one output"; | |||
| return RET_ERROR; | |||
| } | |||
| if (inputTensorIdxes.empty()) { | |||
| // MS_LOG(ERROR)("Error, %zuth node has no input tensor", nodeIdx); | |||
| MS_LOG(ERROR) << "Error, " << nodeIdx << "th node has no input tensor"; | |||
| return RET_ERROR; | |||
| } | |||
| auto inDataTensorIdx = inputTensorIdxes.front(); | |||
| @@ -247,7 +228,7 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, size_t nodeIdx, bool remove | |||
| // remove all node's outputTensors | |||
| auto status = RemoveTensor(graphT, outputTensorIdxes); | |||
| if (status != RET_OK) { | |||
| // MS_LOG(ERROR)("RemoveOutputTensors of node %s failed", node->name.c_str()); | |||
| MS_LOG(ERROR) << "RemoveOutputTensors of node " << node->name.c_str() << "failed"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| @@ -270,7 +251,7 @@ STATUS IsolateOneWayNode(schema::MetaGraphT *graphT, CNodeT *node, bool removeTe | |||
| } | |||
| } | |||
| if (!isSubNode) { | |||
| // MS_LOG(ERROR)("Node %s is not in graphT %s", node->name.c_str(), graphT->name.c_str()); | |||
| MS_LOG(ERROR) << "Node " << node->name.c_str() << "is not in graphT " << graphT->name.c_str(); | |||
| return RET_PARAM_INVALID; | |||
| } else { | |||
| return IsolateOneWayNode(graphT, nodeIdx, removeTensor); | |||
| @@ -343,7 +324,7 @@ STATUS UpdateNodeIndex(CNodeT *node, uint32_t deleteIdx) { | |||
| STATUS AddTensor2Node(schema::MetaGraphT *graphT, uint32_t nodeIdx, std::unique_ptr<TensorT> tensor, | |||
| InsertPlace place) { | |||
| if (nodeIdx >= graphT->nodes.size()) { | |||
| // MS_LOG(ERROR)("nodeIdx out of range: %du", nodeIdx); | |||
| MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| graphT->allTensors.emplace_back(std::move(tensor)); | |||
| @@ -360,16 +341,16 @@ STATUS AddTensor2Node(schema::MetaGraphT *graphT, uint32_t nodeIdx, std::unique_ | |||
| STATUS ReplaceTensorOfNode(schema::MetaGraphT *graphT, uint32_t nodeIdx, uint32_t inTensorIdx, | |||
| std::unique_ptr<TensorT> tensor) { | |||
| if (nodeIdx >= graphT->nodes.size()) { | |||
| // MS_LOG(ERROR)("nodeIdx out of range: %du", nodeIdx); | |||
| MS_LOG(ERROR) << "nodeIdx out of range: " << nodeIdx; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| auto node = graphT->nodes.at(nodeIdx).get(); | |||
| if (inTensorIdx >= graphT->allTensors.size()) { | |||
| // MS_LOG(ERROR)("inTensorIdx out of range: %du", nodeIdx); | |||
| MS_LOG(ERROR) << "inTensorIdx out of range: " << nodeIdx; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| if (!IsContain(node->inputIndex, inTensorIdx)) { | |||
| // MS_LOG(ERROR)("inTensorIdx(%du) is not a inputIdx of node(%du)", inTensorIdx, nodeIdx); | |||
| MS_LOG(ERROR) << "inTensorIdx(" << inTensorIdx << ") is not a inputIdx of node(" << nodeIdx << ")"; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| graphT->allTensors.at(inTensorIdx).swap(tensor); | |||
| @@ -379,7 +360,7 @@ STATUS ReplaceTensorOfNode(schema::MetaGraphT *graphT, uint32_t nodeIdx, uint32_ | |||
| NodeIter InsertNode(schema::MetaGraphT *graphT, uint32_t existNodeIdx, InsertPlace place, size_t inoutIndex, | |||
| std::unique_ptr<CNodeT> toAddNode, STATUS *errorCode, OpDefCopyer opDefCopyer) { | |||
| if (existNodeIdx >= graphT->nodes.size()) { | |||
| // MS_LOG(ERROR)("nodeIdx out of range: %du", existNodeIdx); | |||
| MS_LOG(ERROR) << "nodeIdx out of range: " << existNodeIdx; | |||
| return graphT->nodes.end(); | |||
| } | |||
| auto nodeIter = graphT->nodes.begin() + existNodeIdx; | |||
| @@ -447,17 +428,14 @@ NodeIter InsertNodeBefore(schema::MetaGraphT *graphT, NodeIter existNodeIter, si | |||
| existNodeIter++; | |||
| } else { | |||
| std::vector<std::unique_ptr<CNodeT>> toAddNodes; | |||
| int i = 0; | |||
| for (size_t preNodeIdx : preNodeIdxes) { | |||
| MS_ASSERT(graphT->nodes.size() > preNodeIdx); | |||
| auto &preNode = graphT->nodes.at(preNodeIdx); | |||
| MS_ASSERT(preNode != nullptr); | |||
| for (size_t i = 0; i < preNodeIdxes.size(); i++) { | |||
| MS_ASSERT(graphT->nodes.size() > preNodeIdxes.at(i)); | |||
| auto &preTensor = graphT->allTensors.at(preTensorIdx); | |||
| MS_ASSERT(preTensor != nullptr); | |||
| auto toAddTensor = CopyTensorDefT(preTensor); | |||
| if (toAddTensor == nullptr) { | |||
| *errorCode = RET_NULL_PTR; | |||
| // MS_LOG(ERROR)("Copy TensorT failed"); | |||
| MS_LOG(ERROR) << "Copy TensorT failed"; | |||
| return graphT->nodes.end(); | |||
| } | |||
| if (toAddNodeIn->primitive->value.type == schema::PrimitiveType_QuantDTypeCast) { | |||
| @@ -468,7 +446,7 @@ NodeIter InsertNodeBefore(schema::MetaGraphT *graphT, NodeIter existNodeIter, si | |||
| size_t toAddTensorIdx = graphT->allTensors.size() - 1; | |||
| auto toAddNode = opDefCopyer(toAddNodeIn.get()); | |||
| if (toAddNode == nullptr) { | |||
| // MS_LOG(ERROR)("copy toAddNodeIn failed"); | |||
| MS_LOG(ERROR) << "copy toAddNodeIn failed"; | |||
| *errorCode = RET_NULL_PTR; | |||
| return graphT->nodes.end(); | |||
| } | |||
| @@ -509,7 +487,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz | |||
| MS_ASSERT(postTensor != nullptr); | |||
| auto toAddTensor = CopyTensorDefT(postTensor); | |||
| if (toAddTensor == nullptr) { | |||
| // MS_LOG(ERROR)("Copy TensorT failed"); | |||
| MS_LOG(ERROR) << "Copy TensorT failed"; | |||
| *errorCode = RET_NULL_PTR; | |||
| return graphT->nodes.end(); | |||
| } | |||
| @@ -521,7 +499,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz | |||
| size_t toAddTensorIdx = graphT->allTensors.size() - 1; | |||
| auto toAddNode = opDefCopyer(toAddNodeIn.get()); | |||
| if (toAddNode == nullptr) { | |||
| // MS_LOG(ERROR)("copy toAddNodeIn failed"); | |||
| MS_LOG(ERROR) << "copy toAddNodeIn failed"; | |||
| *errorCode = RET_NULL_PTR; | |||
| return graphT->nodes.end(); | |||
| } | |||
| @@ -548,7 +526,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz | |||
| MS_ASSERT(postTensor != nullptr); | |||
| auto toAddTensor = CopyTensorDefT(postTensor); | |||
| if (toAddTensor == nullptr) { | |||
| // MS_LOG(ERROR)("Copy TensorT failed"); | |||
| MS_LOG(ERROR) << "Copy TensorT failed"; | |||
| *errorCode = RET_NULL_PTR; | |||
| return graphT->nodes.end(); | |||
| } | |||
| @@ -560,7 +538,7 @@ NodeIter InsertNodeAfter(schema::MetaGraphT *graphT, NodeIter existNodeIter, siz | |||
| size_t toAddTensorIdx = graphT->allTensors.size() - 1; | |||
| auto toAddNode = opDefCopyer(toAddNodeIn.get()); | |||
| if (toAddNode == nullptr) { | |||
| // MS_LOG(ERROR)("copy toAddNodeIn failed"); | |||
| MS_LOG(ERROR) << "copy toAddNodeIn failed"; | |||
| *errorCode = RET_NULL_PTR; | |||
| return graphT->nodes.end(); | |||
| } | |||
| @@ -612,12 +590,12 @@ std::string GetModelName(const std::string &modelFile) { | |||
| OpGraphT *OpGraphT::Build(const schema::MetaGraphT *subGraphDef) { | |||
| if (subGraphDef == nullptr) { | |||
| // MS_LOG(ERROR)("subGraphDef is nullptr"); | |||
| MS_LOG(ERROR) << "subGraphDef is nullptr"; | |||
| return nullptr; | |||
| } | |||
| auto graph = std::unique_ptr<OpGraphT>(new OpGraphT()); | |||
| if (graph == nullptr) { | |||
| // MS_LOG(ERROR)("malloc opgraph failed"); | |||
| MS_LOG(ERROR) << "malloc opgraph failed"; | |||
| return nullptr; | |||
| } | |||
| @@ -626,7 +604,7 @@ OpGraphT *OpGraphT::Build(const schema::MetaGraphT *subGraphDef) { | |||
| for (auto &opDef : opDefs) { | |||
| auto ret = graph->AddEdge(opDef.get(), &opDefs); | |||
| if (ret != RET_OK) { | |||
| // MS_LOG(ERROR)("%s add edge failed. ret:%d", opDef->name.c_str(), ret); | |||
| MS_LOG(ERROR) << opDef->name.c_str() << " add edge failed. ret: " << ret; | |||
| return nullptr; | |||
| } | |||
| } | |||
| @@ -644,7 +622,7 @@ int OpGraphT::AddEdge(const schema::CNodeT *srcNodeDef, const std::vector<std::u | |||
| for (auto &dstNodeDef : *nodeDefs) { | |||
| bool find = false; | |||
| auto inputIndex = dstNodeDef->inputIndex; | |||
| if (std::any_of(inputIndex.begin(), inputIndex.end(), [&index](int i) { return i == index; })) { | |||
| if (std::any_of(inputIndex.begin(), inputIndex.end(), [&index](size_t i) { return i == index; })) { | |||
| find = true; | |||
| } | |||
| @@ -664,13 +642,13 @@ int OpGraphT::AddEdge(const schema::CNodeT *srcNodeDef, const std::vector<std::u | |||
| int OpGraphT::AddEdge(NODE_ID srcId, NODE_ID dstId) { | |||
| auto srcNode = AddNode(srcId); | |||
| if (srcNode == nullptr) { | |||
| // MS_LOG(ERROR)("add srcNode failed"); | |||
| MS_LOG(ERROR) << "add srcNode failed"; | |||
| return RET_ERROR; | |||
| } | |||
| srcNode->AddOutEdge(dstId); | |||
| auto dstNode = AddNode(dstId); | |||
| if (dstNode == nullptr) { | |||
| // MS_LOG(ERROR)("add dstNode failed"); | |||
| MS_LOG(ERROR) << "add dstNode failed"; | |||
| return RET_ERROR; | |||
| } | |||
| dstNode->AddInEdge(srcId); | |||
| @@ -109,12 +109,79 @@ STATUS NodeUtils::ConvertDims(mindspore::lite::Format src_format, const std::vec | |||
| } | |||
| break; | |||
| default: | |||
| // MS_LOG(ERROR)("Not support dst format: %d", dst_format); | |||
| MS_LOG(ERROR) << "Not support dst format: " << dst_format; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| STATUS GetFilterDim(const std::vector<int32_t> &oriDims, kTransFilterType type, int32_t* filterK, int32_t* filterC, | |||
| int32_t* filterH, int32_t* filterW) { | |||
| MS_ASSERT(oriDims.size() == 4); | |||
| if (type == kKCHW2HWCK || type == kKCHW2HWKC || type == kKCHW2KHWC || type == kKCHW2CKHW) { | |||
| *filterK = oriDims.at(KCHW_K); | |||
| *filterC = oriDims.at(KCHW_C); | |||
| *filterH = oriDims.at(KCHW_H); | |||
| *filterW = oriDims.at(KCHW_W); | |||
| } else if (type == kCKHW2HWCK || type == kCKHW2HWKC || type == kCKHW2KHWC) { | |||
| *filterC = oriDims.at(CKHW_C); | |||
| *filterK = oriDims.at(CKHW_K); | |||
| *filterH = oriDims.at(CKHW_H); | |||
| *filterW = oriDims.at(CKHW_W); | |||
| } else if (type == kHWCK2KCHW || type == kHWCK2CKHW) { | |||
| *filterH = oriDims.at(HWCK_H); | |||
| *filterW = oriDims.at(HWCK_W); | |||
| *filterC = oriDims.at(HWCK_C); | |||
| *filterK = oriDims.at(HWCK_K); | |||
| } else if (type == kHWKC2KCHW || type == kHWKC2CKHW) { | |||
| *filterH = oriDims.at(HWKC_H); | |||
| *filterW = oriDims.at(HWKC_W); | |||
| *filterK = oriDims.at(HWKC_K); | |||
| *filterC = oriDims.at(HWKC_C); | |||
| } else if (type == kNHWC2KCHW || type == kNHWC2HWCK || type == kNHWC2CKHW) { | |||
| *filterK = oriDims.at(NHWC_N); | |||
| *filterH = oriDims.at(NHWC_H); | |||
| *filterW = oriDims.at(NHWC_W); | |||
| *filterC = oriDims.at(NHWC_C); | |||
| } else if (type == kCHWK2HWCK || type == kCHWK2KHWC) { | |||
| *filterC = oriDims.at(CHWK_C); | |||
| *filterH = oriDims.at(CHWK_H); | |||
| *filterW = oriDims.at(CHWK_W); | |||
| *filterK = oriDims.at(CHWK_K); | |||
| } else if (type == kKHWC2HWCK || type == kKHWC2CHWK) { | |||
| *filterK = oriDims.at(KHWC_K); | |||
| *filterH = oriDims.at(KHWC_H); | |||
| *filterW = oriDims.at(KHWC_W); | |||
| *filterC = oriDims.at(KHWC_C); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported transFilterType: " << type; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC, | |||
| int32_t filterH, int32_t filterW) { | |||
| MS_ASSERT(tensor != nullptr); | |||
| if (type == kKCHW2HWCK || type == kCKHW2HWCK || type == kNHWC2HWCK || type == kKHWC2HWCK || type == kCHWK2HWCK) { | |||
| tensor->dims = {filterH, filterW, filterC, filterK}; | |||
| } else if (type == kKCHW2HWKC || type == kCKHW2HWKC) { | |||
| tensor->dims = {filterH, filterW, filterK, filterC}; | |||
| } else if (type == kHWCK2KCHW || type == kHWKC2KCHW || type == kNHWC2KCHW) { | |||
| tensor->dims = {filterK, filterC, filterH, filterW}; | |||
| } else if (type == kHWCK2CKHW || type == kHWKC2CKHW || type == kNHWC2CKHW || type == kKCHW2CKHW) { | |||
| tensor->dims = {filterC, filterK, filterH, filterW}; | |||
| } else if (type == kKHWC2CHWK) { | |||
| tensor->dims = {filterC, filterH, filterW, filterK}; | |||
| } else if (type == kKCHW2KHWC || type == kCKHW2KHWC || type == kCHWK2KHWC) { | |||
| tensor->dims = {filterK, filterH, filterW, filterC}; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported transFilterType: " << type; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| STATUS TransFilterFormat(schema::TensorT *tensor, schema::Format dstFormat) { | |||
| if (tensor == nullptr) { | |||
| return RET_NULL_PTR; | |||
| @@ -75,72 +75,10 @@ enum kTransFilterType { | |||
| kKCHW2CKHW // 20 | |||
| }; | |||
| static STATUS GetFilterDim(std::vector<int32_t> &oriDims, kTransFilterType type, int32_t &filterK, int32_t &filterC, | |||
| int32_t &filterH, int32_t &filterW) { | |||
| MS_ASSERT(oriDims.size() == 4); | |||
| if (type == kKCHW2HWCK || type == kKCHW2HWKC || type == kKCHW2KHWC || type == kKCHW2CKHW) { | |||
| filterK = oriDims.at(KCHW_K); | |||
| filterC = oriDims.at(KCHW_C); | |||
| filterH = oriDims.at(KCHW_H); | |||
| filterW = oriDims.at(KCHW_W); | |||
| } else if (type == kCKHW2HWCK || type == kCKHW2HWKC || type == kCKHW2KHWC) { | |||
| filterC = oriDims.at(CKHW_C); | |||
| filterK = oriDims.at(CKHW_K); | |||
| filterH = oriDims.at(CKHW_H); | |||
| filterW = oriDims.at(CKHW_W); | |||
| } else if (type == kHWCK2KCHW || type == kHWCK2CKHW) { | |||
| filterH = oriDims.at(HWCK_H); | |||
| filterW = oriDims.at(HWCK_W); | |||
| filterC = oriDims.at(HWCK_C); | |||
| filterK = oriDims.at(HWCK_K); | |||
| } else if (type == kHWKC2KCHW || type == kHWKC2CKHW) { | |||
| filterH = oriDims.at(HWKC_H); | |||
| filterW = oriDims.at(HWKC_W); | |||
| filterK = oriDims.at(HWKC_K); | |||
| filterC = oriDims.at(HWKC_C); | |||
| } else if (type == kNHWC2KCHW || type == kNHWC2HWCK || type == kNHWC2CKHW) { | |||
| filterK = oriDims.at(NHWC_N); | |||
| filterH = oriDims.at(NHWC_H); | |||
| filterW = oriDims.at(NHWC_W); | |||
| filterC = oriDims.at(NHWC_C); | |||
| } else if (type == kCHWK2HWCK || type == kCHWK2KHWC) { | |||
| filterC = oriDims.at(CHWK_C); | |||
| filterH = oriDims.at(CHWK_H); | |||
| filterW = oriDims.at(CHWK_W); | |||
| filterK = oriDims.at(CHWK_K); | |||
| } else if (type == kKHWC2HWCK || type == kKHWC2CHWK) { | |||
| filterK = oriDims.at(KHWC_K); | |||
| filterH = oriDims.at(KHWC_H); | |||
| filterW = oriDims.at(KHWC_W); | |||
| filterC = oriDims.at(KHWC_C); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported transFilterType: " << type; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| static STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC, | |||
| int32_t filterH, int32_t filterW) { | |||
| MS_ASSERT(tensor != nullptr); | |||
| if (type == kKCHW2HWCK || type == kCKHW2HWCK || type == kNHWC2HWCK || type == kKHWC2HWCK || type == kCHWK2HWCK) { | |||
| tensor->dims = {filterH, filterW, filterC, filterK}; | |||
| } else if (type == kKCHW2HWKC || type == kCKHW2HWKC) { | |||
| tensor->dims = {filterH, filterW, filterK, filterC}; | |||
| } else if (type == kHWCK2KCHW || type == kHWKC2KCHW || type == kNHWC2KCHW) { | |||
| tensor->dims = {filterK, filterC, filterH, filterW}; | |||
| } else if (type == kHWCK2CKHW || type == kHWKC2CKHW || type == kNHWC2CKHW || type == kKCHW2CKHW) { | |||
| tensor->dims = {filterC, filterK, filterH, filterW}; | |||
| } else if (type == kKHWC2CHWK) { | |||
| tensor->dims = {filterC, filterH, filterW, filterK}; | |||
| } else if (type == kKCHW2KHWC || type == kCKHW2KHWC || type == kCHWK2KHWC) { | |||
| tensor->dims = {filterK, filterH, filterW, filterC}; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported transFilterType: " << type; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| STATUS GetFilterDim(const std::vector<int32_t> &oriDims, kTransFilterType type, int32_t* filterK, int32_t* filterC, | |||
| int32_t* filterH, int32_t* filterW); | |||
| STATUS SetFilterDim(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC, | |||
| int32_t filterH, int32_t filterW); | |||
| template <typename T> | |||
| static STATUS TransFilterData(schema::TensorT *tensor, kTransFilterType type, int32_t filterK, int32_t filterC, | |||
| @@ -356,7 +294,7 @@ static STATUS TransFilterFormat(schema::TensorT *tensor, kTransFilterType type) | |||
| int32_t filterW; | |||
| int32_t filterC; | |||
| int32_t filterK; | |||
| auto status = GetFilterDim(oriDims, type, filterK, filterC, filterH, filterW); | |||
| auto status = GetFilterDim(oriDims, type, &filterK, &filterC, &filterH, &filterW); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "GetFilterDim failed: " << status; | |||
| return status; | |||
| @@ -42,107 +42,6 @@ std::unique_ptr<schema::QuantParamT> CopyQuantParamT(const std::unique_ptr<schem | |||
| return std::move(dstQuantParam); | |||
| } | |||
| std::unique_ptr<QuantParamT> CopyQuantParamArrayT(const std::unique_ptr<QuantParamT> &srcQuantParamArray) { | |||
| MS_ASSERT(srcQuantParamArray != nullptr); | |||
| auto dstQuantParamArrayT = std::unique_ptr<QuantParamT>(new (std::nothrow) QuantParamT()); | |||
| if (dstQuantParamArrayT == nullptr) { | |||
| // MS_LOG(ERROR)("new dstQuantParamArrayT failed"); | |||
| return nullptr; | |||
| } | |||
| /* | |||
| for (size_t i = 0; i < srcQuantParamArray->param.size(); i++) { | |||
| auto &srcQuantParam = srcQuantParamArray->param.at(i); | |||
| MS_ASSERT(srcQuantParam != nullptr); | |||
| std::unique_ptr<QuantParamT> dstQuantParam(new (std::nothrow) QuantParamT()); | |||
| if (dstQuantParam == nullptr) { | |||
| //MS_LOG(ERROR)("new dstQuantParam failed"); | |||
| dstQuantParamArrayT.release(); | |||
| return nullptr; | |||
| } | |||
| dstQuantParam->scale = srcQuantParam->scale; | |||
| dstQuantParam->zeroPoint = srcQuantParam->zeroPoint; | |||
| dstQuantParam->min = srcQuantParam->min; | |||
| dstQuantParam->max = srcQuantParam->max; | |||
| dstQuantParam->narrowRange = srcQuantParam->narrowRange; | |||
| dstQuantParam->numBits = srcQuantParam->numBits; | |||
| dstQuantParamArrayT->param.emplace_back(std::move(dstQuantParam)); | |||
| } | |||
| */ | |||
| return std::move(dstQuantParamArrayT); | |||
| } | |||
| std::unique_ptr<QuantParamT> GetInTensorQuantParamArray(const MetaGraphT &graphT, size_t tensorIdx) { | |||
| auto preNodeIdxes = GetLinkedPreIdx(graphT, tensorIdx); | |||
| MS_ASSERT(preNodeIdxes.size() <= 1); | |||
| if (preNodeIdxes.empty()) { | |||
| // MS_LOGD("the %zuth tensor has no preNode", tensorIdx); | |||
| return nullptr; | |||
| } | |||
| auto preNodeIdx = preNodeIdxes.front(); | |||
| MS_ASSERT(preNodeIdx < graphT.nodes.size()); | |||
| auto &preNode = graphT.nodes.at(preNodeIdx); | |||
| MS_ASSERT(preNode != nullptr); | |||
| MS_ASSERT(preNode->inputIndex.size() + preNode->outputIndex.size() == preNode->quantParam.size()); | |||
| /* | |||
| for (size_t i = 0; i < preNode->outputIndex.size(); i++) { | |||
| if (preNode->outputIndex.at(i) == tensorIdx) { | |||
| auto &quantPArray = preNode->quantParam.at(preNode->inputIndex.size() + i); | |||
| MS_ASSERT(quantPArray->param.size() == 1); // only support prelayer | |||
| MS_ASSERT(quantPArray->param.front() != nullptr); | |||
| if (quantPArray->param.front()->min == FLT_MAX) { | |||
| //MS_LOGD("the %zuth tensor's preNode's relative quantParam has not be inited", tensorIdx); | |||
| return nullptr; | |||
| } else { | |||
| return std::move(CopyQuantParamArrayT(quantPArray)); | |||
| } | |||
| } | |||
| } | |||
| */ | |||
| MS_ASSERT(false); | |||
| return nullptr; | |||
| } | |||
| std::unique_ptr<QuantParamT> GetOutTensorQuantParamArray(const MetaGraphT &graphT, size_t tensorIdx) { | |||
| auto postNodeIdxes = GetLinkedPostIdx(graphT, tensorIdx); | |||
| if (postNodeIdxes.empty()) { | |||
| // MS_LOGD("the %zuth tensor has no postNode", tensorIdx); | |||
| return nullptr; | |||
| } | |||
| // find one postNode which can give valid quantParamArray | |||
| for (auto postNodeIdx : postNodeIdxes) { | |||
| MS_ASSERT(postNodeIdx < graphT.nodes.size()); | |||
| auto &postNode = graphT.nodes.at(postNodeIdx); | |||
| MS_ASSERT(postNode != nullptr); | |||
| MS_ASSERT(postNode->inputIndex.size() + postNode->outputIndex.size() == postNode->quantParam.size()); | |||
| /* | |||
| for (size_t i = 0; i < postNode->inputIndex.size(); i++) { | |||
| if (postNode->inputIndex.at(i) == tensorIdx) { | |||
| auto &quantPArray = postNode->quantParam.at(i); | |||
| MS_ASSERT(quantPArray->param.size() == 1); // only support prelayer | |||
| MS_ASSERT(quantPArray->param.front() != nullptr); | |||
| // check if postNode has valid quantParam | |||
| if (quantPArray->param.front()->min == FLT_MAX) { | |||
| continue; | |||
| } | |||
| MS_ASSERT(graphT.allTensors.size() > postNode->inputIndex.at(i)); | |||
| auto &tensor = graphT.allTensors.at(postNode->inputIndex.at(i)); | |||
| MS_ASSERT(tensor != nullptr); | |||
| if (tensor->refCount == schema::NodeType_ValueNode) { | |||
| continue; | |||
| } | |||
| // find valid quantParam return | |||
| auto paramArray = CopyQuantParamArrayT(quantPArray); | |||
| if (paramArray == nullptr) { | |||
| //MS_LOG(ERROR)("CopyQuantParamArrayT return nullptr"); | |||
| return nullptr; | |||
| } | |||
| return std::move(paramArray); | |||
| } | |||
| }*/ | |||
| } | |||
| return nullptr; | |||
| } | |||
| size_t GetElementSize(const TensorT &tensor) { return GetElementSize(TypeId(tensor.dataType)); } | |||
| size_t GetElementSize(const TypeId &dataType) { | |||
| @@ -58,10 +58,6 @@ std::unique_ptr<schema::QuantParamT> CopyQuantParamT(const std::unique_ptr<schem | |||
| std::unique_ptr<schema::QuantParamT> CopyQuantParamArrayT( | |||
| const std::unique_ptr<schema::QuantParamT> &srcQuantParamArray); | |||
| std::unique_ptr<schema::QuantParamT> GetInTensorQuantParamArray(const schema::MetaGraphT &graphT, size_t tensorIdx); | |||
| std::unique_ptr<schema::QuantParamT> GetOutTensorQuantParamArray(const schema::MetaGraphT &graphT, size_t tensorIdx); | |||
| using MSGraphDefTPtr = std::shared_ptr<schema::MetaGraphT>; | |||
| enum TensorType { CONST = 0, GRAPH_INPUT = 1, OP_OUTPUT = 2, TF_CONST = 3 }; | |||
| @@ -37,6 +37,7 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| using FmkType = converter::FmkType; | |||
| static const char *DELIM_SLASH = "/"; | |||
| Converter::Converter() { | |||
| this->transform = new GraphDefTransform; | |||
| this->anfTransform = new AnfTransform; | |||
| @@ -333,7 +333,7 @@ STATUS BatchNormFoldFusionPass::GenNewWeightTensor() { | |||
| void *miData = muTensor->data.data(); | |||
| auto *castedMiData = static_cast<float *>(miData); | |||
| size_t stride = weightShapeSize / channelOut; | |||
| for (size_t i = 0; i < channelOut; i++) { | |||
| for (int i = 0; i < channelOut; i++) { | |||
| for (size_t j = 0; j < stride; j++) { | |||
| castedNewWeightData[i * stride + j] = castedOldWeightData[i * stride + j] * castedGammaData[i] / castedMiData[i]; | |||
| } | |||
| @@ -367,7 +367,7 @@ STATUS BatchNormFoldFusionPass::GenNewBiasTensor() { // bias has no quant | |||
| MS_ASSERT(sigmaTensor->dataType == DataType_DT_FLOAT); | |||
| void *sigmaData = sigmaTensor->data.data(); | |||
| auto *castedSigmaData = static_cast<float *>(sigmaData); | |||
| for (size_t i = 0; i < channelOut; i++) { | |||
| for (int i = 0; i < channelOut; i++) { | |||
| castedNewBiasData[i] = castedBetaData[i] - castedGammaData[i] * castedMiData[i] / castedSigmaData[i]; | |||
| } | |||
| return RET_OK; | |||
| @@ -19,8 +19,6 @@ | |||
| #include <memory> | |||
| #include "tools/converter/legacy_optimizer/fusion/format_trans_fusion_pass.h" | |||
| #include "utils/log_adapter.h" | |||
| #include "securec/include/securec.h" | |||
| // #include "utils/log_adapter.h" | |||
| #include "tools/common/graph_util.h" | |||
| #include "include/errorcode.h" | |||
| #include "mindspore/lite/schema/inner/model_generated.h" | |||
| @@ -44,7 +42,7 @@ STATUS FormatTransFusionPass::DefinePattern() { | |||
| std::unique_ptr<FusionPattern> nc2NhAndNh2NcFusionPattern(new (std::nothrow) | |||
| FusionPattern(kNc2NhAndNh2NcFusionPattern)); | |||
| if (nc2NhAndNh2NcFusionPattern == nullptr) { | |||
| // MS_LOG(ERROR) << "new %s failed", kNc2NhAndNh2NcFusionPattern); | |||
| MS_LOG(ERROR) << "new " << kNc2NhAndNh2NcFusionPattern << "failed"; | |||
| return RET_ERROR; | |||
| } | |||
| nc2NhAndNh2NcFusionPattern->AddPatternOp(nc2nhOp); | |||
| @@ -52,7 +50,6 @@ STATUS FormatTransFusionPass::DefinePattern() { | |||
| nc2NhAndNh2NcFusionPattern->Finish(); | |||
| this->patterns.emplace_back(nc2NhAndNh2NcFusionPattern.release()); | |||
| } | |||
| // nchw2nhwc + QuantDtypeCast + nhwc2nchw | |||
| { | |||
| auto nc2nhOp = std::make_shared<PatternOp>(); | |||
| nc2nhOp->id = kFormatTransNc2NhOp; | |||
| @@ -68,7 +65,7 @@ STATUS FormatTransFusionPass::DefinePattern() { | |||
| nh2ncOp->left = passOp; | |||
| std::unique_ptr<FusionPattern> nc2NhAndNh2NcPassFusionPattern(new FusionPattern(kNc2NhAndNh2NcPassFusionPattern)); | |||
| if (nc2NhAndNh2NcPassFusionPattern == nullptr) { | |||
| // MS_LOG(ERROR) << "new %s failed", kNc2NhAndNh2NcPassFusionPattern); | |||
| MS_LOG(ERROR) << "new " << kNc2NhAndNh2NcPassFusionPattern << "failed"; | |||
| return RET_ERROR; | |||
| } | |||
| nc2NhAndNh2NcPassFusionPattern->AddPatternOp(nc2nhOp); | |||
| @@ -90,7 +87,7 @@ STATUS FormatTransFusionPass::DefinePattern() { | |||
| std::unique_ptr<FusionPattern> nh2NcAndNc2NhFusionPattern(new (std::nothrow) | |||
| FusionPattern(kNh2NcAndNc2NhFusionPattern)); | |||
| if (nh2NcAndNc2NhFusionPattern == nullptr) { | |||
| // MS_LOG(ERROR) << "new %s failed", kNh2NcAndNc2NhFusionPattern); | |||
| MS_LOG(ERROR) << "new " << kNh2NcAndNc2NhFusionPattern << "failed"; | |||
| return RET_ERROR; | |||
| } | |||
| nh2NcAndNc2NhFusionPattern->AddPatternOp(nh2ncOp); | |||
| @@ -247,7 +247,7 @@ bool FusionPass::MatchTree(schema::MetaGraphT *graph, size_t nodeIdx, const std: | |||
| // path is setted and not pointer to this node | |||
| if (target->pathSetted) { | |||
| MS_ASSERT(target->path != nullptr); | |||
| if (target->path->nodeIdx != nodeIdx) { | |||
| if (target->path->nodeIdx != static_cast<int>(nodeIdx)) { | |||
| return false; | |||
| } | |||
| } | |||
| @@ -108,7 +108,6 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p | |||
| transA = matMulNode->primitive->value.AsMatMul()->transposeA; | |||
| transB = matMulNode->primitive->value.AsMatMul()->transposeB; | |||
| MS_ASSERT(matMulNode->primitive->value.value != nullptr); | |||
| delete (matMulNode->primitive->value.value); | |||
| matMulNode->primitive->value.type = schema::PrimitiveType_FullConnection; | |||
| matMulNode->primitive->value.value = fcAttr.release(); | |||
| @@ -135,11 +134,6 @@ STATUS MatMulBiasAddFusionPass::DoFusion(MetaGraphT *graph, const std::string &p | |||
| STATUS MatMulBiasAddFusionPass::InsertTransposeNode(MetaGraphT *graph, const std::shared_ptr<Path> &matMulPath) { | |||
| MS_ASSERT(graph != nullptr); | |||
| MS_ASSERT(matMulPath != nullptr); | |||
| auto &matMulNode = graph->nodes.at(matMulPath->nodeIdx); | |||
| MS_ASSERT(graph->allTensors.size() > matMulNode->inputIndex.at(0)); | |||
| MS_ASSERT(graph->allTensors.size() > matMulNode->inputIndex.at(2)); | |||
| const auto &tensorA = graph->allTensors.at(matMulNode->inputIndex.at(0)); | |||
| const auto &tensorB = graph->allTensors.at(matMulNode->inputIndex.at(1)); | |||
| std::vector<size_t> insertNodeIdxList; | |||
| if (transA) { | |||