From: @hangangqiang
Reviewed-by:
Signed-off-by:
pull/12009/MERGE
| @@ -21,105 +21,135 @@ | |||
| #include "nnacl/matmul_parameter.h" | |||
| namespace mindspore::lite { | |||
| float *DequantUtil::DequantWeight(lite::Tensor *input_tensor, bool channel_first) { | |||
| int DequantUtil::DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type) { | |||
| MS_ASSERT(input_tensor != nullptr); | |||
| if (input_tensor->data_type() != kNumberTypeInt8 && input_tensor->data_type() != kNumberTypeInt16) { | |||
| MS_LOG(ERROR) << "Conv weight input type error." << input_tensor->data_type(); | |||
| return nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| if (input_tensor->quant_params().empty()) { | |||
| MS_LOG(ERROR) << "No quant param."; | |||
| return nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| if (input_tensor->data_type() == kNumberTypeInt16) { | |||
| return DequantData<int16_t>(input_tensor, channel_first); | |||
| if (input_tensor->data_type() == kNumberTypeInt16 && dst_data_type == kNumberTypeFloat32) { | |||
| auto new_const_data = DequantData<int16_t, float>(input_tensor, channel_first); | |||
| input_tensor->set_data(new_const_data); | |||
| input_tensor->set_own_data(true); | |||
| input_tensor->set_data_type(dst_data_type); | |||
| } else if (input_tensor->data_type() == kNumberTypeInt16 && dst_data_type == kNumberTypeFloat16) { | |||
| #if defined(ENABLE_ARM64) && defined(ENABLE_FP16) | |||
| auto new_const_data = DequantData<int16_t, float16_t>(input_tensor, channel_first); | |||
| input_tensor->set_data(new_const_data); | |||
| input_tensor->set_own_data(true); | |||
| input_tensor->set_data_type(dst_data_type); | |||
| #else | |||
| MS_LOG(ERROR) << "Float16 is not supported"; | |||
| return RET_NOT_SUPPORT; | |||
| #endif | |||
| } else if (input_tensor->data_type() == kNumberTypeInt8 && dst_data_type == kNumberTypeFloat32) { | |||
| auto new_const_data = DequantData<int8_t, float>(input_tensor, channel_first); | |||
| input_tensor->set_data(new_const_data); | |||
| input_tensor->set_own_data(true); | |||
| input_tensor->set_data_type(dst_data_type); | |||
| } else if (input_tensor->data_type() == kNumberTypeInt8 && dst_data_type == kNumberTypeFloat16) { | |||
| #if defined(ENABLE_ARM64) && defined(ENABLE_FP16) | |||
| auto new_const_data = DequantData<int8_t, float16_t>(input_tensor, channel_first); | |||
| input_tensor->set_data(new_const_data); | |||
| input_tensor->set_own_data(true); | |||
| input_tensor->set_data_type(dst_data_type); | |||
| #else | |||
| MS_LOG(ERROR) << "Float16 is not supported"; | |||
| return RET_NOT_SUPPORT; | |||
| #endif | |||
| } else { | |||
| return DequantData<int8_t>(input_tensor, channel_first); | |||
| MS_LOG(ERROR) << "Unsupported dequant from data_type(" << (input_tensor->data_type()) << ") to data_type(" | |||
| << dst_data_type << ")"; | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| return RET_OK; | |||
| } | |||
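For reference, a minimal sketch of the per-channel affine dequantization that `DequantWeight` now performs in place via `DequantData<ST, DT>`. This is not the actual template: it assumes one scale/zero-point pair per output channel, assumed field names, and a channel-first layout, while the real code also handles per-layer params and fp16 targets.

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for the tensor's quant params (field names assumed).
struct ChannelQuant {
  float scale;
  int zero_point;
};

// Dequantize channel-first integer data: dst = scale * (q - zero_point), per channel.
template <typename ST, typename DT>
void DequantPerChannel(const ST *quant, DT *dst, size_t channel_num, size_t per_channel_size,
                       const std::vector<ChannelQuant> &params) {
  for (size_t c = 0; c < channel_num; ++c) {
    for (size_t i = 0; i < per_channel_size; ++i) {
      const size_t idx = c * per_channel_size + i;
      dst[idx] = static_cast<DT>(params[c].scale * (static_cast<int>(quant[idx]) - params[c].zero_point));
    }
  }
}
```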
| int DequantUtil::UnPackToInt(const schema::Tensor *input_tensor, void *unpack_int_data) { | |||
| MS_ASSERT(input_tensor != nullptr); | |||
| MS_ASSERT(unpack_int_data != nullptr); | |||
| auto quant_params = input_tensor->quantParams(); | |||
| if (quant_params == nullptr) { | |||
| MS_LOG(ERROR) << "low bits quantparams is empty."; | |||
| return RET_ERROR; | |||
| int DequantUtil::DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) { | |||
| MS_ASSERT(dst_tensor != nullptr); | |||
| if (!dst_tensor->IsConst() || !src_tensor.enableHuffmanCode()) { | |||
| return RET_NO_CHANGE; | |||
| } | |||
| auto enable_huffman_code = input_tensor->enableHuffmanCode(); | |||
| if (enable_huffman_code) { | |||
| std::string encode_str(input_tensor->data()->begin(), input_tensor->data()->end()); | |||
| auto huffman_decode = std::make_unique<lite::HuffmanDecode>(); | |||
| auto ret = huffman_decode->DoHuffmanDecode(encode_str, unpack_int_data); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DoHuffmanDecode failed."; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| auto data = reinterpret_cast<const char *>(src_tensor.data()->data()); | |||
| MS_ASSERT(data != nullptr); | |||
| std::string encode_str(data, src_tensor.data()->size()); | |||
| dst_tensor->set_data(nullptr); | |||
| auto ret = dst_tensor->MallocData(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Malloc tensor data failed"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| int origin_bit = quant_params->Get(0)->numBits(); | |||
| if (origin_bit < 8 && origin_bit > 0) { | |||
| UnPackUtil<int8_t, uint8_t>(input_tensor, origin_bit, unpack_int_data); | |||
| } else if (origin_bit < 16 && origin_bit > 8) { | |||
| UnPackUtil<int16_t, uint16_t>(input_tensor, origin_bit, unpack_int_data); | |||
| auto dst_data = dst_tensor->data_c(); | |||
| MS_ASSERT(dst_data != nullptr); | |||
| ret = HuffmanDecode::DoHuffmanDecode(encode_str, dst_data); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DoHuffmanDecode failed."; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| std::map<Tensor *, std::pair<TypeId, void *>> DequantUtil::DequantTensor(OpParameter *op_param, | |||
| const std::vector<Tensor *> &in_tensors, | |||
| TypeId data_type, bool need_restore) { | |||
| std::map<Tensor *, std::pair<TypeId, void *>> tensor_origin_data; | |||
| if (data_type == TypeId::kNumberTypeFloat32 || data_type == TypeId::kNumberTypeFloat16) { | |||
| auto input_i = 0; | |||
| for (auto weight_tensor : in_tensors) { | |||
| MS_ASSERT(weight_tensor != nullptr); | |||
| input_i++; | |||
| auto channel_first = true; | |||
| if (op_param->type_ == schema::PrimitiveType_MatMul && weight_tensor->shape().size() == 2) { | |||
| auto param = reinterpret_cast<MatMulParameter *>(op_param); | |||
| if (input_i == 1) { | |||
| channel_first = !param->a_transpose_; | |||
| } else if (input_i == 2) { | |||
| channel_first = param->b_transpose_; | |||
| } else { | |||
| MS_LOG(WARNING) << "unexpected input_i"; | |||
| } | |||
| } | |||
| auto *restore_data = weight_tensor->data_c(); | |||
| auto restore_type = weight_tensor->data_type(); | |||
| bool dequant_flag = !weight_tensor->quant_params().empty() && weight_tensor->quant_params().front().inited && | |||
| restore_data != nullptr && | |||
| (restore_type == kNumberTypeInt8 || restore_type == kNumberTypeInt16); | |||
| if (dequant_flag) { | |||
| auto *dequant_weight = DequantUtil::DequantWeight(weight_tensor, channel_first); | |||
| if (dequant_weight == nullptr) { | |||
| MS_LOG(ERROR) << "dequant data is nullptr."; | |||
| return tensor_origin_data; | |||
| } | |||
| if (need_restore) { | |||
| tensor_origin_data[weight_tensor] = {restore_type, restore_data}; | |||
| } else { | |||
| weight_tensor->FreeData(); | |||
| } | |||
| weight_tensor->set_data(dequant_weight); | |||
| weight_tensor->set_data_type(kNumberTypeFloat32); | |||
| } | |||
| } | |||
| int DequantUtil::UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) { | |||
| MS_ASSERT(dst_tensor != nullptr); | |||
| if (!dst_tensor->IsConst()) { | |||
| return RET_NO_CHANGE; | |||
| } | |||
| auto quant_params = src_tensor.quantParams(); | |||
| if (quant_params == nullptr || quant_params->size() == 0) { | |||
| return RET_NO_CHANGE; | |||
| } | |||
| auto quant_param = quant_params->Get(0); | |||
| if (quant_param == nullptr || !quant_param->inited()) { | |||
| return RET_NO_CHANGE; | |||
| } | |||
| auto dst_data = dst_tensor->data_c(); | |||
| if (dst_data != nullptr) { | |||
| MS_LOG(ERROR) << "lite Tensor has already malloced data"; | |||
| return RET_ERROR; | |||
| } | |||
| auto ret = dst_tensor->MallocData(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Malloc tensor data failed"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| dst_data = dst_tensor->data_c(); | |||
| int origin_bit = quant_param->numBits(); | |||
| if (origin_bit < 8 && origin_bit > 0) { | |||
| UnPackUtil<int8_t, uint8_t>(&src_tensor, origin_bit, dst_data); | |||
| return RET_OK; | |||
| } else if (origin_bit < 16 && origin_bit > 8) { | |||
| UnPackUtil<int16_t, uint16_t>(&src_tensor, origin_bit, dst_data); | |||
| return RET_OK; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported bit number: " << origin_bit; | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| return tensor_origin_data; | |||
| } | |||
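As a rough illustration of what `UnPackUtil` does for sub-byte bit widths: the sketch below assumes MSB-first packing and is not the actual helper, which works through a `std::queue<bool>` and also unpacks 9–15 bit values into int16.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Unpack `value_count` values of `origin_bit` bits each (1..7) from a packed
// byte stream into int8_t, reading bits MSB-first within each byte.
std::vector<int8_t> UnpackLowBit(const uint8_t *packed, size_t value_count, int origin_bit) {
  std::vector<int8_t> out;
  out.reserve(value_count);
  size_t bit_pos = 0;
  for (size_t i = 0; i < value_count; ++i) {
    uint32_t value = 0;
    for (int b = 0; b < origin_bit; ++b, ++bit_pos) {
      const uint8_t bit = (packed[bit_pos / 8] >> (7 - bit_pos % 8)) & 0x1;
      value = (value << 1) | bit;
    }
    out.push_back(static_cast<int8_t>(value));
  }
  return out;
}
```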
| void DequantUtil::RestoreTensorData(const std::map<Tensor *, std::pair<TypeId, void *>> &tensor_origin_data_map) { | |||
| for (auto &kv : tensor_origin_data_map) { | |||
| auto *tensor = kv.first; | |||
| auto type_id = kv.second.first; | |||
| auto data = kv.second.second; | |||
| tensor->FreeData(); | |||
| tensor->set_data_type(type_id); | |||
| tensor->set_data(data); | |||
| Tensor *DequantUtil::DequantTensor(Tensor *tensor, TypeId data_type, bool channel_first, TypeId dst_data_type) { | |||
| MS_ASSERT(tensor != nullptr); | |||
| Tensor *restore_tensor = nullptr; | |||
| if (!tensor->IsConst() || !(data_type == TypeId::kNumberTypeFloat32 || data_type == TypeId::kNumberTypeFloat16)) { | |||
| return nullptr; | |||
| } | |||
| auto restore_type = tensor->data_type(); | |||
| bool need_dequant = !tensor->quant_params().empty() && tensor->quant_params().front().inited && | |||
| (restore_type == kNumberTypeInt8 || restore_type == kNumberTypeInt16); | |||
| if (!need_dequant) { | |||
| return nullptr; | |||
| } | |||
| restore_tensor = Tensor::CopyTensor(*tensor, false); | |||
| restore_tensor->set_data(tensor->data_c()); | |||
| restore_tensor->set_own_data(tensor->own_data()); | |||
| auto ret = DequantUtil::DequantWeight(tensor, channel_first, dst_data_type); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Dequant data failed: " << ret; | |||
| return nullptr; | |||
| } | |||
| return restore_tensor; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -29,19 +29,16 @@ | |||
| namespace mindspore::lite { | |||
| class DequantUtil { | |||
| public: | |||
| static float *DequantWeight(lite::Tensor *input_tensor, bool); | |||
| static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); | |||
| static int UnPackToInt(const schema::Tensor *input_tensor, void *weight_unpack_data); | |||
| static int DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); | |||
| static std::map<Tensor *, std::pair<TypeId, void *>> DequantTensor(OpParameter *op_param, | |||
| const std::vector<Tensor *> &in_tensors, | |||
| TypeId data_type, bool need_restore = true); | |||
| static void RestoreTensorData(const std::map<Tensor *, std::pair<TypeId, void *>> &tensor_origin_data_map); | |||
| static Tensor *DequantTensor(Tensor *tensor, TypeId data_type, bool channel_first = true, | |||
| TypeId dst_data_type = kNumberTypeFloat32); | |||
| template <typename ST, typename DT = float> | |||
| static DT *DequantData(lite::Tensor *input_tensor, bool channel_first = true) { | |||
| const auto *quant_datas = static_cast<const ST *>(input_tensor->MutableData()); | |||
| const auto *quant_datas = static_cast<const ST *>(input_tensor->data_c()); | |||
| if (quant_datas == nullptr) { | |||
| MS_LOG(ERROR) << "Get quant tensor failed."; | |||
| return nullptr; | |||
| @@ -138,6 +135,8 @@ class DequantUtil { | |||
| } | |||
| private: | |||
| static int DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type = kNumberTypeFloat32); | |||
| template <typename T1, typename T2> | |||
| static void UnPackData(int origin_bit, const T2 &packed_data, std::queue<bool> *unpack_bit_data, void *unpack_int, | |||
| size_t *count, bool is_last) { | |||
| @@ -15,10 +15,10 @@ | |||
| */ | |||
| #include "src/huffman_decode.h" | |||
| #include <queue> | |||
| namespace mindspore { | |||
| namespace lite { | |||
| STATUS HuffmanDecode::DoHuffmanDecode(const std::string &input_str, void *decoded_data) { | |||
| if (decoded_data == nullptr) { | |||
| MS_LOG(ERROR) << "decoded_data is nullptr."; | |||
| @@ -26,8 +26,7 @@ STATUS HuffmanDecode::DoHuffmanDecode(const std::string &input_str, void *decode | |||
| } | |||
| int status; | |||
| std::string huffman_decoded_str = ""; | |||
| std::string huffman_decoded_str; | |||
| auto key_pos = input_str.find_first_of('#'); | |||
| auto code_pos = input_str.find_first_of('#', key_pos + 1); | |||
| auto key = input_str.substr(0, key_pos); | |||
| @@ -60,7 +59,7 @@ STATUS HuffmanDecode::DoHuffmanDecode(const std::string &input_str, void *decode | |||
| size_t len = huffman_decoded_str.length(); | |||
| memcpy(decoded_data, huffman_decoded_str.c_str(), len); | |||
| delete root; | |||
| FreeHuffmanNodeTree(root); | |||
| return RET_OK; | |||
| } | |||
| @@ -91,7 +90,6 @@ STATUS HuffmanDecode::RebuildHuffmanTree(std::string keys, std::string codes, co | |||
| MS_LOG(ERROR) << "new HuffmanNode failed."; | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| this->huffman_nodes_.push_back(new_node); | |||
| new_node->left = nullptr; | |||
| new_node->right = nullptr; | |||
| new_node->parent = cur_node; | |||
| @@ -157,11 +155,23 @@ STATUS HuffmanDecode::DoHuffmanDecompress(HuffmanNodePtr root, std::string encod | |||
| return RET_OK; | |||
| } | |||
| HuffmanDecode::~HuffmanDecode() { | |||
| for (auto &node : this->huffman_nodes_) { | |||
| delete node; | |||
| void HuffmanDecode::FreeHuffmanNodeTree(HuffmanNodePtr root) { | |||
| if (root == nullptr) { | |||
| return; | |||
| } | |||
| std::queue<HuffmanNodePtr> node_queue; | |||
| node_queue.push(root); | |||
| while (!node_queue.empty()) { | |||
| auto cur_node = node_queue.front(); | |||
| node_queue.pop(); | |||
| if (cur_node->left != nullptr) { | |||
| node_queue.push(cur_node->left); | |||
| } | |||
| if (cur_node->right != nullptr) { | |||
| node_queue.push(cur_node->right); | |||
| } | |||
| delete (cur_node); | |||
| } | |||
| this->huffman_nodes_.resize(0); | |||
| } | |||
| } // namespace lite | |||
| @@ -27,7 +27,6 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| const int PSEUDO_EOF = 128; | |||
| struct HuffmanNode { | |||
| @@ -36,23 +35,25 @@ struct HuffmanNode { | |||
| std::string code; | |||
| HuffmanNode *left, *right, *parent; | |||
| }; | |||
| using HuffmanNodePtr = HuffmanNode *; | |||
| class HuffmanDecode { | |||
| public: | |||
| HuffmanDecode() = default; | |||
| ~HuffmanDecode(); | |||
| virtual ~HuffmanDecode() = default; | |||
| STATUS DoHuffmanDecode(const std::string &input_str, void *decoded_data); | |||
| static STATUS DoHuffmanDecode(const std::string &input_str, void *decoded_data); | |||
| private: | |||
| std::vector<HuffmanNodePtr> huffman_nodes_; | |||
| STATUS RebuildHuffmanTree(std::string key, std::string code, const HuffmanNodePtr &root); | |||
| HuffmanDecode() = default; | |||
| static void FreeHuffmanNodeTree(HuffmanNodePtr root); | |||
| static STATUS RebuildHuffmanTree(std::string key, std::string code, const HuffmanNodePtr &root); | |||
| STATUS DoHuffmanDecompress(HuffmanNodePtr root, std::string encoded_data, std::string *decoded_str); | |||
| static STATUS DoHuffmanDecompress(HuffmanNodePtr root, std::string encoded_data, std::string *decoded_str); | |||
| std::vector<std::string> Str2Vec(std::string s) { | |||
| static std::vector<std::string> Str2Vec(std::string s) { | |||
| size_t i = 0; | |||
| std::vector<std::string> vec; | |||
| while (i < s.length()) { | |||
| @@ -17,7 +17,6 @@ | |||
| #include "include/errorcode.h" | |||
| #include "src/ops/populate/populate_register.h" | |||
| #include "src/common/version_manager.h" | |||
| #include "src/common/prim_util.h" | |||
| #include "nnacl/pooling_parameter.h" | |||
| #include "src/reg_kernels.h" | |||
| #ifdef ENABLE_ARM64 | |||
| @@ -120,21 +119,24 @@ KernelRegistry::~KernelRegistry() { | |||
| } | |||
| } | |||
| int KernelRegistry::GetKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const InnerContext *ctx, const kernel::KernelKey &key, OpParameter *parameter, | |||
| kernel::LiteKernel **kernel) { | |||
| bool KernelRegistry::SupportKernel(const KernelKey &key) { | |||
| auto kernel_creator = GetCreator(key); | |||
| return kernel_creator != nullptr; | |||
| } | |||
| kernel::LiteKernel *KernelRegistry::GetKernel(const std::vector<Tensor *> &in_tensors, | |||
| const std::vector<Tensor *> &out_tensors, const InnerContext *ctx, | |||
| const kernel::KernelKey &key, OpParameter *parameter) { | |||
| MS_ASSERT(ctx != nullptr); | |||
| MS_ASSERT(kernel != nullptr); | |||
| auto creator = GetCreator(key); | |||
| if (creator != nullptr) { | |||
| *kernel = creator(in_tensors, out_tensors, parameter, ctx, key); | |||
| if (*kernel != nullptr) { | |||
| (*kernel)->set_desc(key); | |||
| return RET_OK; | |||
| auto kernel = creator(in_tensors, out_tensors, parameter, ctx, key); | |||
| if (kernel != nullptr) { | |||
| kernel->set_desc(key); | |||
| return kernel; | |||
| } | |||
| return RET_ERROR; | |||
| } | |||
| return RET_NOT_SUPPORT; | |||
| return nullptr; | |||
| } | |||
| #ifdef MS_COMPILE_IOS | |||
| @@ -37,7 +37,6 @@ class KernelRegistry { | |||
| static KernelRegistry *GetInstance(); | |||
| static int Init(); | |||
| virtual kernel::KernelCreator GetCreator(const kernel::KernelKey &desc); | |||
| const kernel::KernelCreator *GetCreatorArrays(); | |||
| int GetCreatorFuncIndex(kernel::KernelKey desc); | |||
| void RegKernel(kernel::KernelKey desc, kernel::KernelCreator creator); | |||
| void RegKernel(kernel::KERNEL_ARCH arch, TypeId data_type, int type, kernel::KernelCreator creator); | |||
| @@ -45,6 +44,9 @@ class KernelRegistry { | |||
| int GetKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const InnerContext *ctx, const kernel::KernelKey &key, OpParameter *op_parameter, | |||
| kernel::LiteKernel **kernel); | |||
| bool SupportKernel(const kernel::KernelKey &key); | |||
| kernel::LiteKernel *GetKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| const InnerContext *ctx, const kernel::KernelKey &key, OpParameter *op_parameter); | |||
| #ifdef MS_COMPILE_IOS | |||
| void RegisterAllKernels(); | |||
| #endif | |||
| @@ -42,20 +42,38 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| // this method will not check whether tensor_idx is a weight tensor index, caller should ensure this. | |||
| static bool WeightTensorNeedCopy(const lite::Model *model, const uint32_t tensor_idx) { | |||
| #ifdef SUPPORT_TRAIN | |||
| return false; | |||
| #endif | |||
| MS_ASSERT(model != nullptr); | |||
| auto post_node_idxes = GetLinkedPostNodeIdx(model, tensor_idx); | |||
| return std::none_of(post_node_idxes.begin(), post_node_idxes.end(), [&](const size_t &post_node_idx) { | |||
| auto node = model->all_nodes_[post_node_idx]; | |||
| MS_ASSERT(node != nullptr); | |||
| return IsPackedOp(GetPrimitiveType(node->primitive_)); | |||
| }); | |||
| namespace { | |||
| int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) { | |||
| MS_ASSERT(dst_tensor != nullptr); | |||
| bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && | |||
| src_tensor.quantParams()->Get(0) != nullptr && src_tensor.quantParams()->Get(0)->inited(); | |||
| if (need_bit_unpack) { | |||
| auto num_bits = src_tensor.quantParams()->Get(0)->numBits(); | |||
| need_bit_unpack = ((num_bits > 0 && num_bits < 8) || (num_bits > 8 && num_bits < 16)); | |||
| } | |||
| if (!src_tensor.enableHuffmanCode() && !need_bit_unpack) { | |||
| return RET_NO_CHANGE; | |||
| } | |||
| // Huffman coding and bit unpacking are assumed not to be performed at the same time | |||
| STATUS ret = RET_ERROR; | |||
| if (src_tensor.enableHuffmanCode()) { | |||
| ret = DequantUtil::DecodeHuffmanCode(src_tensor, dst_tensor); | |||
| if (ret != RET_OK && ret != RET_NO_CHANGE) { | |||
| MS_LOG(ERROR) << "Decode huffman code failed: " << ret; | |||
| return ret; | |||
| } | |||
| } else if (need_bit_unpack) { | |||
| ret = DequantUtil::UnPackToInt(src_tensor, dst_tensor); | |||
| if (ret != RET_OK && ret != RET_NO_CHANGE) { | |||
| MS_LOG(ERROR) << "Unpack to int8 failed: " << ret; | |||
| return ret; | |||
| } | |||
| } else { | |||
| ret = RET_OK; | |||
| } | |||
| return ret; | |||
| } | |||
| } // namespace | |||
| LiteSession::LiteSession() { this->is_running_.store(false); } | |||
| @@ -78,7 +96,6 @@ void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lit | |||
| dst_tensor->AddQuantParam(quant_arg); | |||
| } | |||
| } | |||
| dst_tensor->set_enable_huffman_code(src_tensor->enableHuffmanCode()); | |||
| auto quant_clusters = src_tensor->quantClusters(); | |||
| if (quant_clusters != nullptr) { | |||
| std::vector<float> clusters; | |||
| @@ -93,57 +110,23 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde | |||
| lite::Tensor *dst_tensor) { | |||
| MS_ASSERT(src_tensor != nullptr); | |||
| MS_ASSERT(dst_tensor != nullptr); | |||
| auto NeedUnPack = [&src_tensor, &dst_tensor]() -> bool { | |||
| auto data_type = src_tensor->dataType(); | |||
| int pack_size = src_tensor->data()->size(); | |||
| int org_size = dst_tensor->Size(); | |||
| return (pack_size != org_size) && (data_type == kNumberTypeInt8 || data_type == kNumberTypeInt16); | |||
| }; | |||
| auto src_category = TensorCategory(src_tensor); | |||
| if ((src_category == Tensor::Category::CONST_TENSOR || src_category == Tensor::Category::CONST_SCALAR) && | |||
| src_tensor->data() != nullptr && src_tensor->data()->size() > 0) { | |||
| if (src_tensor->dataType() == kObjectTypeTensorType) { | |||
| auto tensor_list = reinterpret_cast<TensorList *>(dst_tensor); | |||
| if (src_tensor->data() == nullptr) { | |||
| MS_LOG(ERROR) << "src_tensor->data() is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| if (tensor_list->Decode(reinterpret_cast<const int *>(src_tensor->data()->data())) != RET_OK) { | |||
| MS_LOG(ERROR) << "Decode tensorlist data failed"; | |||
| return RET_ERROR; | |||
| } | |||
| } else { | |||
| if (WeightTensorNeedCopy(model, tensor_index)) { | |||
| auto dst_data = dst_tensor->MutableData(); | |||
| if (dst_data == nullptr) { | |||
| MS_LOG(ERROR) << "Data from tensor is nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| if (NeedUnPack()) { | |||
| auto ret = DequantUtil::UnPackToInt(src_tensor, dst_data); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "unpack to int failed."; | |||
| return RET_NULL_PTR; | |||
| } | |||
| } else { | |||
| memcpy(dst_data, src_tensor->data()->data(), dst_tensor->Size()); | |||
| } | |||
| copyed_tensor_idxes_.emplace_back(tensor_index); | |||
| } else { | |||
| if (NeedUnPack()) { | |||
| auto dst_data = dst_tensor->MutableData(); | |||
| if (dst_data == nullptr) { | |||
| MS_LOG(ERROR) << "Data from tensor is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| auto ret = DequantUtil::UnPackToInt(src_tensor, dst_data); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "unpack to int failed."; | |||
| return RET_ERROR; | |||
| } | |||
| copyed_tensor_idxes_.emplace_back(tensor_index); | |||
| } else { | |||
| dst_tensor->set_data(const_cast<unsigned char *>(src_tensor->data()->data())); | |||
| } | |||
| auto ret = DecompressTensor(*src_tensor, dst_tensor); | |||
| if (ret == RET_NO_CHANGE) { | |||
| dst_tensor->set_data(const_cast<unsigned char *>(src_tensor->data()->data())); | |||
| dst_tensor->set_own_data(false); | |||
| } else if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Decompress tensor data failed: " << ret; | |||
| return ret; | |||
| } | |||
| } | |||
| } | |||
| @@ -176,7 +159,6 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) { | |||
| int LiteSession::ConvertTensors(const lite::Model *model) { | |||
| MS_ASSERT(model != nullptr); | |||
| copyed_tensor_idxes_.clear(); | |||
| uint32_t tensor_count = model->all_tensors_.size(); | |||
| MS_ASSERT(!model->sub_graphs_.empty()); | |||
| auto model_input_indices = model->sub_graphs_.front()->input_indices_; | |||
| @@ -582,11 +564,11 @@ LiteSession::~LiteSession() { | |||
| for (auto *kernel : kernels_) { | |||
| delete kernel; | |||
| } | |||
| for (size_t i = 0; i < tensors_.size(); i++) { | |||
| auto *tensor = tensors_.at(i); | |||
| for (auto tensor : tensors_) { | |||
| MS_ASSERT(tensor != nullptr); | |||
| // data of weight tensor of node in packed_op can not be to free, we will free weight data when freeing meta_graph | |||
| if (tensor->IsConst() && !IsContain(this->inputs_, tensor) && !IsContain(copyed_tensor_idxes_, i)) { | |||
| // Data of a const tensor which doesn't own its data will not be freed here, | |||
| // such as const data from the meta_graph, which is freed when the meta_graph is freed. | |||
| if (tensor->IsConst() && !tensor->own_data()) { | |||
| tensor->set_data(nullptr); | |||
| } | |||
| delete tensor; | |||
| @@ -115,7 +115,6 @@ class LiteSession : public session::LiteSession { | |||
| InnerContext *context_ = nullptr; | |||
| std::vector<kernel::LiteKernel *> kernels_; | |||
| std::vector<Tensor *> tensors_; | |||
| std::vector<size_t> copyed_tensor_idxes_; | |||
| // graph input tensors | |||
| std::vector<Tensor *> inputs_; | |||
| // graph output tensors | |||
| @@ -77,14 +77,12 @@ int CarryDataKernel::MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_ | |||
| } else { | |||
| dst_tensor->FreeData(); | |||
| dst_tensor->set_data(src_tensor->data_c()); | |||
| dst_tensor->set_own_data(true); | |||
| src_tensor->set_data(nullptr); | |||
| src_tensor->set_own_data(true); | |||
| } | |||
| } else { | |||
| auto ret = dst_tensor->set_root_tensor(src_tensor->root_tensor()); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set root tensor for tensor(" << dst_tensor->tensor_name() << ") failed"; | |||
| return ret; | |||
| } | |||
| dst_tensor->set_root_tensor(src_tensor->root_tensor()); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -121,11 +119,7 @@ int CarryDataKernel::MoveTensorListData(lite::TensorList *dst_tensor, lite::Tens | |||
| src_tensor->set_tensors({}); | |||
| } else { | |||
| dst_tensor->set_shape(src_tensor->shape()); | |||
| auto ret = dst_tensor->set_root_tensor(src_tensor->root_tensor()); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set root tensor for tensor(" << dst_tensor->tensor_name() << ") failed"; | |||
| return ret; | |||
| } | |||
| dst_tensor->set_root_tensor(src_tensor->root_tensor()); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -63,16 +63,8 @@ int MergeCPUKernel::Init() { | |||
| MS_ASSERT(in_tensors_[i] != nullptr); | |||
| MS_ASSERT(in_tensors_[i + stride] != nullptr); | |||
| if (in_tensors_[i] == in_tensors_[i + stride]) { | |||
| auto ret = in_tensors_[i]->set_root_tensor(in_tensors_[i]); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set root tensor for tensor(" << in_tensors_[i]->tensor_name() << ") failed"; | |||
| return ret; | |||
| } | |||
| ret = in_tensors_[i + stride]->set_root_tensor(in_tensors_[i + stride]); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set root tensor for tensor(" << in_tensors_[i + stride]->tensor_name() << ") failed"; | |||
| return ret; | |||
| } | |||
| in_tensors_[i]->set_root_tensor(in_tensors_[i]); | |||
| in_tensors_[i + stride]->set_root_tensor(in_tensors_[i + stride]); | |||
| } | |||
| } | |||
| return RET_OK; | |||
| @@ -25,10 +25,11 @@ extern "C" { | |||
| extern void Float32ToFloat16(const float *input, float16_t *output, int number); | |||
| extern void Float16ToFloat32(const float16_t *input, float *output, int number); | |||
| void Float32ToFloat16_fp16_handler(const void *input, void *output, int number) { | |||
| inline void Float32ToFloat16_fp16_handler(const void *input, void *output, int number) { | |||
| Float32ToFloat16(reinterpret_cast<const float *>(input), reinterpret_cast<float16_t *>(output), number); | |||
| } | |||
| void Float16ToFloat32_fp16_handler(const void *input, void *output, int number) { | |||
| inline void Float16ToFloat32_fp16_handler(const void *input, void *output, int number) { | |||
| Float16ToFloat32(reinterpret_cast<const float16_t *>(input), reinterpret_cast<float *>(output), number); | |||
| } | |||
| #endif | |||
| @@ -71,13 +71,20 @@ int MatmulBaseFP16CPUKernel::InitBias() { | |||
| if (in_tensors_.size() == 3) { | |||
| auto bias_tensor = in_tensors_[2]; | |||
| int max_bias_data = UP_ROUND(bias_tensor->ElementsNum(), C8NUM); | |||
| bias_ptr_ = reinterpret_cast<float16_t *>(malloc(max_bias_data * sizeof(float))); | |||
| bias_ptr_ = reinterpret_cast<float16_t *>(malloc(max_bias_data * sizeof(float16_t))); | |||
| if (bias_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc bias_ptr_ failed"; | |||
| return RET_ERROR; | |||
| } | |||
| memset(bias_ptr_, 0, max_bias_data * sizeof(float16_t)); | |||
| Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->data_c()), bias_ptr_, bias_tensor->ElementsNum()); | |||
| if (in_tensors_[2]->data_type() == kNumberTypeFloat32) { | |||
| Float32ToFloat16(reinterpret_cast<float *>(in_tensors_[2]->data_c()), bias_ptr_, bias_tensor->ElementsNum()); | |||
| } else if (in_tensors_[2]->data_type() == kNumberTypeFloat16) { | |||
| memcpy(bias_ptr_, in_tensors_[2]->data_c(), max_bias_data * sizeof(float16_t)); | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported bias data type : " << in_tensors_[2]->data_type(); | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -31,6 +31,7 @@ | |||
| #include "src/common/prim_util.h" | |||
| #include "src/runtime/infer_manager.h" | |||
| #include "src/dequant.h" | |||
| #include "nnacl/matmul_parameter.h" | |||
| #if GPU_OPENCL | |||
| #include "src/runtime/kernel/opencl/opencl_subgraph.h" | |||
| #include "src/runtime/gpu/opencl/opencl_runtime.h" | |||
| @@ -43,6 +44,10 @@ | |||
| #include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h" | |||
| #include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" | |||
| #endif | |||
| #if defined(ENABLE_ARM64) && defined(ENABLE_FP16) | |||
| #include "src/runtime/kernel/arm/fp16/fp16_op_handler.h" | |||
| #endif | |||
| namespace mindspore::lite { | |||
| using kernel::KERNEL_ARCH::kCPU; | |||
| using kernel::KERNEL_ARCH::kGPU; | |||
| @@ -198,46 +203,168 @@ int Scheduler::InferSubGraphShape(size_t subgraph_index, bool *infer_shape_inter | |||
| return RET_OK; | |||
| } | |||
| namespace { | |||
| #ifndef SUPPORT_TRAIN | |||
| int CopyConstTensor(Tensor *tensor, std::map<Tensor *, Tensor *> *restored_origin_tensors, TypeId dst_data_type) { | |||
| MS_ASSERT(restored_origin_tensors != nullptr); | |||
| MS_ASSERT(tensor != nullptr); | |||
| if (dst_data_type != kNumberTypeFloat32 && dst_data_type != kNumberTypeFloat16) { | |||
| MS_LOG(ERROR) << "Only support fp32 or fp16 as dst_data_type."; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| // tensorlist does not support fp16 yet | |||
| if (!tensor->IsConst() || tensor->data_type() == kObjectTypeTensorType) { | |||
| return RET_OK; | |||
| } | |||
| auto origin_data = tensor->data_c(); | |||
| MS_ASSERT(origin_data != nullptr); | |||
| if (tensor->data_type() == kNumberTypeFloat32 && dst_data_type == kNumberTypeFloat16) { | |||
| #if defined(ENABLE_ARM64) && defined(ENABLE_FP16) | |||
| auto restore_tensor = Tensor::CopyTensor(*tensor, false); | |||
| restore_tensor->set_data(origin_data); | |||
| restore_tensor->set_own_data(tensor->own_data()); | |||
| tensor->set_data(nullptr); | |||
| tensor->set_data_type(kNumberTypeFloat16); | |||
| auto ret = tensor->MallocData(); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "malloc data failed"; | |||
| return ret; | |||
| } | |||
| auto new_tensor_data = tensor->data_c(); | |||
| MS_ASSERT(new_tensor_data != nullptr); | |||
| Float32ToFloat16_fp16_handler(origin_data, new_tensor_data, tensor->ElementsNum()); | |||
| (*restored_origin_tensors)[tensor] = restore_tensor; | |||
| #else | |||
| MS_LOG(ERROR) << "Unsupported dst data type: float16"; | |||
| return RET_ERROR; | |||
| #endif | |||
| } else { | |||
| tensor->set_data(nullptr); | |||
| auto ret = tensor->MallocData(); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "malloc data failed"; | |||
| return ret; | |||
| } | |||
| auto new_data = tensor->data_c(); | |||
| MS_ASSERT(new_data != nullptr); | |||
| memcpy(new_data, origin_data, tensor->Size()); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| #endif | |||
| inline void RestoreTensorData(const std::map<Tensor *, Tensor *> &restored_origin_tensors) { | |||
| for (auto &restored_origin_tensor : restored_origin_tensors) { | |||
| auto *origin_tensor = restored_origin_tensor.first; | |||
| auto *restored_tensor = restored_origin_tensor.second; | |||
| MS_ASSERT(origin_tensor != nullptr); | |||
| MS_ASSERT(restored_tensor != nullptr); | |||
| origin_tensor->FreeData(); | |||
| origin_tensor->set_data_type(restored_tensor->data_type()); | |||
| origin_tensor->set_data(restored_tensor->data_c()); | |||
| origin_tensor->set_own_data(restored_tensor->own_data()); | |||
| } | |||
| } | |||
| inline void FreeRestoreTensors(std::map<Tensor *, Tensor *> *restored_origin_tensors) { | |||
| MS_ASSERT(restored_origin_tensors != nullptr); | |||
| for (auto &restored_origin_tensor : *restored_origin_tensors) { | |||
| restored_origin_tensor.second->set_data(nullptr); | |||
| delete (restored_origin_tensor.second); | |||
| } | |||
| restored_origin_tensors->clear(); | |||
| } | |||
| inline bool IsChannelFirst(const std::vector<Tensor *> &in_tensors, OpParameter *op_parameter) { | |||
| MS_ASSERT(op_parameter != nullptr); | |||
| if (op_parameter->type_ == schema::PrimitiveType_MatMul) { | |||
| for (size_t i = 0; i < in_tensors.size(); i++) { | |||
| auto tensor = in_tensors.at(i); | |||
| MS_ASSERT(tensor != nullptr); | |||
| if (tensor->shape().size() != 2) { | |||
| continue; | |||
| } | |||
| const auto *param = reinterpret_cast<MatMulParameter *>(op_parameter); | |||
| if (i == 1) { | |||
| return !(param->a_transpose_); | |||
| } else if (i == 2) { | |||
| return param->b_transpose_; | |||
| } else { | |||
| // bias input does not matter here | |||
| } | |||
| } | |||
| } | |||
| return true; | |||
| } | |||
| } // namespace | |||
| kernel::LiteKernel *Scheduler::FindCpuKernel(const std::vector<Tensor *> &in_tensors, | |||
| const std::vector<Tensor *> &out_tensors, OpParameter *op_parameter, | |||
| const kernel::KernelKey &desc, TypeId kernel_data_type) { | |||
| MS_ASSERT(op_parameter != nullptr); | |||
| auto op_type = op_parameter->type_; | |||
| if (!KernelRegistry::GetInstance()->SupportKernel(desc)) { | |||
| return nullptr; | |||
| } | |||
| std::map<Tensor *, Tensor *> restored_origin_tensors; | |||
| for (auto &tensor : in_tensors) { | |||
| auto channel_first = IsChannelFirst(in_tensors, op_parameter); | |||
| auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kernel_data_type); | |||
| if (restore_tensor != nullptr) { | |||
| restored_origin_tensors[tensor] = restore_tensor; | |||
| } else { | |||
| #ifndef SUPPORT_TRAIN | |||
| if (!IsPackedOp(op_type) && !tensor->own_data()) { // && op_type != schema::PrimitiveType_LSTM | |||
| auto ret = CopyConstTensor(tensor, &restored_origin_tensors, kernel_data_type); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(DEBUG) << "CopyConstTensor failed: " << ret; | |||
| return nullptr; | |||
| } | |||
| } | |||
| #endif | |||
| } | |||
| } | |||
| auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, desc, op_parameter); | |||
| if (kernel != nullptr) { | |||
| MS_LOG(DEBUG) << "Get TypeId(" << kernel_data_type << ") op success: " << PrimitiveTypeName(op_type); | |||
| FreeRestoreTensors(&restored_origin_tensors); | |||
| } else { | |||
| RestoreTensorData(restored_origin_tensors); | |||
| } | |||
| return kernel; | |||
| } | |||
| kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in_tensors, | |||
| const std::vector<Tensor *> &out_tensors, const Model::Node *node, | |||
| TypeId prefer_data_type) { | |||
| kernel::LiteKernel *kernel = nullptr; | |||
| TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors); | |||
| MS_ASSERT(node != nullptr); | |||
| bool need_dequant = node->quant_type_ == schema::QuantType_WeightQuant; | |||
| TypeId data_type = need_dequant ? kNumberTypeFloat32 : GetFirstFp32Fp16OrInt8Type(in_tensors); | |||
| OpParameter *op_parameter = op_parameters_[node->output_indices_.at(0)]; | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Can not find OpParameter!type: " << PrimitiveTypeName(GetPrimitiveType(node->primitive_)); | |||
| return nullptr; | |||
| } | |||
| bool infer_shape_interrupt = !op_parameter->infer_flag_; | |||
| bool need_restore = true; | |||
| if (node->quant_type_ == schema::QuantType_WeightQuant) { | |||
| data_type = kNumberTypeFloat32; | |||
| } | |||
| if (!IsPackedOp(op_parameter->type_)) { | |||
| need_restore = false; | |||
| } | |||
| kernel::KernelKey desc{kCPU, data_type, static_cast<schema::PrimitiveType>(op_parameter->type_)}; | |||
| #if SUPPORT_GPU | |||
| if (context_->IsGpuEnabled()) { | |||
| // support more data type like int32 | |||
| kernel::KernelKey gpu_desc{kGPU, kNumberTypeFloat32, desc.type}; | |||
| if (context_->IsGpuFloat16Enabled()) gpu_desc.data_type = kNumberTypeFloat16; | |||
| auto ret = | |||
| KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, gpu_desc, op_parameter, &kernel); | |||
| if (ret == RET_OK) { | |||
| auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, gpu_desc, op_parameter); | |||
| if (kernel != nullptr) { | |||
| MS_LOG(DEBUG) << "Get gpu op success: " << PrimitiveCurVersionTypeName(gpu_desc.type) << " " << node->name_; | |||
| return kernel; | |||
| } else { | |||
| MS_LOG(DEBUG) << "Get gpu op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(gpu_desc.type) << " " | |||
| << node->name_; | |||
| if (ret == RET_ERROR) { | |||
| ret = InferNodeShape(node, &infer_shape_interrupt); | |||
| if (ret == RET_INFER_INVALID || ret == RET_OK) { | |||
| op_parameter = op_parameters_[node->output_indices_.at(0)]; | |||
| } else { | |||
| MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; | |||
| return nullptr; | |||
| } | |||
| auto ret = InferNodeShape(node, &infer_shape_interrupt); | |||
| if (ret == RET_INFER_INVALID || ret == RET_OK) { | |||
| op_parameter = op_parameters_[node->output_indices_.at(0)]; | |||
| } else { | |||
| MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; | |||
| return nullptr; | |||
| } | |||
| } | |||
| } | |||
| @@ -253,22 +380,19 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in | |||
| } | |||
| } | |||
| kernel::KernelKey npu_desc{kNPU, desc.data_type, desc.type}; | |||
| auto ret = | |||
| KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, npu_desc, op_parameter, &kernel); | |||
| if (ret == RET_OK) { | |||
| auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, npu_desc, op_parameter); | |||
| if (kernel != nullptr) { | |||
| MS_LOG(DEBUG) << "Get npu op success: " << PrimitiveCurVersionTypeName(npu_desc.type) << " " << node->name_; | |||
| return kernel; | |||
| } else { | |||
| MS_LOG(DEBUG) << "Get npu op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(npu_desc.type) << " " | |||
| << node->name_; | |||
| if (ret == RET_ERROR) { | |||
| ret = InferNodeShape(node, &infer_shape_interrupt); | |||
| if (ret == RET_INFER_INVALID || ret == RET_OK) { | |||
| op_parameter = op_parameters_[node->output_indices_.at(0)]; | |||
| } else { | |||
| MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; | |||
| return nullptr; | |||
| } | |||
| auto ret = InferNodeShape(node, &infer_shape_interrupt); | |||
| if (ret == RET_INFER_INVALID || ret == RET_OK) { | |||
| op_parameter = op_parameters_[node->output_indices_.at(0)]; | |||
| } else { | |||
| MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; | |||
| return nullptr; | |||
| } | |||
| } | |||
| } | |||
| @@ -277,25 +401,18 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in | |||
| mindspore::lite::IsSupportFloat16() && | |||
| ((context_->IsCpuFloat16Enabled() && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16)) { | |||
| kernel::KernelKey fp16_cpu_desc{desc.arch, kNumberTypeFloat16, desc.type}; | |||
| auto tensor_origin_data_map = | |||
| DequantUtil::DequantTensor(op_parameter, in_tensors, fp16_cpu_desc.data_type, need_restore); | |||
| auto ret = | |||
| KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, fp16_cpu_desc, op_parameter, &kernel); | |||
| DequantUtil::RestoreTensorData(tensor_origin_data_map); | |||
| if (ret == RET_OK) { | |||
| MS_LOG(DEBUG) << "Get fp16 op success: " << PrimitiveCurVersionTypeName(fp16_cpu_desc.type) << " " << node->name_; | |||
| auto kernel = FindCpuKernel(in_tensors, out_tensors, op_parameter, fp16_cpu_desc, kNumberTypeFloat16); | |||
| if (kernel != nullptr) { | |||
| return kernel; | |||
| } else { | |||
| MS_LOG(DEBUG) << "Get fp16 op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(fp16_cpu_desc.type) | |||
| << " " << node->name_; | |||
| if (ret == RET_ERROR) { | |||
| ret = InferNodeShape(node, &infer_shape_interrupt); | |||
| if (ret == RET_INFER_INVALID || ret == RET_OK) { | |||
| op_parameter = op_parameters_[node->output_indices_.at(0)]; | |||
| } else { | |||
| MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; | |||
| return nullptr; | |||
| } | |||
| auto ret = InferNodeShape(node, &infer_shape_interrupt); | |||
| if (ret == RET_INFER_INVALID || ret == RET_OK) { | |||
| op_parameter = op_parameters_[node->output_indices_.at(0)]; | |||
| } else { | |||
| MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; | |||
| return nullptr; | |||
| } | |||
| } | |||
| } | |||
| @@ -304,20 +421,20 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in | |||
| desc.data_type = kNumberTypeFloat32; | |||
| } | |||
| if (prefer_data_type == kNumberTypeFloat32 || prefer_data_type == kTypeUnknown) { | |||
| auto tensor_origin_data_map = DequantUtil::DequantTensor(op_parameter, in_tensors, desc.data_type, need_restore); | |||
| auto ret = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, desc, op_parameter, &kernel); | |||
| DequantUtil::RestoreTensorData(tensor_origin_data_map); | |||
| if (ret == RET_OK) { | |||
| auto kernel = FindCpuKernel(in_tensors, out_tensors, op_parameter, desc, kNumberTypeFloat32); | |||
| if (kernel != nullptr) { | |||
| return kernel; | |||
| } else if (ret == RET_ERROR) { | |||
| ret = InferNodeShape(node, &infer_shape_interrupt); | |||
| } else { | |||
| auto ret = InferNodeShape(node, &infer_shape_interrupt); | |||
| if (!(ret == RET_INFER_INVALID || ret == RET_OK)) { | |||
| MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; | |||
| MS_LOG(ERROR) | |||
| << "Try repeat infer fail: " << node->name_; | |||
| } | |||
| } | |||
| } | |||
| return nullptr; | |||
| } | |||
| } // namespace mindspore::lite | |||
| kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node *src_node) { | |||
| MS_ASSERT(src_model_ != nullptr); | |||
| @@ -59,6 +59,8 @@ class Scheduler { | |||
| kernel::LiteKernel *FindBackendKernel(const std::vector<Tensor *> &in_tensors, | |||
| const std::vector<Tensor *> &out_tensors, const Model::Node *node, | |||
| TypeId prefer_data_type = kTypeUnknown); | |||
| kernel::LiteKernel *FindCpuKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors, | |||
| OpParameter *op_parameter, const kernel::KernelKey &desc, TypeId kernel_data_type); | |||
| // schedule a partial node to a subgraph_kernel | |||
| kernel::LiteKernel *SchedulePartialToKernel(const lite::Model::Node *src_node); | |||
| // schedule a node to a kernel | |||
| @@ -205,7 +205,9 @@ void CpuFp16SubGraph::FreeOriginInputData() { | |||
| } | |||
| int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) { | |||
| MS_ASSERT(tensor != nullptr); | |||
| auto float32_data = tensor->data_c(); | |||
| auto own_data = tensor->own_data(); | |||
| if (float32_data == nullptr) { | |||
| MS_LOG(ERROR) << "tensor data is null."; | |||
| return lite::RET_NULL_PTR; | |||
| @@ -215,15 +217,14 @@ int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) { | |||
| auto ret = tensor->MallocData(); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "malloc data failed"; | |||
| this->FreeOriginInputData(); | |||
| return RET_ERROR; | |||
| } | |||
| MS_ASSERT(tensor->data_c() != nullptr); | |||
| Float32ToFloat16_fp16_handler(float32_data, tensor->data_c(), tensor->ElementsNum()); | |||
| auto *data_store = DataStore::CreateDataStore(float32_data, tensor->allocator(), this->context_->allocator.get()); | |||
| auto *data_store = | |||
| DataStore::CreateDataStore(float32_data, own_data, tensor->allocator(), this->context_->allocator.get()); | |||
| if (data_store == nullptr) { | |||
| MS_LOG(ERROR) << "Create DataStore failed"; | |||
| this->FreeOriginInputData(); | |||
| return RET_ERROR; | |||
| } | |||
| origin_input_data_[tensor] = data_store; | |||
| @@ -283,6 +284,7 @@ int CpuFp16SubGraph::PreProcess() { | |||
| ret = Float32TensorToFloat16Tensor(real_tensor); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed."; | |||
| this->FreeOriginInputData(); | |||
| return ret; | |||
| } | |||
| } else if (real_tensor->data_type() == kObjectTypeTensorType) { | |||
| @@ -293,6 +295,7 @@ int CpuFp16SubGraph::PreProcess() { | |||
| ret = Float32TensorToFloat16Tensor(inner_tensor); | |||
| if (RET_OK != ret) { | |||
| MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed."; | |||
| this->FreeOriginInputData(); | |||
| return ret; | |||
| } | |||
| } | |||
| @@ -372,6 +375,7 @@ int CpuFp16SubGraph::PostProcess() { | |||
| real_tensor->FreeData(); | |||
| MS_ASSERT(origin_tensor_data->data_ != nullptr); | |||
| real_tensor->set_data(origin_tensor_data->data_); | |||
| real_tensor->set_own_data(origin_tensor_data->own_data_); | |||
| real_tensor->set_data_type(kNumberTypeFloat32); | |||
| origin_tensor_data->data_ = nullptr; | |||
| tensor_count++; | |||
| @@ -385,6 +389,7 @@ int CpuFp16SubGraph::PostProcess() { | |||
| inner_tensor->FreeData(); | |||
| MS_ASSERT(origin_tensor_data->data_ != nullptr); | |||
| inner_tensor->set_data(origin_tensor_data->data_); | |||
| inner_tensor->set_own_data(origin_tensor_data->own_data_); | |||
| inner_tensor->set_data_type(kNumberTypeFloat32); | |||
| origin_tensor_data->data_ = nullptr; | |||
| tensor_count++; | |||
| @@ -33,9 +33,10 @@ namespace mindspore::kernel { | |||
| // store origin data and allocator of input tensor of subgraph for PreProcess and PostProcess | |||
| struct DataStore { | |||
| void *data_ = nullptr; | |||
| mindspore::Allocator *allocator_ = nullptr; | |||
| static DataStore *CreateDataStore(void *data = nullptr, mindspore::Allocator *data_allocator = nullptr, | |||
| mindspore::Allocator *allocator = nullptr) { | |||
| Allocator *allocator_ = nullptr; | |||
| bool own_data_ = true; | |||
| static DataStore *CreateDataStore(void *data = nullptr, bool own_data = true, Allocator *data_allocator = nullptr, | |||
| Allocator *allocator = nullptr) { | |||
| DataStore *data_store = nullptr; | |||
| if (allocator == nullptr) { | |||
| data_store = static_cast<DataStore *>(malloc(sizeof(DataStore))); | |||
| @@ -47,6 +48,7 @@ struct DataStore { | |||
| return nullptr; | |||
| } | |||
| data_store->data_ = data; | |||
| data_store->own_data_ = own_data; | |||
| data_store->allocator_ = data_allocator; | |||
| return data_store; | |||
| } | |||
| @@ -25,7 +25,7 @@ | |||
| namespace mindspore { | |||
| namespace lite { | |||
| #define kMaxMallocSize 1024 * 1024 * 100 | |||
| #define kMaxMallocSize 1024 * 1024 * 300 | |||
| Tensor::Tensor(const TypeId data_type, std::vector<int> shape, const schema::Format &format, Category category) | |||
| : data_type_(data_type), shape_(std::move(shape)), format_(format), category_(category) {} | |||
| @@ -43,16 +43,9 @@ int Tensor::CopyTensorData(const Tensor &src_tensor, Tensor *dst_tensor) { | |||
| MS_LOG(ERROR) << "Size of dst tensor is not compatible with src tensor"; | |||
| return RET_ERROR; | |||
| } | |||
| if (dst_tensor->data_ == nullptr) { | |||
| if (data_size > kMaxMallocSize) { | |||
| MS_LOG(ERROR) << "Malloc size is too big while coping data, " << data_size << " bytes"; | |||
| return RET_ERROR; | |||
| } | |||
| dst_tensor->data_ = malloc(data_size); | |||
| if (dst_tensor->data_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc memory failed"; | |||
| return RET_ERROR; | |||
| } | |||
| if (dst_tensor->MallocData() != RET_OK) { | |||
| MS_LOG(ERROR) << "Malloc memory failed"; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(dst_tensor->data_, src_tensor.data_, data_size); | |||
| return RET_OK; | |||
| @@ -74,12 +67,13 @@ Tensor *Tensor::CopyTensor(const Tensor &src_tensor, bool copy_data) { | |||
| MS_LOG(ERROR) << "CopyTensorData error"; | |||
| return nullptr; | |||
| } | |||
| result->own_data_ = src_tensor.own_data_; | |||
| } | |||
| return result; | |||
| } | |||
| Tensor::~Tensor() { | |||
| if (nullptr != this->data_) { | |||
| if (nullptr != this->data_ && this->own_data_) { | |||
| if (this->allocator_ != nullptr) { | |||
| this->allocator_->Free(this->data_); | |||
| } else { | |||
| @@ -276,13 +270,13 @@ std::string Tensor::ToString() const { | |||
| return oss.str(); | |||
| } | |||
| int Tensor::set_root_tensor(Tensor *tensor) { | |||
| void Tensor::set_root_tensor(Tensor *tensor) { | |||
| this->root_tensor_ = tensor; | |||
| if (this->root_tensor_ == this) { | |||
| return RET_OK; | |||
| return; | |||
| } | |||
| if (this->root_tensor_ == nullptr) { | |||
| return RET_OK; | |||
| return; | |||
| } | |||
| this->shape_ = this->root_tensor_->shape_; | |||
| this->format_ = this->root_tensor_->format_; | |||
| @@ -290,7 +284,6 @@ int Tensor::set_root_tensor(Tensor *tensor) { | |||
| this->category_ = this->root_tensor_->category_; | |||
| this->quant_params_ = this->root_tensor_->quant_params_; | |||
| this->quant_clusters_ = this->root_tensor_->quant_clusters_; | |||
| return RET_OK; | |||
| } | |||
| int Tensor::MallocData(const mindspore::Allocator *allocator) { | |||
| @@ -300,16 +293,21 @@ int Tensor::MallocData(const mindspore::Allocator *allocator) { | |||
| if (allocator != nullptr) { | |||
| allocator_ = const_cast<mindspore::Allocator *>(allocator); | |||
| } | |||
| auto data_size = this->Size(); | |||
| if (data_size > kMaxMallocSize) { | |||
| MS_LOG(ERROR) << "Malloc size is too big while coping data, " << data_size << " bytes"; | |||
| return RET_ERROR; | |||
| } | |||
| if (allocator_ == nullptr) { | |||
| this->data_ = malloc(this->Size()); | |||
| this->data_ = malloc(data_size); | |||
| } else { | |||
| this->data_ = allocator_->Malloc(this->Size()); | |||
| this->data_ = allocator_->Malloc(data_size); | |||
| } | |||
| if (nullptr == this->data_) { | |||
| MS_LOG(ERROR) << "Malloc tensor data failed, size=" << this->Size(); | |||
| MS_LOG(ERROR) << "Malloc tensor data failed, size=" << data_size; | |||
| return RET_ERROR; | |||
| } | |||
| this->own_data_ = true; | |||
| return RET_OK; | |||
| } | |||
| @@ -317,6 +315,9 @@ void Tensor::FreeData() { | |||
| if (nullptr == this->data_) { | |||
| return; | |||
| } | |||
| if (!this->own_data_) { | |||
| return; | |||
| } | |||
| if (nullptr == allocator_) { | |||
| free(this->data_); | |||
| this->data_ = nullptr; | |||
| @@ -366,10 +367,6 @@ std::vector<float> Tensor::quant_clusters() const { return this->quant_clusters_ | |||
| void Tensor::set_quant_clusters(const std::vector<float> &clusters) { this->quant_clusters_ = clusters; } | |||
| bool Tensor::enable_huffman_code() const { return enable_huffman_code_; } | |||
| void Tensor::set_enable_huffman_code(bool enable_huffman_code) { this->enable_huffman_code_ = enable_huffman_code; } | |||
| std::vector<tensor::MSTensor *> TensorVectorCast(const std::vector<Tensor *> &src) { | |||
| std::vector<tensor::MSTensor *> target(src.size()); | |||
| std::transform(src.begin(), src.end(), target.begin(), [](Tensor *t) { return dynamic_cast<tensor::MSTensor *>(t); }); | |||
| @@ -121,7 +121,10 @@ class Tensor : public mindspore::tensor::MSTensor { | |||
| return data_; | |||
| } | |||
| void set_data(void *data) override { this->data_ = data; } | |||
| void set_data(void *data) override { | |||
| this->data_ = data; | |||
| this->own_data_ = true; | |||
| } | |||
| Category category() const { return this->category_; } | |||
| @@ -153,10 +156,6 @@ class Tensor : public mindspore::tensor::MSTensor { | |||
| void set_quant_clusters(const std::vector<float> &clusters); | |||
| bool enable_huffman_code() const; | |||
| void set_enable_huffman_code(bool enable_huffman_code); | |||
| virtual bool IsConst() const { | |||
| return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr; | |||
| } | |||
| @@ -173,7 +172,7 @@ class Tensor : public mindspore::tensor::MSTensor { | |||
| } | |||
| } | |||
| virtual int set_root_tensor(Tensor *tensor); | |||
| virtual void set_root_tensor(Tensor *tensor); | |||
| Tensor *root_tensor() const { return this->root_tensor_; } | |||
| @@ -181,6 +180,10 @@ class Tensor : public mindspore::tensor::MSTensor { | |||
| return this->IsConst() || (this->IsGraphInput() && this->data_ != nullptr) || this->ref_count_ >= 1; | |||
| } | |||
| bool own_data() const { return this->own_data_; } | |||
| void set_own_data(bool own_data) { this->own_data_ = own_data; } | |||
| private: | |||
| template <typename T> | |||
| std::string DataToString(void *data, size_t data_number) const { | |||
| @@ -208,7 +211,7 @@ class Tensor : public mindspore::tensor::MSTensor { | |||
| std::vector<float> quant_clusters_; | |||
| mindspore::Allocator *allocator_ = nullptr; | |||
| Tensor *root_tensor_ = nullptr; | |||
| bool enable_huffman_code_ = false; | |||
| bool own_data_{false}; | |||
| }; | |||
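A toy illustration of the `own_data_` contract introduced here (not the real `lite::Tensor`): data is only freed when the tensor allocated or explicitly adopted it, so buffers borrowed from the flatbuffer model (marked with `set_own_data(false)` after `set_data`) are never double-freed.

```cpp
#include <cstddef>
#include <cstdlib>

class ToyTensor {
 public:
  ~ToyTensor() { FreeData(); }
  // Mirrors the new set_data(): adopting a pointer takes ownership by default.
  void set_data(void *data) { data_ = data; own_data_ = true; }
  // Callers pass false when the buffer belongs to someone else (e.g. the model).
  void set_own_data(bool own) { own_data_ = own; }
  void MallocData(size_t size) { data_ = std::malloc(size); own_data_ = true; }
  void FreeData() {
    if (data_ != nullptr && own_data_) {
      std::free(data_);
    }
    data_ = nullptr;
  }
 private:
  void *data_ = nullptr;
  bool own_data_ = false;  // same default as in the patch above
};
```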
| inline size_t DataTypeSize(const TypeId type) { | |||
| @@ -199,23 +199,15 @@ int TensorList::CheckTensorListParam() { | |||
| return RET_OK; | |||
| } | |||
| int TensorList::set_root_tensor(Tensor *tensor) { | |||
| auto ret = Tensor::set_root_tensor(tensor); | |||
| if (ret != RET_OK) { | |||
| return ret; | |||
| } | |||
| if (this->data_type_ != kObjectTypeTensorType) { | |||
| return RET_OK; | |||
| void TensorList::set_root_tensor(Tensor *tensor) { | |||
| Tensor::set_root_tensor(tensor); | |||
| if (this->data_type_ != kObjectTypeTensorType || tensor == nullptr) { | |||
| return; | |||
| } | |||
| auto root_tensorlist = reinterpret_cast<TensorList *>(this->root_tensor_); | |||
| if (root_tensorlist == nullptr) { | |||
| MS_LOG(ERROR) << "root_tensor of tensorlist should be a tensorlist"; | |||
| return RET_INFER_INVALID; | |||
| } | |||
| this->element_shape_ = root_tensorlist->element_shape_; | |||
| this->max_elements_num_ = root_tensorlist->max_elements_num_; | |||
| this->tensors_data_type_ = root_tensorlist->tensors_data_type_; | |||
| return RET_OK; | |||
| } | |||
| Tensor *TensorList::GetTensor(int index) { | |||
| @@ -109,11 +109,10 @@ class TensorList : public Tensor { | |||
| bool IsConst() const override; | |||
| int set_root_tensor(Tensor *tensor) override; | |||
| void set_root_tensor(Tensor *tensor) override; | |||
| protected: | |||
| // The following functions must be masked. | |||
| void set_data(void *data) override {} | |||
| void *data_c() const override { return nullptr; } | |||
| void *MutableData() override { return nullptr; } | |||
| size_t Size() const override { return 0; } | |||
| @@ -37,7 +37,7 @@ adversarial_pruning.onnx 3 | |||
| residual_distill_res34_cifar10_bs_1_update.onnx 2 | |||
| residual_distill_res50_cifar10_bs_1_update.onnx 2 | |||
| #ml_voice_detect.onnx #out of float16 range because power op | |||
| hdc_ocr_attention.onnx 1 | |||
| hdc_ocr_attention.onnx 1.6 | |||
| hdc_ocr_detect.onnx 30 #one of the output has small values | |||
| ml_edu_kit_hand_detection.onnx 2 | |||
| ml_edu_kit_hand_key_position.onnx 2 | |||