codetags/v1.1.0
| @@ -24,7 +24,6 @@ set(LITE_SRC | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/allocator.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_api.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/thread_pool.c | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/runtime/workspace_pool.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/executor.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/inner_context.cc | |||
| @@ -18,12 +18,10 @@ | |||
#include <fcntl.h>
#include <climits>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include "securec/include/securec.h"
| namespace mindspore { | |||
| namespace lite { | |||
| #define MAX_FILENAME_LEN 1024 | |||
| char *ReadFile(const char *file, size_t *size) { | |||
| if (file == nullptr) { | |||
| MS_LOG(ERROR) << "file is nullptr"; | |||
| @@ -48,131 +48,10 @@ uint64_t GetTimeUs() { | |||
| return 0; | |||
| } | |||
| // USECS_IN_SEC *NSECS_IN_USEC; | |||
| uint64_t retval = static_cast<uint64_t>((ts.tv_sec * USEC) + (ts.tv_nsec / MSEC)); | |||
| auto retval = static_cast<uint64_t>((ts.tv_sec * USEC) + (ts.tv_nsec / MSEC)); | |||
| return retval; | |||
| } | |||
static const unsigned int FP32_BIT_SIZE = 32;
static const unsigned int FP32_EXPONENT_BIAS = 127;
static const unsigned int FP32_SIGNIFICAND = 23;
static const unsigned int FP32_EXPONENT_MAX = 255;
static const unsigned int FP16_BIT_SIZE = 16;
static const unsigned int FP16_EXPONENT_BIAS = 15;
static const unsigned int FP16_SIGNIFICAND = 10;
static const int FP16_EXPONENT_MAX = 30;
static const int FP16_EXPONENT_MIN = -10;

// Reinterprets a 32-bit pattern as a float. memcpy is used on purpose: a
// static_cast would convert the integer *value*, and a pointer cast would
// break the strict-aliasing rule.
static float Fp32FromBits(uint32_t bits) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
  float value;
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}

// Returns the raw 32-bit pattern of a float (inverse of Fp32FromBits).
static uint32_t Fp32ToBits(float value) {
  static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}

// Computes value >> shift, rounded to nearest with ties going to the even
// result. shift must be in [1, 31]. A rounding carry is allowed to propagate
// into the higher bits of the result.
static uint32_t RoundShiftNearestEven(uint32_t value, unsigned int shift) {
  const uint32_t halfway = 1u << (shift - 1);
  const uint32_t remainder = value & ((halfway << 1) - 1u);
  uint32_t result = value >> shift;
  if (remainder > halfway || (remainder == halfway && (result & 1u) != 0)) {
    ++result;
  }
  return result;
}

// Converts one half-precision value, passed as its raw 16-bit pattern, to float.
//
// srcValue: bit pattern of the half (1 sign, 5 exponent, 10 significand bits).
// Returns the float with the same value. Every half NaN maps to a quiet float
// NaN carrying the input sign; the sign of zero is preserved.
float ShortToFloat32(int16_t srcValue) {
  // Go through uint16_t first so that a negative int16_t is not sign-extended.
  const uint32_t half = static_cast<uint16_t>(srcValue);
  const uint32_t sign = (half & 0x8000u) << FP16_BIT_SIZE;
  const uint32_t exponent = half & 0x7C00u;
  const uint32_t mantissa = half & 0x03FFu;

  // nan or inf
  if (exponent == 0x7C00u) {
    return Fp32FromBits(sign | (mantissa != 0 ? 0x7FC00000u : 0x7F800000u));
  }

  // normal number: re-bias the exponent, then widen the significand field
  if (exponent != 0) {
    const uint32_t rebias = (FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS) << FP16_SIGNIFICAND;
    return Fp32FromBits(sign | (((exponent + rebias) | mantissa) << (FP32_SIGNIFICAND - FP16_SIGNIFICAND)));
  }

  // zero or subnormal: the value is mantissa * 2^-24. The scale below is
  // +/-2^-24 as a float, so the product is exact and keeps the sign (also for zero).
  const uint32_t scaleBits =
    sign | ((FP32_EXPONENT_BIAS - FP16_EXPONENT_BIAS - FP16_SIGNIFICAND + 1) << FP32_SIGNIFICAND);
  return static_cast<float>(mantissa) * Fp32FromBits(scaleBits);
}

// Converts a float to half precision and returns the raw 16-bit pattern.
//
// Rounding is to nearest, ties to even. Magnitudes too large for a half become
// infinity, magnitudes below half of the smallest subnormal become zero, and the
// sign bit is always carried over. A NaN input always yields a NaN output.
int16_t Float32ToShort(float srcValue) {
  const uint32_t bits = Fp32ToBits(srcValue);
  const uint32_t sign = (bits >> (FP32_BIT_SIZE - 1)) << (FP16_BIT_SIZE - 1);
  const uint32_t mantissa = bits & 0x007FFFFFu;
  const uint32_t biased = (bits & 0x7F800000u) >> FP32_SIGNIFICAND;
  // exponent, re-biased for half precision
  const int exp = static_cast<int>(biased) + static_cast<int>(FP16_EXPONENT_BIAS) - static_cast<int>(FP32_EXPONENT_BIAS);
  const int dropped = static_cast<int>(FP32_SIGNIFICAND - FP16_SIGNIFICAND);

  uint32_t magnitude = 0;  // stays 0 when the input underflows to zero
  if (biased == FP32_EXPONENT_MAX) {
    // infinity or NaN
    magnitude = 0x7C00u;
    if (mantissa != 0) {
      // Keep the top payload bits; force the quiet bit when they are all zero,
      // otherwise the NaN would silently turn into infinity.
      const uint32_t payload = mantissa >> dropped;
      magnitude |= (payload != 0 ? payload : 0x0200u);
    }
  } else if (exp > FP16_EXPONENT_MAX) {
    // exponent overflow - return infinity half
    magnitude = 0x7C00u;
  } else if (exp > 0) {
    // normalized half: exponent and significand are rounded as one integer so a
    // carry out of the significand bumps the exponent (possibly up to infinity).
    magnitude =
      RoundShiftNearestEven((static_cast<uint32_t>(exp) << FP32_SIGNIFICAND) | mantissa, static_cast<unsigned int>(dropped));
  } else if (exp >= FP16_EXPONENT_MIN) {
    // denormalized half: restore the implicit leading one, then shift it down.
    magnitude = RoundShiftNearestEven(mantissa | 0x00800000u, static_cast<unsigned int>(dropped + 1 - exp));
  }
  return static_cast<int16_t>(static_cast<uint16_t>(sign | magnitude));
}
| std::string Remove(const std::string &from, const std::string &subStr, Mode mode) { | |||
| std::string result = from; | |||
| if (mode == PREFIX) { | |||
| @@ -236,22 +115,6 @@ std::vector<std::string> Tokenize(const std::string &src, const std::string &del | |||
| return tokens; | |||
| } | |||
| void ShortToFloat32(const int16_t *srcdata, float *dstdata, size_t elementSize) { | |||
| MS_ASSERT(srcdata != nullptr); | |||
| MS_ASSERT(dstdata != nullptr); | |||
| for (size_t i = 0; i < elementSize; i++) { | |||
| dstdata[i] = ShortToFloat32(srcdata[i]); | |||
| } | |||
| } | |||
| void Float32ToShort(const float *srcdata, int16_t *dstdata, size_t elementSize) { | |||
| MS_ASSERT(srcdata != nullptr); | |||
| MS_ASSERT(dstdata != nullptr); | |||
| for (size_t i = 0; i < elementSize; i++) { | |||
| dstdata[i] = Float32ToShort(srcdata[i]); | |||
| } | |||
| } | |||
| #if defined(__ANDROID__) | |||
| uint32_t getHwCap(int hwcap_type) { | |||
| uint32_t ret = getauxval(hwcap_type); | |||
| @@ -34,15 +34,7 @@ const int USEC = 1000000; | |||
| const int MSEC = 1000; | |||
| std::vector<std::string> StringSplit(std::string str, const std::string &pattern); | |||
| uint64_t GetTimeUs(void); | |||
| int16_t Float32ToShort(float srcValue); | |||
| float ShortToFloat32(int16_t srcValue); | |||
| void ShortToFloat32(const int16_t *srcdata, float *dstdata, size_t elementSize); | |||
| void Float32ToShort(const float *srcdata, int16_t *dstdata, size_t elementSize); | |||
| uint64_t GetTimeUs(); | |||
| bool IsSupportSDot(); | |||
| @@ -75,57 +75,4 @@ int Executor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_ | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int Executor::TransformTensorLayout(Tensor *tensor, schema::Format dst_format, Allocator *allocator) { | |||
| MS_ASSERT(nullptr != tensor); | |||
| MS_ASSERT(nullptr != allocator); | |||
| MS_ASSERT(4 == tensor->shape().size()); | |||
| auto data_type = tensor->data_type(); | |||
| switch (data_type) { | |||
| case kNumberTypeInt8: | |||
| return TransformTensorLayoutUint8(tensor, dst_format, allocator); | |||
| case kNumberTypeFloat32: | |||
| return TransformTensorLayoutFp32(tensor, dst_format, allocator); | |||
| default: | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int Executor::TransformTensorLayoutFp32(Tensor *tensor, schema::Format dst_format, Allocator *allocator) { | |||
| MS_ASSERT(nullptr != tensor); | |||
| MS_ASSERT(nullptr != allocator); | |||
| MS_ASSERT(4 == tensor->shape().size()); | |||
| auto src_format = tensor->GetFormat(); | |||
| if (src_format == schema::Format::Format_NC4HW4 && dst_format == schema::Format::Format_NHWC) { | |||
| auto *src_data = tensor->data_c(); | |||
| if (src_data == nullptr) { | |||
| MS_LOG(ERROR) << "data of tensor is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| auto *dst_data = allocator->Malloc(tensor->Size()); | |||
| if (dst_data == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc data failed"; | |||
| return RET_ERROR; | |||
| } | |||
| PackNC4HW4ToNHWCFp32(src_data, dst_data, tensor->Batch(), tensor->Height() * tensor->Width(), tensor->Channel()); | |||
| tensor->set_data(dst_data); | |||
| tensor->SetFormat(dst_format); | |||
| allocator->Free(src_data); | |||
| return RET_OK; | |||
| } else { | |||
| MS_LOG(ERROR) << "Unsupported layout transform: " << EnumNameFormat(tensor->GetFormat()) << " to " | |||
| << EnumNameFormat(dst_format) << " in float32"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| int Executor::TransformTensorLayoutUint8(Tensor *tensor, schema::Format dst_format, Allocator *allocator) { | |||
| MS_ASSERT(nullptr != tensor); | |||
| MS_ASSERT(nullptr != allocator); | |||
| MS_ASSERT(4 == tensor->shape().size()); | |||
| MS_LOG(ERROR) << "Unsupported layout transform: " << EnumNameFormat(tensor->GetFormat()) << " to " | |||
| << EnumNameFormat(dst_format) << " in uint8"; | |||
| return RET_ERROR; | |||
| } | |||
| } // namespace mindspore::lite | |||
| @@ -36,12 +36,6 @@ class Executor { | |||
| protected: | |||
| int CheckInputs(std::vector<Tensor *> &in_tensors); | |||
| int TransformTensorLayoutFp32(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr); | |||
| int TransformTensorLayoutUint8(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr); | |||
| int TransformTensorLayout(Tensor *tensor, schema::Format dst_format, Allocator *allocator = nullptr); | |||
| }; | |||
| } // namespace mindspore::lite | |||
| #endif | |||
| @@ -50,8 +50,7 @@ class KernelRegistry { | |||
| static const int data_type_length_{kNumberTypeEnd - kNumberTypeBegin + 1}; | |||
| static const int op_type_length_{PrimitiveType_MAX - PrimitiveType_MIN + 1}; | |||
| static const int array_size_{device_type_length_ * data_type_length_ * op_type_length_}; | |||
| kernel::KernelCreator creator_arrays_[array_size_] = {0}; | |||
| std::vector<OpParameter *> op_parameters_; | |||
| kernel::KernelCreator creator_arrays_[array_size_] = {nullptr}; | |||
| }; | |||
| class KernelRegistrar { | |||
| @@ -52,89 +52,106 @@ static bool WeightTensorNeedCopy(const lite::Model *model, const uint32_t tensor | |||
| LiteSession::LiteSession() { this->is_running_.store(false); } | |||
| void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor) { | |||
| MS_ASSERT(src_tensor != nullptr); | |||
| MS_ASSERT(dst_tensor != nullptr); | |||
| auto quant_params = src_tensor->quantParams(); | |||
| if (quant_params != nullptr) { | |||
| for (size_t j = 0; j < quant_params->size(); j++) { | |||
| QuantArg quant_arg{}; | |||
| quant_arg.bitNum = quant_params->Get(j)->numBits(); | |||
| quant_arg.scale = quant_params->Get(j)->scale(); | |||
| quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint(); | |||
| quant_arg.var_corr = quant_params->Get(j)->varCorr(); | |||
| quant_arg.mean_corr = quant_params->Get(j)->meanCorr(); | |||
| quant_arg.inited = quant_params->Get(j)->inited(); | |||
| dst_tensor->AddQuantParam(quant_arg); | |||
| } | |||
| } | |||
| auto quant_clusters = src_tensor->quantClusters(); | |||
| if (quant_clusters != nullptr) { | |||
| std::vector<float> clusters; | |||
| for (size_t j = 0; j < quant_clusters->size(); j++) { | |||
| clusters.push_back(quant_clusters->Get(j)); | |||
| } | |||
| dst_tensor->SetQuantClusters(clusters); | |||
| } | |||
| } | |||
| int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_index, const schema::Tensor *src_tensor, | |||
| lite::Tensor *dst_tensor) { | |||
| MS_ASSERT(src_tensor != nullptr); | |||
| MS_ASSERT(dst_tensor != nullptr); | |||
| auto src_category = TensorCategory(src_tensor); | |||
| auto data_type = src_tensor->dataType(); | |||
| if ((src_category == Tensor::Category::CONST_TENSOR || src_category == Tensor::Category::CONST_SCALAR) && | |||
| src_tensor->data() != nullptr && src_tensor->data()->size() > 0) { | |||
| MS_ASSERT(dst_tensor->Size() == src_tensor->data()->size()); | |||
| if (WeightTensorNeedCopy(model, tensor_index)) { | |||
| auto dst_data = dst_tensor->MutableData(); | |||
| if (dst_data == nullptr) { | |||
| MS_LOG(ERROR) << "Data from tensor is nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| memcpy(dst_data, src_tensor->data()->data(), dst_tensor->Size()); | |||
| copyed_tensor_idxes_.emplace_back(tensor_index); | |||
| } else { | |||
| int pack_size = src_tensor->data()->size(); | |||
| int org_size = dst_tensor->Size(); | |||
| if (pack_size != org_size && (data_type == kNumberTypeInt8 || data_type == kNumberTypeInt16)) { | |||
| auto ret = dst_tensor->MallocData(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Malloc data for tensor failed "; | |||
| return RET_ERROR; | |||
| } | |||
| kernel::DequantUtil::UnPackToInt(src_tensor, dst_tensor->MutableData()); | |||
| } else { | |||
| dst_tensor->set_data(const_cast<unsigned char *>(src_tensor->data()->data())); | |||
| } | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int LiteSession::ConvertTensors(const lite::Model *model) { | |||
| MS_ASSERT(model != nullptr); | |||
| copyed_tensor_idxes_.clear(); | |||
| uint32_t tensor_count = model->all_tensors_.size(); | |||
| for (uint32_t i = 0; i < tensor_count; ++i) { | |||
| auto *srcTensor = model->all_tensors_[i]; | |||
| if (srcTensor == nullptr) { | |||
| auto *src_tensor = model->all_tensors_[i]; | |||
| if (src_tensor == nullptr) { | |||
| MS_LOG(ERROR) << i << "th tensor in model is nullptr"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| auto src_category = TensorCategory(srcTensor); | |||
| auto src_category = TensorCategory(src_tensor); | |||
| std::vector<int> shape; | |||
| if (srcTensor->dims() == nullptr) { | |||
| if (src_tensor->dims() == nullptr) { | |||
| MS_LOG(DEBUG) << "Dims of " << i << "th tensor is nullptr"; | |||
| } else { | |||
| if (src_category == Tensor::Category::CONST_TENSOR) { | |||
| if (srcTensor->dataType() == kObjectTypeString && srcTensor->data() != nullptr) { | |||
| shape.push_back(srcTensor->data()->size()); | |||
| } else { | |||
| for (size_t j = 0; j < srcTensor->dims()->size(); j++) { | |||
| shape.push_back(srcTensor->dims()->data()[j]); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| int dataType = srcTensor->dataType(); | |||
| auto *dstTensor = new (std::nothrow) Tensor(TypeId(dataType), shape, srcTensor->format(), src_category); | |||
| if (dstTensor == nullptr) { | |||
| MS_LOG(ERROR) << "new " << i << "th tensor failed"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| if ((src_category == Tensor::Category::CONST_TENSOR || src_category == Tensor::Category::CONST_SCALAR) && | |||
| srcTensor->data() != nullptr && srcTensor->data()->size() > 0) { | |||
| MS_ASSERT(dstTensor->Size() == srcTensor->data()->size()); | |||
| if (WeightTensorNeedCopy(model, i)) { | |||
| auto dst_data = dstTensor->MutableData(); | |||
| if (dst_data == nullptr) { | |||
| MS_LOG(ERROR) << "MutableData from " << i << "th tensor is nullptr"; | |||
| delete dstTensor; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(dst_data, srcTensor->data()->data(), dstTensor->Size()); | |||
| copyed_tensor_idxes_.emplace_back(i); | |||
| if (src_tensor->dims() != nullptr && src_category == Tensor::Category::CONST_TENSOR) { | |||
| if (src_tensor->dataType() == kObjectTypeString && src_tensor->data() != nullptr) { | |||
| shape.push_back(src_tensor->data()->size()); | |||
| } else { | |||
| int pack_size = srcTensor->data()->size(); | |||
| int org_size = dstTensor->Size(); | |||
| if (pack_size != org_size && (dataType == kNumberTypeInt8 || dataType == kNumberTypeInt16)) { | |||
| auto ret = dstTensor->MallocData(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Malloc data for " << i << "tensor failed "; | |||
| delete dstTensor; | |||
| return RET_ERROR; | |||
| } | |||
| kernel::DequantUtil::UnPackToInt(srcTensor, dstTensor->MutableData()); | |||
| } else { | |||
| dstTensor->set_data(const_cast<unsigned char *>(srcTensor->data()->data())); | |||
| for (size_t j = 0; j < src_tensor->dims()->size(); j++) { | |||
| shape.push_back(src_tensor->dims()->data()[j]); | |||
| } | |||
| } | |||
| } | |||
| auto quant_params = srcTensor->quantParams(); | |||
| if (quant_params != nullptr) { | |||
| for (size_t j = 0; j < quant_params->size(); j++) { | |||
| QuantArg quant_arg{}; | |||
| quant_arg.bitNum = quant_params->Get(j)->numBits(); | |||
| quant_arg.scale = quant_params->Get(j)->scale(); | |||
| quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint(); | |||
| quant_arg.var_corr = quant_params->Get(j)->varCorr(); | |||
| quant_arg.mean_corr = quant_params->Get(j)->meanCorr(); | |||
| quant_arg.inited = quant_params->Get(j)->inited(); | |||
| dstTensor->AddQuantParam(quant_arg); | |||
| } | |||
| auto *dst_tensor = | |||
| new (std::nothrow) Tensor(TypeId(src_tensor->dataType()), shape, src_tensor->format(), src_category); | |||
| if (dst_tensor == nullptr) { | |||
| MS_LOG(ERROR) << "new " << i << "th tensor failed"; | |||
| return RET_NULL_PTR; | |||
| } | |||
| auto quant_clusters = srcTensor->quantClusters(); | |||
| if (quant_clusters != nullptr) { | |||
| std::vector<float> clusters; | |||
| for (size_t j = 0; j < quant_clusters->size(); j++) { | |||
| clusters.push_back(quant_clusters->Get(j)); | |||
| } | |||
| dstTensor->SetQuantClusters(clusters); | |||
| auto ret = ConvertTensorsData(model, i, src_tensor, dst_tensor); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed"; | |||
| delete (dst_tensor); | |||
| return ret; | |||
| } | |||
| this->tensors_.emplace_back(dstTensor); | |||
| ConvertTensorsQuantParam(src_tensor, dst_tensor); | |||
| this->tensors_.emplace_back(dst_tensor); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -66,6 +66,11 @@ class LiteSession : public session::LiteSession { | |||
| const std::vector<std::vector<int>> &dims) override; | |||
| protected: | |||
| void ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor); | |||
| int ConvertTensorsData(const lite::Model *model, size_t tensor_index, const schema::Tensor *src_tensor, | |||
| lite::Tensor *dst_tensor); | |||
| int ConvertTensors(const lite::Model *model); | |||
| void InitGraphInOutTensors(const lite::Model *model); | |||
| @@ -216,12 +216,29 @@ void PrimitiveC::CalFloatScopeByMeanAndStddev(const double &mean, const double & | |||
| *mMax = static_cast<float>((qmax - mean) / stdDev); | |||
| } | |||
| void PrimitiveC::PopulaterQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| auto narrow_range = prim.GetAttr("narrow_range"); | |||
| bool narrowRangeQuantParam = narrow_range != nullptr ? GetValue<bool>(narrow_range) : false; | |||
| auto num_bits = prim.GetAttr("num_bits"); | |||
| int32_t numbitsRangeQuantParam = num_bits != nullptr ? GetValue<int64_t>(num_bits) : 8; | |||
| void PrimitiveC::FillDefaultInputQuantParamIfNeed(const size_t &inputSize) { | |||
| std::vector<schema::QuantParamT> quants; | |||
| schema::QuantParamT quantParam; | |||
| // fill input_quant_param_ by not inited quant_parm | |||
| if (input_quant_param_.size() < inputSize) { | |||
| schema::QuantParamT tmpQuantParam; | |||
| quants.emplace_back(tmpQuantParam); | |||
| input_quant_param_.insert(input_quant_param_.end(), inputSize - input_quant_param_.size(), quants); | |||
| } | |||
| if (input_quant_param_.size() == kDoubleNum) { | |||
| quants.clear(); | |||
| quantParam.min = 0.0; | |||
| quantParam.max = 0.0; | |||
| quantParam.zeroPoint = 0; | |||
| quantParam.scale = input_quant_param_.at(0).at(0).scale * input_quant_param_.at(1).at(0).scale; | |||
| quants.emplace_back(quantParam); | |||
| input_quant_param_.emplace_back(quants); | |||
| } | |||
| } | |||
| void PrimitiveC::PopulaterInputQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs, | |||
| bool narrowRangeQuantParam, int32_t numbitsRangeQuantParam) { | |||
| std::vector<schema::QuantParamT> quants; | |||
| schema::QuantParamT quantParam; | |||
| auto inputMin = prim.GetAttr("input_minq"); | |||
| @@ -267,26 +284,13 @@ void PrimitiveC::PopulaterQuantParam(const Primitive &prim, const std::vector<An | |||
| quants.emplace_back(quantParam); | |||
| input_quant_param_.emplace_back(quants); | |||
| } | |||
| FillDefaultInputQuantParamIfNeed(inputs.size()); | |||
| } | |||
| // fill input_quant_param_ by not inited quant_parm | |||
| if (input_quant_param_.size() < inputs.size()) { | |||
| quants.clear(); | |||
| schema::QuantParamT tmpQuantParam; | |||
| quants.emplace_back(tmpQuantParam); | |||
| input_quant_param_.insert(input_quant_param_.end(), inputs.size() - input_quant_param_.size(), quants); | |||
| } | |||
| if (input_quant_param_.size() == kDoubleNum) { | |||
| quants.clear(); | |||
| quantParam.min = 0.0; | |||
| quantParam.max = 0.0; | |||
| quantParam.zeroPoint = 0; | |||
| quantParam.scale = input_quant_param_.at(0).at(0).scale * input_quant_param_.at(1).at(0).scale; | |||
| quants.emplace_back(quantParam); | |||
| input_quant_param_.emplace_back(quants); | |||
| } | |||
| quants.clear(); | |||
| void PrimitiveC::PopulaterOutputQuantParam(const Primitive &prim, bool narrowRangeQuantParam, | |||
| int32_t numbitsRangeQuantParam) { | |||
| std::vector<schema::QuantParamT> quants; | |||
| schema::QuantParamT quantParam; | |||
| auto outputMin = prim.GetAttr("output_minq"); | |||
| auto outputMax = prim.GetAttr("output_maxq"); | |||
| if (outputMin != nullptr && outputMax != nullptr) { | |||
| @@ -311,6 +315,15 @@ void PrimitiveC::PopulaterQuantParam(const Primitive &prim, const std::vector<An | |||
| } | |||
| } | |||
| void PrimitiveC::PopulaterQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) { | |||
| auto narrow_range = prim.GetAttr("narrow_range"); | |||
| bool narrowRangeQuantParam = narrow_range != nullptr ? GetValue<bool>(narrow_range) : false; | |||
| auto num_bits = prim.GetAttr("num_bits"); | |||
| int32_t numbitsRangeQuantParam = num_bits != nullptr ? GetValue<int64_t>(num_bits) : 8; | |||
| PopulaterInputQuantParam(prim, inputs, narrowRangeQuantParam, numbitsRangeQuantParam); | |||
| PopulaterOutputQuantParam(prim, narrowRangeQuantParam, numbitsRangeQuantParam); | |||
| } | |||
| void PrimitiveC::GetAttrDataFromInput(const AnfNodePtr inputNode, std::vector<int> *data) { | |||
| if (inputNode->isa<ValueNode>()) { | |||
| auto valNode = inputNode->cast<ValueNodePtr>(); | |||
| @@ -131,6 +131,10 @@ class PrimitiveC : public mindspore::Primitive { | |||
| static std::shared_ptr<PrimitiveC> Create(const Primitive &prim, const std::vector<AnfNodePtr> &inputs, | |||
| const schema::QuantType &quantType); | |||
| void PopulaterQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs); | |||
| void FillDefaultInputQuantParamIfNeed(const size_t &inputSize); | |||
| void PopulaterInputQuantParam(const Primitive &prim, const std::vector<AnfNodePtr> &inputs, | |||
| bool narrowRangeQuantParam, int32_t numbitsRangeQuantParam); | |||
| void PopulaterOutputQuantParam(const Primitive &prim, bool narrowRangeQuantParam, int32_t numbitsRangeQuantParam); | |||
| void CalFloatScopeByMeanAndStddev(const double &mean, const double &stdDev, float *mMin, float *mMax); | |||
| protected: | |||
| @@ -17,7 +17,6 @@ | |||
| #include "src/runtime/runtime_api.h" | |||
| #include <mutex> | |||
| #include <string> | |||
| #include "src/runtime/workspace_pool.h" | |||
| #include "src/common/log_adapter.h" | |||
| static std::mutex gWorkspaceMutex; | |||
| @@ -28,26 +27,6 @@ extern "C" { | |||
| ThreadPool *CreateLiteThreadPool(int thread_num, int mode) { return CreateThreadPool(thread_num, mode); } | |||
| void LiteAPISetLastError(const char *msg) { MS_LOG(ERROR) << "The lite api set last error is " << msg; } | |||
| void *LiteBackendAllocWorkspace(int deviceType, int deviceId, uint64_t size, int dtypeCode, int dtypeBits) { | |||
| std::lock_guard<std::mutex> lock(gWorkspaceMutex); | |||
| auto p = mindspore::predict::WorkspacePool::GetInstance(); | |||
| if (p == nullptr) { | |||
| MS_LOG(ERROR) << "Get thread pool instance failed"; | |||
| return nullptr; | |||
| } | |||
| return p->AllocWorkSpaceMem(size); | |||
| } | |||
| int LiteBackendFreeWorkspace(int deviceType, int deviceId, const void *ptr) { | |||
| std::lock_guard<std::mutex> lock(gWorkspaceMutex); | |||
| auto p = mindspore::predict::WorkspacePool::GetInstance(); | |||
| if (p == nullptr) { | |||
| return -1; | |||
| } | |||
| p->FreeWorkSpaceMem(ptr); | |||
| return 0; | |||
| } | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -36,9 +36,6 @@ struct ThreadPool; | |||
| #endif | |||
| INTERNAL_API_DLL ThreadPool *CreateLiteThreadPool(int thread_num, int mode); | |||
| INTERNAL_API_DLL void LiteAPISetLastError(const char *msg); | |||
| INTERNAL_API_DLL void *LiteBackendAllocWorkspace(int deviceType, int deviceId, uint64_t size, int dtypeCode, | |||
| int dtypeBits); | |||
| INTERNAL_API_DLL int LiteBackendFreeWorkspace(int deviceType, int deviceId, const void *ptr); | |||
| INTERNAL_API_DLL int LiteBackendRegisterSystemLibSymbol(const char *name, void *ptr); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -44,7 +44,6 @@ | |||
| #define RET_TP_SYSTEM_ERROR (-1) | |||
| #define MAX_THREAD_NUM (8) | |||
| #define MAX_THREAD_POOL_NUM (4) | |||
| #define DEFAULT_SPIN_COUNT (30000) | |||
| typedef struct { | |||
| @@ -509,60 +508,74 @@ int BindMasterThread(struct ThreadPool *thread_pool, bool is_bind) { | |||
| return RET_TP_OK; | |||
| } | |||
| int BindSalverThreads(struct ThreadPool *thread_pool, bool is_bind) { | |||
| if (thread_pool == NULL) { | |||
| LOG_ERROR("get thread pool instane failed"); | |||
| return RET_TP_ERROR; | |||
| int FreeBindSalverThreads(struct ThreadPool *thread_pool) { | |||
| cpu_set_t mask; | |||
| CPU_ZERO(&mask); | |||
| for (int i = 0; i < gHigNum + gMidNum; ++i) { | |||
| CPU_SET(cpu_cores[i], &mask); | |||
| } | |||
| for (int i = 0; i < thread_pool->thread_num - 1; ++i) { | |||
| Thread *thread = GetThread(thread_pool, i); | |||
| if (thread == NULL) { | |||
| LOG_ERROR("get thread failed, thread_id: %d", i); | |||
| return false; | |||
| } | |||
| int ret = SetAffinity(thread->pthread, &mask); | |||
| if (ret != RET_TP_OK) { | |||
| LOG_ERROR("set thread affinity failed"); | |||
| return RET_TP_ERROR; | |||
| } | |||
| } | |||
| return RET_TP_OK; | |||
| } | |||
| int DoBindSalverThreads(struct ThreadPool *thread_pool) { | |||
| cpu_set_t mask; | |||
| if (is_bind && thread_pool->mode != NO_BIND_MODE) { | |||
| unsigned int attach_id; | |||
| for (int i = 0; i < thread_pool->thread_num - 1; ++i) { | |||
| if (thread_pool->mode == MID_MODE) { | |||
| int core_id = gHigNum + gMidNum - i - 2; | |||
| if (core_id >= 0) { | |||
| attach_id = cpu_cores[core_id]; | |||
| } else { | |||
| attach_id = cpu_cores[0]; | |||
| } | |||
| unsigned int attach_id; | |||
| for (int i = 0; i < thread_pool->thread_num - 1; ++i) { | |||
| if (thread_pool->mode == MID_MODE) { | |||
| int core_id = gHigNum + gMidNum - i - 2; | |||
| if (core_id >= 0) { | |||
| attach_id = cpu_cores[core_id]; | |||
| } else { | |||
| attach_id = cpu_cores[i + 1]; | |||
| } | |||
| LOG_INFO("mode: %d, attach id: %u", thread_pool->mode, attach_id); | |||
| CPU_ZERO(&mask); | |||
| CPU_SET(attach_id, &mask); | |||
| Thread *thread = GetThread(thread_pool, i); | |||
| if (thread == NULL) { | |||
| LOG_ERROR("get thread failed, thread_id: %d", i); | |||
| return false; | |||
| } | |||
| int ret = SetAffinity(thread->pthread, &mask); | |||
| if (ret != RET_TP_OK) { | |||
| LOG_ERROR("set thread affinity failed"); | |||
| return RET_TP_ERROR; | |||
| attach_id = cpu_cores[0]; | |||
| } | |||
| } else { | |||
| attach_id = cpu_cores[i + 1]; | |||
| } | |||
| } else { | |||
| LOG_INFO("mode: %d, attach id: %u", thread_pool->mode, attach_id); | |||
| CPU_ZERO(&mask); | |||
| for (int i = 0; i < gHigNum + gMidNum; ++i) { | |||
| CPU_SET(cpu_cores[i], &mask); | |||
| CPU_SET(attach_id, &mask); | |||
| Thread *thread = GetThread(thread_pool, i); | |||
| if (thread == NULL) { | |||
| LOG_ERROR("get thread failed, thread_id: %d", i); | |||
| return false; | |||
| } | |||
| for (int i = 0; i < thread_pool->thread_num - 1; ++i) { | |||
| Thread *thread = GetThread(thread_pool, i); | |||
| if (thread == NULL) { | |||
| LOG_ERROR("get thread failed, thread_id: %d", i); | |||
| return false; | |||
| } | |||
| int ret = SetAffinity(thread->pthread, &mask); | |||
| if (ret != RET_TP_OK) { | |||
| LOG_ERROR("set thread affinity failed"); | |||
| return RET_TP_ERROR; | |||
| } | |||
| int ret = SetAffinity(thread->pthread, &mask); | |||
| if (ret != RET_TP_OK) { | |||
| LOG_ERROR("set thread affinity failed"); | |||
| return RET_TP_ERROR; | |||
| } | |||
| } | |||
| LOG_INFO("BindSalverThreads success"); | |||
| return RET_TP_OK; | |||
| } | |||
| int BindSalverThreads(struct ThreadPool *thread_pool, bool is_bind) { | |||
| if (thread_pool == NULL) { | |||
| LOG_ERROR("get thread pool instane failed"); | |||
| return RET_TP_ERROR; | |||
| } | |||
| int ret; | |||
| if (is_bind && thread_pool->mode != NO_BIND_MODE) { | |||
| ret = DoBindSalverThreads(thread_pool); | |||
| } else { | |||
| ret = FreeBindSalverThreads(thread_pool); | |||
| } | |||
| if (ret == RET_TP_OK) { | |||
| LOG_INFO("BindSalverThreads success"); | |||
| } | |||
| return ret; | |||
| } | |||
| #endif | |||
| int BindThreads(struct ThreadPool *thread_pool, bool is_bind, int mode) { | |||
| @@ -782,46 +795,6 @@ int CreateNewThread(struct ThreadPool *thread_pool, int thread_id) { | |||
| return RET_TP_OK; | |||
| } | |||
| int ReConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode) { | |||
| LOG_INFO("reconfig thread pool, thread_num: %d, mode: %d", thread_num, mode); | |||
| if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { | |||
| LOG_ERROR("invalid thread num: %d", thread_num); | |||
| return RET_TP_ERROR; | |||
| } | |||
| if (thread_pool == NULL) { | |||
| LOG_ERROR("get thread pool instane failed"); | |||
| return RET_TP_ERROR; | |||
| } | |||
| if (thread_num <= thread_pool->thread_num) { | |||
| LOG_INFO("no need to add thread"); | |||
| return RET_TP_OK; | |||
| } | |||
| int curr_thread_num = thread_pool->thread_num; | |||
| thread_pool->thread_num = thread_num > MAX_THREAD_NUM ? MAX_THREAD_NUM : thread_num; | |||
| thread_pool->mode = mode; | |||
| if (thread_pool->thread_list == NULL) { | |||
| thread_pool->thread_list = (ThreadList *)malloc(sizeof(ThreadList)); | |||
| if (thread_pool->thread_list == NULL) { | |||
| LOG_ERROR("create thread list failed"); | |||
| DestroyThreadPool(thread_pool); | |||
| return RET_TP_ERROR; | |||
| } | |||
| thread_pool->thread_list->head = NULL; | |||
| thread_pool->thread_list->tail = NULL; | |||
| thread_pool->thread_list->size = 0; | |||
| pthread_mutex_init(&thread_pool->thread_list->lock, NULL); | |||
| } | |||
| int add_thread_num = thread_pool->thread_num - curr_thread_num; | |||
| for (int i = curr_thread_num - 1, j = 0; j < add_thread_num; ++i, ++j) { | |||
| int ret = CreateNewThread(thread_pool, i); | |||
| if (ret != RET_TP_OK) { | |||
| LOG_ERROR("create new thread failed"); | |||
| return RET_TP_ERROR; | |||
| } | |||
| } | |||
| return BindThreads(thread_pool, true, mode); | |||
| } | |||
| ThreadPool *CreateThreadPool(int thread_num, int mode) { | |||
| LOG_INFO("create thread pool, thread_num: %d, mode: %d", thread_num, mode); | |||
| if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { | |||
| @@ -873,18 +846,6 @@ ThreadPool *CreateThreadPool(int thread_num, int mode) { | |||
| return thread_pool; | |||
| } | |||
| int ConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode) { | |||
| if (thread_num <= 0 || thread_num > MAX_THREAD_NUM) { | |||
| LOG_ERROR("invalid thread num: %d", thread_num); | |||
| return RET_TP_ERROR; | |||
| } | |||
| int ret = ReConfigThreadPool(thread_pool, thread_num, mode); | |||
| if (ret != RET_TP_OK) { | |||
| LOG_ERROR("reconfig thread pool failed, thread_num: %d, mode: %d", thread_num, mode); | |||
| } | |||
| return ret; | |||
| } | |||
| void ActivateThreadPool(struct ThreadPool *thread_pool) { | |||
| if (thread_pool == NULL) { | |||
| LOG_ERROR("get thread pool instane failed"); | |||
| @@ -28,25 +28,10 @@ typedef enum { | |||
| MID_MODE = 2 /**< bind middle cpu first */ | |||
| } BindMode; | |||
/// \brief Identifier used to specify which thread pool to use.
typedef enum {
  /** default thread pool id */
  THREAD_POOL_DEFAULT = 0,
  /** the second thread pool id */
  THREAD_POOL_SECOND = 1,
  /** the third thread pool id */
  THREAD_POOL_THIRD = 2,
  /** the fourth thread pool id */
  THREAD_POOL_FOURTH = 3
} ThreadPoolId;
| struct ThreadPool; | |||
| struct ThreadPool *CreateThreadPool(int thread_num, int mode); | |||
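| /** | |||
| * validate thread_num and reconfigure an existing thread pool to that size | |||
| * @param thread_pool the thread pool to reconfigure | |||
| * @param thread_num requested thread count, must be in (0, MAX_THREAD_NUM] | |||
| * @param mode bind mode applied to the thread pool | |||
| * @return RET_TP_OK on success, an error code otherwise | |||
| */ | |||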
| int ConfigThreadPool(struct ThreadPool *thread_pool, int thread_num, int mode); | |||
| /** | |||
| * | |||
| * @param session_index, support multi session | |||
| @@ -1,154 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/workspace_pool.h" | |||
| #ifdef __APPLE__ | |||
| #include <stdlib.h> | |||
| #else | |||
| #include <malloc.h> | |||
| #endif | |||
| #include <algorithm> | |||
| #include "src/common/log_adapter.h" | |||
| namespace mindspore { | |||
| namespace predict { | |||
| static constexpr size_t kWorkspacePageSize = 4096; | |||
| static constexpr int kTempAllocaAlignment = 64; | |||
| WorkspacePool *WorkspacePool::GetInstance() { | |||
| static WorkspacePool instance; | |||
| return &instance; | |||
| } | |||
| void *WorkspacePool::AllocWorkSpaceMem(size_t size) { | |||
| size_t nbytes = (size + (kWorkspacePageSize - 1)) / kWorkspacePageSize * kWorkspacePageSize; | |||
| if (nbytes == 0) { | |||
| nbytes = kWorkspacePageSize; | |||
| } | |||
| std::pair<size_t, void *> alloc; | |||
| // fist alloc | |||
| if (freeList.empty()) { | |||
| alloc.first = nbytes; | |||
| #ifdef __APPLE__ | |||
| int err = posix_memalign(&alloc.second, kTempAllocaAlignment, nbytes); | |||
| if (err != 0) { | |||
| MS_LOGE("posix_memalign failed, error code:%d", err); | |||
| return alloc.second; | |||
| } | |||
| #else | |||
| #ifdef _WIN32 | |||
| alloc.second = _aligned_malloc(nbytes, kTempAllocaAlignment); | |||
| #else | |||
| alloc.second = memalign(kTempAllocaAlignment, nbytes); | |||
| #endif | |||
| #endif | |||
| } else if (freeList.size() == 1) { // one element | |||
| alloc = *(freeList.begin()); | |||
| freeList.erase(freeList.begin()); | |||
| if (alloc.first < nbytes) { | |||
| free(alloc.second); | |||
| alloc.first = nbytes; | |||
| #ifdef __APPLE__ | |||
| int err = posix_memalign(&alloc.second, kTempAllocaAlignment, nbytes); | |||
| if (err != 0) { | |||
| MS_LOGE("posix_memalign failed, error code:%d", err); | |||
| return alloc.second; | |||
| } | |||
| #else | |||
| #ifdef _WIN32 | |||
| alloc.second = _aligned_malloc(nbytes, kTempAllocaAlignment); | |||
| #else | |||
| alloc.second = memalign(kTempAllocaAlignment, nbytes); | |||
| #endif | |||
| #endif | |||
| } | |||
| } else { | |||
| if ((*(freeList.begin())).first >= nbytes) { | |||
| auto iter = freeList.begin(); | |||
| for (; iter != freeList.end(); ++iter) { | |||
| if ((*iter).first < size) { | |||
| alloc = *(--iter); | |||
| freeList.erase(iter); | |||
| break; | |||
| } | |||
| } | |||
| if (iter == freeList.end()) { | |||
| alloc = *(freeList.rbegin()); | |||
| freeList.erase(--freeList.end()); | |||
| } | |||
| } else { | |||
| alloc = *(freeList.begin()); | |||
| freeList.erase(freeList.begin()); | |||
| free(alloc.second); | |||
| alloc.first = nbytes; | |||
| #ifdef __APPLE__ | |||
| int err = posix_memalign(&alloc.second, kTempAllocaAlignment, nbytes); | |||
| if (err != 0) { | |||
| MS_LOGE("posix_memalign failed, error code:%d", err); | |||
| return alloc.second; | |||
| } | |||
| #else | |||
| #ifdef _WIN32 | |||
| alloc.second = _aligned_malloc(nbytes, kTempAllocaAlignment); | |||
| #else | |||
| alloc.second = memalign(kTempAllocaAlignment, nbytes); | |||
| #endif | |||
| #endif | |||
| } | |||
| } | |||
| allocList.emplace_back(alloc); | |||
| return alloc.second != nullptr ? alloc.second : nullptr; | |||
| } | |||
| void WorkspacePool::FreeWorkSpaceMem(const void *ptr) { | |||
| if (ptr == nullptr) { | |||
| return; | |||
| } | |||
| std::pair<size_t, void *> alloc; | |||
| if (allocList.empty()) { | |||
| MS_LOG(ERROR) << "no mem have been alloc"; | |||
| return; | |||
| } else if (allocList.back().second == ptr) { | |||
| alloc = allocList.back(); | |||
| allocList.pop_back(); | |||
| } else { | |||
| auto iter = allocList.begin(); | |||
| for (; iter != allocList.end(); ++iter) { | |||
| if ((*iter).second == ptr) { | |||
| alloc = *iter; | |||
| allocList.erase(iter); | |||
| break; | |||
| } | |||
| } | |||
| if (iter == allocList.end()) { | |||
| MS_LOG(ERROR) << "no value ptr have been alloc"; | |||
| return; | |||
| } | |||
| } | |||
| freeList.insert(alloc); | |||
| } | |||
| WorkspacePool::~WorkspacePool() { | |||
| for (auto &a : allocList) { | |||
| free(a.second); | |||
| } | |||
| allocList.clear(); | |||
| for (auto &f : freeList) { | |||
| free(f.second); | |||
| } | |||
| freeList.clear(); | |||
| } | |||
| } // namespace predict | |||
| } // namespace mindspore | |||
| @@ -1,44 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_WORKSPACE_POOL_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_WORKSPACE_POOL_H_ | |||
| #include <memory> | |||
| #include <vector> | |||
| #include <set> | |||
| #include <utility> | |||
| #include <functional> | |||
| #include <mutex> | |||
| namespace mindspore { | |||
| namespace predict { | |||
// Pool of reusable workspace buffers, accessed through a shared instance.
// Copying is disabled.
class WorkspacePool {
 public:
  // Access to the shared pool instance.
  static WorkspacePool *GetInstance();

  WorkspacePool() = default;
  WorkspacePool(const WorkspacePool &) = delete;
  WorkspacePool &operator=(const WorkspacePool &) = delete;
  ~WorkspacePool();

  // Hands out a buffer for a request of `size` bytes.
  void *AllocWorkSpaceMem(size_t size);
  // Gives a buffer obtained from AllocWorkSpaceMem back to the pool.
  void FreeWorkSpaceMem(const void *ptr);

 private:
  // A tracked buffer: (size in bytes, start address).
  using Block = std::pair<size_t, void *>;

  // Buffers currently handed out to callers.
  std::vector<Block> allocList{};
  // Buffers available for reuse, kept in descending order (largest first).
  std::set<Block, std::greater<Block>> freeList{};
};
| } // namespace predict | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_WORKSPACE_POOL_H_ | |||
| @@ -171,6 +171,33 @@ int Scheduler::BuildKernels(const lite::Model *model, std::vector<Tensor *> *ten | |||
| return RET_OK; | |||
| } | |||
| std::vector<kernel::LiteKernel *> Scheduler::FindAllSubGraphKernels( | |||
| kernel::LiteKernel *head_kernel, std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map) { | |||
| MS_ASSERT(head_kernel != nullptr); | |||
| MS_ASSERT(sinked_kernel_map != nullptr); | |||
| std::vector<kernel::LiteKernel *> sub_kernels; | |||
| std::queue<kernel::LiteKernel *> kernel_queue; | |||
| kernel_queue.emplace(head_kernel); | |||
| auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernel); | |||
| while (!kernel_queue.empty()) { | |||
| auto cur_kernel = kernel_queue.front(); | |||
| kernel_queue.pop(); | |||
| (*sinked_kernel_map)[cur_kernel] = true; | |||
| sub_kernels.emplace_back(cur_kernel); | |||
| auto post_kernels = cur_kernel->out_kernels(); | |||
| for (auto post_kernel : post_kernels) { | |||
| if (cur_sub_graph_type == mindspore::lite::Scheduler::GetKernelSubGraphType(post_kernel)) { | |||
| auto post_kernel_inputs = post_kernel->in_kernels(); | |||
| if (std::all_of(post_kernel_inputs.begin(), post_kernel_inputs.end(), | |||
| [&](kernel::LiteKernel *kernel) { return (*sinked_kernel_map)[kernel]; })) { | |||
| kernel_queue.emplace(post_kernel); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return sub_kernels; | |||
| } | |||
| int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) { | |||
| auto old_kernels = *kernels; | |||
| kernels->clear(); | |||
| @@ -194,27 +221,8 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) { | |||
| MS_LOG(ERROR) << "Not support NPU and APU now"; | |||
| return RET_NOT_SUPPORT; | |||
| } | |||
| std::vector<kernel::LiteKernel *> sub_kernels; | |||
| std::queue<kernel::LiteKernel *> kernel_queue; | |||
| kernel_queue.emplace(head_kernel); | |||
| auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernel); | |||
| while (!kernel_queue.empty()) { | |||
| auto cur_kernel = kernel_queue.front(); | |||
| kernel_queue.pop(); | |||
| is_kernel_sinked[cur_kernel] = true; | |||
| sub_kernels.emplace_back(cur_kernel); | |||
| auto post_kernels = cur_kernel->out_kernels(); | |||
| for (auto post_kernel : post_kernels) { | |||
| if (cur_sub_graph_type == mindspore::lite::Scheduler::GetKernelSubGraphType(post_kernel)) { | |||
| auto post_kernel_inputs = post_kernel->in_kernels(); | |||
| if (std::all_of(post_kernel_inputs.begin(), post_kernel_inputs.end(), | |||
| [&](kernel::LiteKernel *kernel) { return is_kernel_sinked[kernel]; })) { | |||
| kernel_queue.emplace(post_kernel); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| auto sub_kernels = FindAllSubGraphKernels(head_kernel, &is_kernel_sinked); | |||
| auto subgraph = CreateSubGraphKernel(sub_kernels, cur_sub_graph_type); | |||
| if (subgraph == nullptr) { | |||
| MS_LOG(ERROR) << "Create SubGraphKernel failed"; | |||
| @@ -18,6 +18,7 @@ | |||
| #define MINDSPORE_LITE_SRC_SCHEDULER_H_ | |||
| #include <vector> | |||
| #include <map> | |||
| #include "src/sub_graph_kernel.h" | |||
| #include "src/inner_context.h" | |||
| #include "include/model.h" | |||
| @@ -47,6 +48,9 @@ class Scheduler { | |||
| kernel::SubGraphKernel *CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels, | |||
| kernel::SubGraphType type); | |||
| std::vector<kernel::LiteKernel *> FindAllSubGraphKernels( | |||
| kernel::LiteKernel *head_kernel, std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map); | |||
| static TypeId GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors); | |||
| static void SetKernelTensorDataType(kernel::LiteKernel *kernel); | |||
| @@ -256,54 +256,19 @@ std::string Tensor::ToString() const { | |||
| oss << std::endl << "Data:"; | |||
| switch (this->data_type_) { | |||
| case kNumberTypeFloat32: { | |||
| auto data = static_cast<float *>(this->data_); | |||
| if (data == nullptr) { | |||
| return "Data of tensor is nullptr"; | |||
| } else { | |||
| for (int i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| oss << " " << data[i]; | |||
| } | |||
| } | |||
| oss << DataToString<float>(this->data_c(), this->ElementsNum()); | |||
| } break; | |||
| case kNumberTypeFloat16: { | |||
| auto data = static_cast<int16_t *>(this->data_); | |||
| if (data == nullptr) { | |||
| oss << " Data of tensor is nullptr"; | |||
| } else { | |||
| for (int i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| oss << " " << data[i]; | |||
| } | |||
| } | |||
| oss << DataToString<int16_t>(this->data_c(), this->ElementsNum()); | |||
| } break; | |||
| case kNumberTypeInt32: { | |||
| auto data = static_cast<int32_t *>(this->data_); | |||
| if (data == nullptr) { | |||
| oss << " Data of tensor is nullptr"; | |||
| } else { | |||
| for (int i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| oss << " " << data[i]; | |||
| } | |||
| } | |||
| oss << DataToString<int32_t>(this->data_c(), this->ElementsNum()); | |||
| } break; | |||
| case kNumberTypeInt16: { | |||
| auto data = static_cast<int16_t *>(this->data_); | |||
| if (data == nullptr) { | |||
| oss << " Data of tensor is nullptr"; | |||
| } else { | |||
| for (int i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| oss << " " << data[i]; | |||
| } | |||
| } | |||
| oss << DataToString<int16_t>(this->data_c(), this->ElementsNum()); | |||
| } break; | |||
| case kNumberTypeInt8: { | |||
| auto data = static_cast<int8_t *>(this->data_); | |||
| if (data == nullptr) { | |||
| oss << " Data of tensor is nullptr"; | |||
| } else { | |||
| for (int i = 0; i < 40 && i < this->ElementsNum(); i++) { | |||
| oss << " " << static_cast<int32_t>(data[i]); | |||
| } | |||
| } | |||
| oss << DataToString<int8_t>(this->data_c(), this->ElementsNum()); | |||
| } break; | |||
| default: | |||
| oss << "Unsupported data type to print"; | |||
| @@ -134,6 +134,20 @@ class Tensor : public mindspore::tensor::MSTensor { | |||
| } | |||
| } | |||
| private: | |||
| template <typename T> | |||
| std::string DataToString(void *data, size_t data_number) const { | |||
| if (data == nullptr) { | |||
| return "Data of tensor is nullptr"; | |||
| } | |||
| std::ostringstream oss; | |||
| auto casted_data = static_cast<T *>(data); | |||
| for (size_t i = 0; i < 40 && i < data_number; i++) { | |||
| oss << " " << casted_data[i]; | |||
| } | |||
| return oss.str(); | |||
| } | |||
| protected: | |||
| void *data_ = nullptr; | |||
| void *device_data_ = nullptr; | |||
| @@ -119,7 +119,6 @@ set(TEST_LITE_SRC | |||
| ${LITE_DIR}/src/runtime/allocator.cc | |||
| ${LITE_DIR}/src/runtime/runtime_api.cc | |||
| ${LITE_DIR}/src/runtime/thread_pool.c | |||
| ${LITE_DIR}/src/runtime/workspace_pool.cc | |||
| ${LITE_DIR}/src/runtime/parallel_executor.cc | |||
| ${LITE_DIR}/src/tensor.cc | |||
| ${LITE_DIR}/src/executor.cc | |||
| @@ -72,7 +72,6 @@ set(LITE_SRC | |||
| ${SRC_DIR}/runtime/allocator.cc | |||
| ${SRC_DIR}/runtime/runtime_api.cc | |||
| ${SRC_DIR}/runtime/thread_pool.c | |||
| ${SRC_DIR}/runtime/workspace_pool.cc | |||
| ${SRC_DIR}/inner_context.cc | |||
| ${SRC_DIR}/tensor.cc | |||
| ${SRC_DIR}/kernel_registry.cc | |||