From: @ehaleva
Reviewed-by:
Signed-off-by:
pull/12773/head
@@ -49,6 +49,7 @@ set(LITE_SRC
    ${CMAKE_CURRENT_SOURCE_DIR}/common/prim_util.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/common/tensor_util.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/common/loader_util.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/common/quant_utils.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/allocator.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_api.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/runtime/thread_pool.c
@@ -124,6 +125,7 @@ if(SUPPORT_TRAIN)
        ${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc
        )
    if(ENABLE_V0)
        set(LITE_SRC
@@ -192,7 +194,10 @@ if(BUILD_MINDDATA STREQUAL "lite")
    target_link_libraries(mindspore-lite_static minddata_eager_mid)
endif()
if(SUPPORT_TRAIN)
    add_dependencies(mindspore-lite fbs_inner_src)
    add_dependencies(mindspore-lite_static fbs_inner_src)
    target_link_libraries(mindspore-lite minddata-lite)
    target_link_libraries(mindspore-lite_static minddata-lite)
endif()
@@ -0,0 +1,104 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "schema/inner/model_generated.h"
#include "src/common/quant_utils.h"
#include "src/lite_kernel.h"

namespace mindspore {
namespace lite {
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
                         bool channel_at_first, float *desired_max, float *desired_min) {
  float min = FLT_MAX;
  float max = -FLT_MAX;
  // find min and max
  for (int j = 0; j < one_filter_size; j++) {
    auto index = j + i * one_filter_size;
    if (!channel_at_first) {
      index = j * channels + i;
    }
    if (index >= elem_count) {
      MS_LOG(ERROR) << "index overflow!";
      continue;  // skip out-of-range access
    }
    min = std::min(min, raw_datas[index]);
    max = std::max(max, raw_datas[index]);
  }
  *desired_max = max;
  *desired_min = min;
}
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
                             int quant_min, int num_bits) {
  MS_ASSERT(quantParam != nullptr);
  if (mMin > 0.0f) {
    MS_LOG(DEBUG) << "min " << mMin << " is bigger than 0, set to 0; this may cause low precision";
    mMin = 0.0f;
  }
  if (mMax < 0.0f) {
    MS_LOG(DEBUG) << "mMax " << mMax << " is smaller than 0, set to 0; this may cause low precision";
    mMax = 0.0f;
  }
  if (mMin > mMax) {
    MS_LOG(ERROR) << "cal error: min " << mMin << " > max " << mMax;
    return RET_PARAM_INVALID;
  }
  if (mMin == mMax) {
    if (mMin != 0.0f) {
      MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other";
      return RET_ERROR;
    }
    quantParam->inited = true;
    quantParam->min = mMin;
    quantParam->max = mMax;
    quantParam->scale = 0.0f;
    quantParam->zeroPoint = 0;
    quantParam->narrowRange = narrowRange;
    quantParam->numBits = num_bits;
    return RET_OK;
  }
  auto quantMinFloat = static_cast<double>(quant_min);
  auto quantMaxFloat = static_cast<double>(quant_max);
  if (fabs(quantMaxFloat - quantMinFloat) <= 0.0f) {
    MS_LOG(ERROR) << "divisor cannot be 0";
    return RET_ERROR;
  }
  double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat);
  if (fabs(scale) <= 0.0f) {
    MS_LOG(ERROR) << "divisor 'scale' cannot be 0";
    return RET_ERROR;
  }
  const double zeroPointFromMin = quantMinFloat - mMin / scale;
  int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin));
  // The zero point should always be in the range of quantized value,
  // [qmin, qmax].
  MS_ASSERT(zeroPoint >= quant_min);
  MS_ASSERT(zeroPoint <= quant_max);
  quantParam->inited = true;
  quantParam->min = mMin;
  quantParam->max = mMax;
  quantParam->scale = scale;
  quantParam->zeroPoint = zeroPoint;
  quantParam->narrowRange = narrowRange;
  quantParam->numBits = num_bits;
  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore
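For reference, CalQuantizationParams is the standard asymmetric (affine) quantization derivation: scale = (max - min) / (qmax - qmin) and zeroPoint = round(qmin - min / scale). A minimal standalone sketch with assumed example values (plain C++, not the schema API):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

// Standalone sketch of the scale/zero-point math in CalQuantizationParams.
// The float range [-2.5, 6.0] is an assumed example, not data from the PR.
int main() {
  double min = -2.5, max = 6.0;                 // observed float range
  const int quant_min = -128, quant_max = 127;  // int8, narrowRange = false
  double scale = (max - min) / (quant_max - quant_min);
  int zero_point = static_cast<int>(std::round(quant_min - min / scale));
  // Round-trip one value to show what the parameters mean.
  double x = 1.0;
  int q = std::max(quant_min,
                   std::min(quant_max, static_cast<int>(std::round(x / scale + zero_point))));
  double x_hat = scale * (q - zero_point);  // dequantized value, ~1.0
  std::printf("scale=%g zeroPoint=%d q=%d x_hat=%g\n", scale, zero_point, q, x_hat);
  return 0;
}
```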
@@ -0,0 +1,234 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_
#define MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_

#include <float.h>
#include <cmath>
#include <climits>
#include <limits>
#include <algorithm>
#include <vector>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "ir/dtype/type_id.h"

namespace mindspore {
namespace schema {
struct QuantParamT;
}
namespace lite {
const int RET_QUANT_CONTINUE = 2;
static constexpr double SCALE_THREASHOLD = 1e-38;
static constexpr int kPerTensor = 1;

inline int QuantMax(int bits, TypeId type) {
  if (type == kNumberTypeInt8) {
    return (1 << (bits - 1)) - 1;
  } else if (type == kNumberTypeUInt8) {
    return (1 << bits) - 1;
  }
  return 0;
}

inline int QuantMin(int bits, TypeId type) {
  if (type == kNumberTypeInt8) {
    return -(1 << (bits - 1));
  }
  return 0;
}
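As a quick sanity check of what QuantMax/QuantMin return (a standalone sketch; the TypeId dispatch is split into separate helpers here):

```cpp
// Sketch: the ranges QuantMax/QuantMin produce for the common cases.
constexpr int Int8QuantMax(int bits) { return (1 << (bits - 1)) - 1; }
constexpr int Int8QuantMin(int bits) { return -(1 << (bits - 1)); }
constexpr int UInt8QuantMax(int bits) { return (1 << bits) - 1; }

static_assert(Int8QuantMax(8) == 127 && Int8QuantMin(8) == -128, "8-bit signed");
static_assert(UInt8QuantMax(8) == 255, "8-bit unsigned (min is 0)");
static_assert(Int8QuantMax(7) == 63 && Int8QuantMin(7) == -64, "7-bit signed");
```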
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
                         bool channel_at_first, float *desired_max, float *desired_min);

STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max,
                             int quant_min, int num_bits);
template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
  MS_ASSERT(quantParam != nullptr);
  MS_ASSERT(quantParam->inited);
  const auto scale = quantParam->scale;
  const auto zeroPoint = quantParam->zeroPoint;
  const auto numBit = quantParam->numBits;
  const auto narrowRange = quantParam->narrowRange;
  double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1);
  const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<T>::min()) * scale;
  double minLimit;
  if (narrowRange) {
    minLimit = static_cast<float>(std::numeric_limits<T>::min() + 1 - zeroPoint) * scale;
  } else {
    minLimit = static_cast<float>(std::numeric_limits<T>::min() - zeroPoint) * scale;
  }
  return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] {
    double tmp;
    if (originData > maxLimit) {
      tmp = maxLimit;
    } else if (originData < minLimit) {
      tmp = minLimit;
    } else {
      tmp = originData;
    }
    auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale));
    return quantData;
  }();
}
template <typename T>
T QuantizeData(float originData, const schema::QuantParamT *quantParam, int quant_max, int quant_min) {
  MS_ASSERT(quantParam != nullptr);
  MS_ASSERT(quantParam->inited);
  const auto scale = quantParam->scale;
  const int zeroPoint = quantParam->zeroPoint;
  const int maxLimit = quant_max;
  const int minLimit = quant_min;
  if (scale <= SCALE_THREASHOLD) {
    return 0;
  }
  return [maxLimit, minLimit, zeroPoint, scale, originData] {
    auto quant_data = std::round(originData / scale + zeroPoint);
    if (quant_data > maxLimit) {
      quant_data = maxLimit;
    } else if (quant_data < minLimit) {
      quant_data = minLimit;
    }
    return static_cast<T>(quant_data);
  }();
}
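The second overload is the one the new export path uses: round(x / scale + zeroPoint), clamped to [quant_min, quant_max]. A self-contained restatement with assumed parameters (schema::QuantParamT replaced by plain values):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch of the clamped-round behavior of the second QuantizeData overload.
int8_t QuantizeSketch(float x, double scale, int zp, int qmin, int qmax) {
  double q = std::round(x / scale + zp);
  q = std::max(static_cast<double>(qmin), std::min(static_cast<double>(qmax), q));
  return static_cast<int8_t>(q);
}

int main() {
  const double scale = 8.5 / 255.0;  // derived earlier for the assumed range [-2.5, 6.0]
  const int zp = -53;
  std::printf("%d %d %d\n",
              QuantizeSketch(0.0f, scale, zp, -128, 127),     // -> -53 (the zero point)
              QuantizeSketch(6.0f, scale, zp, -128, 127),     // -> 127 (top of range)
              QuantizeSketch(-10.0f, scale, zp, -128, 127));  // -> -128 (clamped)
  return 0;
}
```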
template <typename T>
STATUS DoPerLayerQuant(const float *raw_datas, size_t elem_count, std::vector<schema::QuantParamT> *quant_params,
                       const int &quant_max, const int &quant_min, const size_t &bit_num, const bool &k_means,
                       std::vector<T> *quant_datas) {
  float min = FLT_MAX;
  float max = -FLT_MAX;
  for (uint32_t i = 0; i < elem_count; i++) {
    min = std::min(min, raw_datas[i]);
    max = std::max(max, raw_datas[i]);
  }
  schema::QuantParamT quant_param;
  if (!k_means) {
    STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "CalQuantizationParams failed: " << status;
      return status;
    }
  }
  quant_params->emplace_back(quant_param);
  // update data and datatype
  for (uint32_t i = 0; i < elem_count; i++) {
    float raw_data = raw_datas[i];
    if (!k_means) {
      auto quant_data = QuantizeData<T>(raw_data, &quant_param, quant_max, quant_min);
      (*quant_datas)[i] = quant_data;
    }
  }
  return RET_OK;
}
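A hedged sketch of the calling pattern for DoPerLayerQuant (the data values are made up; the real caller is TrainExport::QuantTensorData below, and model_generated.h must be included before quant_utils.h because the header only forward-declares QuantParamT):

```cpp
#include <cstdint>
#include <vector>
#include "schema/inner/model_generated.h"  // full QuantParamT definition
#include "src/common/quant_utils.h"

// Sketch only: quantize a whole tensor with a single scale/zero-point pair.
mindspore::lite::STATUS PerLayerExample() {
  std::vector<float> raw = {-2.5f, 0.0f, 1.0f, 6.0f};  // assumed weights
  std::vector<int8_t> quant(raw.size());
  std::vector<mindspore::schema::QuantParamT> params;
  // On RET_OK: params holds one QuantParamT, quant holds the int8 payload.
  return mindspore::lite::DoPerLayerQuant<int8_t>(raw.data(), raw.size(), &params,
                                                  /*quant_max=*/127, /*quant_min=*/-128,
                                                  /*bit_num=*/8, /*k_means=*/false, &quant);
}
```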
template <typename T>
STATUS DoPerChannelQuant(const float *raw_datas, size_t elem_count, const schema::QuantType &quant_type,
                         std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
                         const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, int channels,
                         bool channel_at_first = true) {
  static const int quant_param_size = 32 * 8;
  std::vector<float> dequant_datas(quant_datas->size());
  if (channels <= 0) {
    MS_LOG(ERROR) << "channels must be greater than 0";
    return RET_ERROR;
  }
  size_t one_filter_size = elem_count / channels;
  bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size;
  if (!do_quant && quant_type == schema::QuantType_WeightQuant) {
    MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size;
    return RET_QUANT_CONTINUE;
  }
  for (int i = 0; i < channels; i++) {
    float min = FLT_MAX;
    float max = -FLT_MAX;
    GetMaxMinPerchannel(channels, one_filter_size, i, elem_count, raw_datas, channel_at_first, &max, &min);
    schema::QuantParamT quant_param;
    STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "CalQuantizationParams failed: " << status;
      return status;
    }
    // do quantization
    double average_dequant = 0;
    double average_raw = 0;
    for (uint32_t j = 0; j < one_filter_size; j++) {
      auto index = j + i * one_filter_size;
      if (!channel_at_first) {
        index = j * channels + i;
      }
      MS_ASSERT(index < elem_count);
      float raw_data = raw_datas[index];
      auto quant_data = QuantizeData<T>(raw_data, &quant_param, quant_max, quant_min);
      (*quant_datas)[index] = quant_data;
      if (quant_type == schema::QuantType_WeightQuant) {
        float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint);
        dequant_datas[index] = dequant_data;
        average_dequant += dequant_data;
        average_raw += raw_data;
      }
    }
    if (quant_type == schema::QuantType_WeightQuant && !k_means) {
      // mean
      average_dequant = average_dequant / one_filter_size;
      average_raw = average_raw / one_filter_size;
      // std
      double variance_dequant = 0;
      double variance_raw = 0;
      for (uint32_t j = 0; j < one_filter_size; j++) {
        auto index = j + i * one_filter_size;
        if (!channel_at_first) {
          index = j * channels + i;
        }
        MS_ASSERT(index < elem_count);
        variance_dequant += std::pow(dequant_datas[index] - average_dequant, 2);
        variance_raw += std::pow(raw_datas[index] - average_raw, 2);
      }
      variance_dequant = std::sqrt(variance_dequant / one_filter_size);
      variance_raw = std::sqrt(variance_raw / one_filter_size);
      quant_param.varCorr = 1;
      if (variance_raw != 0 && variance_dequant != 0) {
        auto temp_var_corr = variance_raw / variance_dequant;
        if (temp_var_corr > 0 && temp_var_corr < 10) {
          quant_param.varCorr = temp_var_corr;
        } else {
          MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr;
        }
      }
      quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr;
    }
    quant_params->emplace_back(quant_param);
  }
  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore

#endif  // MINDSPORE_LITE_SRC_COMMON_QUANT_UTILS_H_
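The per-channel varCorr/meanCorr statistics above amount to a linear bias correction: after dequantization, values are rescaled so their mean and standard deviation match the original weights. A standalone numeric illustration with assumed toy data (one channel, not the real API):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Sketch of the varCorr/meanCorr computation in DoPerChannelQuant.
int main() {
  std::vector<double> raw = {0.1, 0.5, 0.9};  // assumed original weights
  std::vector<double> deq = {0.0, 0.5, 1.0};  // after quantize -> dequantize
  double mean_raw = 0, mean_deq = 0;
  for (size_t i = 0; i < raw.size(); ++i) {
    mean_raw += raw[i];
    mean_deq += deq[i];
  }
  mean_raw /= raw.size();
  mean_deq /= deq.size();
  double var_raw = 0, var_deq = 0;
  for (size_t i = 0; i < raw.size(); ++i) {
    var_raw += (raw[i] - mean_raw) * (raw[i] - mean_raw);
    var_deq += (deq[i] - mean_deq) * (deq[i] - mean_deq);
  }
  double std_raw = std::sqrt(var_raw / raw.size());
  double std_deq = std::sqrt(var_deq / deq.size());
  double var_corr = (std_deq != 0) ? std_raw / std_deq : 1.0;  // accepted only in (0, 10)
  double mean_corr = mean_raw - mean_deq * var_corr;
  // A consumer would then apply: corrected = scale * (q - zp) * var_corr + mean_corr.
  std::printf("varCorr=%g meanCorr=%g\n", var_corr, mean_corr);  // 0.8, 0.1
  return 0;
}
```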
@@ -23,46 +23,93 @@
#include <set>
#include "schema/inner/model_generated.h"
#include "src/train/train_utils.h"
#include "src/common/quant_utils.h"
#include "tools/common/storage.h"

namespace mindspore {
namespace lite {
std::vector<uint8_t> TrainExport::CreateData(const mindspore::lite::Tensor *tensor) {
std::vector<uint8_t> TrainExport::CreateData(const lite::Tensor *tensor) {
  uint8_t *tensor_data = reinterpret_cast<uint8_t *>(tensor->data_c());
  auto size = tensor->Size();
  std::vector<uint8_t> data(tensor_data, tensor_data + size);
  return data;
}

bool TrainExport::NeedQuantization(const lite::Tensor *tensor) {
  return (tensor->quant_params().size() > 0 && tensor->quant_params().at(0).inited);
}

schema::QuantType TrainExport::GetNodeQuantType(const kernel::LiteKernel *kernel) {
  if (std::any_of(kernel->in_tensors().cbegin(), kernel->in_tensors().cend(), [](const lite::Tensor *t) {
        return (t->IsConst() && (t->quant_params().size() > 0) && (t->quant_params().at(0).inited));
      })) {
    return schema::QuantType_QUANT_WEIGHT;
  }
  return schema::QuantType_QUANT_NONE;
}
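The shape of the GetNodeQuantType check, restated on simplified stand-in types (a sketch, not the real Tensor/LiteKernel API): a node is tagged QUANT_WEIGHT as soon as any constant input tensor already carries an initialized quantization parameter.

```cpp
#include <algorithm>
#include <vector>

struct StubTensor {  // hypothetical stand-in for lite::Tensor
  bool is_const;
  bool quant_param_inited;
};

bool IsWeightQuantNode(const std::vector<StubTensor> &inputs) {
  return std::any_of(inputs.cbegin(), inputs.cend(), [](const StubTensor &t) {
    return t.is_const && t.quant_param_inited;
  });
}
```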
int TrainExport::QuantTensorData(schema::TensorT *dest_tensor, const lite::Tensor *src_tensor) {
  int channels = src_tensor->quant_params().size();
  if (channels < 1) {
    MS_LOG(ERROR) << "Quant Params is empty";
    return RET_ERROR;
  }
  int bit_num = src_tensor->quant_params().at(0).bitNum;
  int quant_max = QuantMax(bit_num, kNumberTypeInt8);
  int quant_min = QuantMin(bit_num, kNumberTypeInt8);
  std::vector<int8_t> data(src_tensor->ElementsNum());
  std::vector<schema::QuantParamT> quant_params;
  STATUS ret = RET_OK;
  if (channels == kPerTensor) {
    ret = DoPerLayerQuant<int8_t>(reinterpret_cast<float *>(src_tensor->data_c()), src_tensor->ElementsNum(),
                                  &(quant_params), quant_max, quant_min, bit_num, false, &data);
  } else {
    bool channel_at_first = (src_tensor->shape().at(0) == channels);
    ret = DoPerChannelQuant<int8_t>(reinterpret_cast<float *>(src_tensor->data_c()), src_tensor->ElementsNum(),
                                    schema::QuantType_WeightQuant, &(quant_params), quant_max, quant_min, bit_num,
                                    false, &data, channels, channel_at_first);
  }
  if (ret == RET_QUANT_CONTINUE) {
    MS_LOG(DEBUG) << "No Need to quant per channel";
    return RET_OK;
  }
  if (ret == RET_ERROR) {
    MS_LOG(ERROR) << "QuantTensorData error, channels = " << channels;
    return ret;
  }
  if (quant_params.empty()) {
    MS_LOG(ERROR) << "quant_params empty";
    return RET_ERROR;
  }
  dest_tensor->data = std::vector<uint8_t>(data.data(), data.data() + data.size());
  dest_tensor->dataType = kNumberTypeInt8;
  dest_tensor->quantParams.clear();
  for (auto quant_param : quant_params) {
    dest_tensor->quantParams.emplace_back(std::make_unique<schema::QuantParamT>(quant_param));
  }
  return RET_OK;
}
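QuantTensorData picks per-tensor vs. per-channel mode from the number of quant params attached to the source tensor. A small sketch of that decision with an assumed conv-weight shape:

```cpp
#include <cstdio>
#include <vector>

// Sketch of the mode selection in QuantTensorData (assumed shapes; the real
// code reads src_tensor->quant_params().size() and src_tensor->shape()).
int main() {
  const int num_quant_params = 16;               // one param per output channel
  const std::vector<int> shape = {16, 3, 3, 3};  // conv weight, CO dimension first
  bool per_tensor = (num_quant_params == 1);     // the kPerTensor case
  bool channel_at_first = (shape.at(0) == num_quant_params);
  std::printf("per_tensor=%d channel_at_first=%d\n", per_tensor, channel_at_first);
  return 0;
}
```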
std::unique_ptr<schema::TensorT> TrainExport::CreateTensor(const mindspore::lite::Tensor *tensor,
                                                           schema::Tensor *scTensor) {
  auto tensorT = std::make_unique<schema::TensorT>();
  tensorT->nodeType = scTensor->nodeType();
  tensorT->dataType = tensor->data_type();
  tensorT->dims = tensor->shape();
  tensorT->format = tensor->format();
  tensorT->name = tensor->tensor_name();
  tensorT->refCount = 0;
  tensorT->offset = 0;
  tensorT->dataType = tensor->data_type();
  tensorT->enableHuffmanCode = false;
  if ((tensorT->nodeType == NodeType_ValueNode) && (scTensor->data() != nullptr) && (scTensor->data()->size() > 0)) {
    tensorT->data = CreateData(tensor);
  }
  for (auto quant_param : tensor->quant_params()) {
    auto quantParamT = std::make_unique<schema::QuantParamT>();
    quantParamT->scale = quant_param.scale;
    quantParamT->zeroPoint = quant_param.zeroPoint;
    quantParamT->min = 0;
    quantParamT->max = 0;
    quantParamT->narrowRange = true;
    quantParamT->numBits = quant_param.bitNum;
    quantParamT->inited = quant_param.inited;
    quantParamT->varCorr = quant_param.var_corr;
    quantParamT->meanCorr = quant_param.mean_corr;
    quantParamT->dstDtype = quant_param.dstDtype;
    quantParamT->roundType = quant_param.roundType;
    quantParamT->multiplier = quant_param.multiplier;
    tensorT->quantParams.emplace_back(std::move(quantParamT));
    if (NeedQuantization(tensor)) {
      QuantTensorData(tensorT.get(), tensor);
    } else {
      tensorT->data = CreateData(tensor);
    }
  }
  tensorT->quantClusters = tensor->quant_clusters();
  return tensorT;
@@ -85,7 +132,7 @@ std::unique_ptr<schema::CNodeT> TrainExport::CreateCNode(const mindspore::kernel
  cnodeT->inputIndex = inputIndex;
  cnodeT->outputIndex = outputIndex;
  cnodeT->name = kernel->name();
  cnodeT->quantType = schema::QuantType_QUANT_NONE;
  cnodeT->quantType = GetNodeQuantType(kernel);
  // find kernel in model
  auto *node = FindNode(kernel);
  if (node == nullptr) {
@@ -132,7 +179,6 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
      MS_LOG(ERROR) << "cannot find tensor " + tensor->ToString() + " in model";
      return RET_ERROR;
    }
    out_set.insert(id);
    auto it = remap.find(id);
    if (it == remap.end()) {
      remap[id] = tensor_idx;
@@ -153,7 +199,7 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
    schema::Tensor *scTensor = model_->all_tensors_.at(id);
    auto tensorT = CreateTensor(tensor, scTensor);
    // find a tensor which is not an output
    if (out_set.find(id) == out_set.end()) {
    if (out_set.find(remap[id]) == out_set.end()) {
      if ((tensorT->nodeType == NodeType_ValueNode) && (tensorT->data.size() == 0)) {
        meta_graph->inputIndex.push_back(remap[id]);
      }
@@ -165,7 +211,7 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
    meta_graph->allTensors.emplace_back(std::move(tensorT));
  }
  auto graph = meta_graph.release();
  int err = SaveToFile(graph, file_name_);
  int err = Storage::Save(*graph, file_name_);
  if (err != RET_OK) {
    MS_LOG(ERROR) << "failed to save flatbuffer file " << file_name_;
  }
@@ -173,30 +219,5 @@ int TrainExport::Export(const std::vector<mindspore::kernel::LiteKernel *> &kern
  return err;
}
int TrainExport::SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath) {
  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = schema::MetaGraph::Pack(builder, graph);
  builder.Finish(offset);
  schema::FinishMetaGraphBuffer(builder, offset);
  int size = builder.GetSize();
  auto content = builder.GetBufferPointer();
  if (content == nullptr) {
    MS_LOG(ERROR) << "GetBufferPointer nullptr";
    return RET_ERROR;
  }
  if (access((outputPath + ".ms").c_str(), F_OK) == 0) {
    chmod((outputPath + ".ms").c_str(), S_IWUSR);
  }
  std::ofstream output(outputPath + ".ms", std::ofstream::binary);
  if (!output.is_open()) {
    MS_LOG(ERROR) << "Can not open output file: " << outputPath << ".ms";
    return RET_ERROR;
  }
  output.write((const char *)content, size);
  output.close();
  chmod((outputPath + ".ms").c_str(), S_IRUSR);
  return RET_OK;
}
}  // namespace lite
}  // namespace mindspore
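The removed SaveToFile duplicated flatbuffer serialization that now lives behind Storage::Save. Its core pattern, in outline (a sketch assuming the generated schema headers; the real implementation also manages file permissions as above):

```cpp
#include <fstream>
#include <string>
#include "flatbuffers/flatbuffers.h"
#include "schema/inner/model_generated.h"

// Sketch of the pack-and-write pattern the deleted SaveToFile performed.
int SaveMetaGraphSketch(const mindspore::schema::MetaGraphT &graph, const std::string &path) {
  flatbuffers::FlatBufferBuilder builder(1024);
  auto offset = mindspore::schema::MetaGraph::Pack(builder, &graph);
  builder.Finish(offset);
  std::ofstream out(path + ".ms", std::ofstream::binary);
  if (!out.is_open()) {
    return -1;
  }
  out.write(reinterpret_cast<const char *>(builder.GetBufferPointer()), builder.GetSize());
  return 0;
}
```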
@@ -50,7 +50,10 @@ class TrainExport {
  std::unique_ptr<schema::TensorT> CreateTensor(const mindspore::lite::Tensor *tensor, schema::Tensor *scTensor);
  std::unique_ptr<schema::CNodeT> CreateCNode(const mindspore::kernel::LiteKernel *kernel,
                                              std::vector<uint32_t> inputIndex, std::vector<uint32_t> outputIndex);
  int SaveToFile(const schema::MetaGraphT *graph, const std::string &outputPath);
  bool NeedQuantization(const mindspore::lite::Tensor *tensor);
  virtual int QuantTensorData(schema::TensorT *dest_tensor, const mindspore::lite::Tensor *src_tensor);
  mindspore::schema::QuantType GetNodeQuantType(const mindspore::kernel::LiteKernel *kernel);
};
}  // namespace lite
}  // namespace mindspore
@@ -32,7 +32,7 @@ std::vector<bool> StringToBitVector(const std::string &str) {
}

STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
  MS_LOG(ERROR) << "un-index weight";
  MS_LOG(DEBUG) << "un-index weight";
  auto bit_num = src_tensor.quantParams()->Get(0)->numBits();
  std::string str(reinterpret_cast<const char *>(src_tensor.data()->data()), src_tensor.data()->size());
@@ -159,6 +159,7 @@ set(TEST_LITE_SRC
    ${LITE_DIR}/src/common/utils.cc
    ${LITE_DIR}/src/common/loader_util.cc
    ${LITE_DIR}/src/common/string_util.cc
    ${LITE_DIR}/src/common/quant_utils.cc
    ${LITE_DIR}/tools/common/flag_parser.cc
    ${LITE_DIR}/tools/benchmark/benchmark.cc
    ${LITE_DIR}/test/st/benchmark_test.cc
@@ -306,6 +307,7 @@ if(SUPPORT_TRAIN)
        ${LITE_DIR}/src/train/train_utils.cc
        ${LITE_DIR}/src/train/transfer_session.cc
        ${LITE_DIR}/src/lite_session.cc
        ${LITE_DIR}/tools/common/storage.cc
        )
else()
    set(TEST_LITE_SRC
@@ -397,6 +399,10 @@ endif()
add_executable(lite-test ${TEST_SRC})
add_dependencies(lite-test fbs_src)

if(SUPPORT_TRAIN)
    add_dependencies(lite-test fbs_inner_src)
endif()

target_link_libraries(lite-test dl mindspore::gtest)
if(PLATFORM_ARM AND ENABLE_FP16)
@@ -11,7 +11,7 @@ googlenet
densenet
shufflenetv2
mini_alexnet weight_quant 2
nin weight_quant 7
nin weight_quant 9
lenet weight_quant 5
mobilenetv1 weight_quant 2
mobilenetv2 weight_quant 2
@@ -82,22 +82,27 @@ function Run_x86() {
        model_prefix=${line_array[0]}
        model_name=${line_array[0]}'_train'
        accuracy_limit=0.5
        export_file=""
        inference_file=""
        if [[ $model_name == \#* ]]; then
            continue
        fi
        if [[ "${line_array[1]}" == "weight_quant" ]]; then
            model_name=${line_array[0]}'_train_quant'
            accuracy_limit=${line_array[2]}
        else
            export_file="${ms_models_path}/${model_name}_tod"
            rm -f ${export_file}"*"
        fi
        if [[ "${save_lite}" == "1" ]]; then
            inference_file="${ms_models_path}/${model_name}_infer"
        fi
        inference_file="${ms_models_path}/${model_name}_infer"
        rm -f ${inference_file}"*"
        echo ${model_name} >> "${run_x86_log_file}"
        ${run_valgrind}./tools/benchmark_train/benchmark_train \
            --modelFile=${ms_models_path}/${model_name}.ms \
            --inDataFile=${train_io_path}/${model_prefix}_input1.bin,${train_io_path}/${model_prefix}_input2.bin \
            --inDataFile=${train_io_path}/${model_prefix}_input \
            --expectedDataFile=${train_io_path}/${model_prefix}_output --epochs=${epoch_num} --numThreads=${threads} \
            --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} >> "${run_x86_log_file}"
            --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file} \
            --exportFile=${export_file} >> "${run_x86_log_file}"
        if [ $? = 0 ]; then
            run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file}
        else
@@ -168,21 +173,22 @@ function Run_arm() {
        model_prefix=${line_array[0]}
        model_name=${line_array[0]}'_train'
        accuracy_limit=0.5
        export_file=""
        if [[ $model_name == \#* ]]; then
            continue
        fi
        if [[ "${line_array[1]}" == "weight_quant" ]]; then
            model_name=${line_array[0]}'_train_quant'
            accuracy_limit=${line_array[2]}
        else
            export_file="${tmp_dir}/${model_name}_tod"
        fi
        inference_file="${tmp_dir}/${model_name}_infer"
        if [[ "${line_array[1]}" == "noarm32" ]] && [[ "$1" == arm32 ]]; then
            run_result=$1': '${model_name}' irrelevant'; echo ${run_result} >> ${run_benchmark_train_result_file}
            continue
        fi
        if [[ "${save_lite}" == "1" ]]; then
            inference_file="${ms_models_path}/${model_name}_infer"
        fi
        # run benchmark_train test without clib data
        echo ${model_name} >> "${run_arm_log_file}"
        adb -s ${device_id} push ${train_io_path}/${model_prefix}_input*.bin ${train_io_path}/${model_prefix}_output*.bin /data/local/tmp/benchmark_train_test >> ${adb_push_log_file}
@@ -193,15 +199,20 @@
        elif [ "$1" == arm32 ]; then
            echo 'cp /data/local/tmp/arm32/libc++_shared.so ./' >> ${adb_cmd_run_file}
        fi
        echo "rm -f ${tmp_dir}/${model_name}_exported.ms" >> ${run_arm_log_file}
        echo "rm -f ${tmp_dir}/${model_name}_exported.ms" >> ${adb_cmd_run_file}
        adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
        echo "rm -f ${export_file} ${inference_file}.ms" >> ${run_arm_log_file}
        echo "rm -f ${export_file} ${inference_file}.ms" >> ${adb_cmd_run_file}
        adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
        adb_cmd=$(cat <<-ENDM
            export LD_LIBRARY_PATH=./:/data/local/tmp/:/data/local/tmp/benchmark_train_test;./benchmark_train \
            --epochs=${epoch_num} \
            --modelFile=${model_name}.ms \
            --inDataFile=${tmp_dir}/${model_prefix}_input1.bin,${tmp_dir}/${model_prefix}_input2.bin \
            --inDataFile=${tmp_dir}/${model_prefix}_input \
            --expectedDataFile=${tmp_dir}/${model_prefix}_output \
            --numThreads=${threads} --accuracyThreshold=${accuracy_limit} --inferenceFile=${inference_file}
            --numThreads=${threads} \
            --accuracyThreshold=${accuracy_limit} \
            --inferenceFile=${inference_file} \
            --exportFile=${export_file}
ENDM
        )
        echo "${adb_cmd}" >> ${run_arm_log_file}
@@ -252,7 +263,7 @@ models_mindspore_train_config=${basepath}/models_ms_train.cfg
epoch_num=1
threads=2
train_io_path=""
while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do
while getopts "r:M:c:m:d:i:e:vt:q:D" opt; do
    case ${opt} in
        r)
            release_path=${OPTARG}
@@ -295,8 +306,6 @@ while getopts "r:M:c:m:d:i:e:vt:q:DF" opt; do
            epoch_num=${OPTARG}
            echo "train epoch num is ${epoch_num}"
            ;;
        F) save_lite=1
            ;;
        ?)
            echo "unknown para"
            exit 1;;
@@ -107,7 +107,7 @@ static STATUS CompressTensor(schema::TensorT *tensor_input, const std::unique_pt
  int bit_num = tensor_input->quantParams.at(0)->numBits;
  // Pack Repetition
  auto repetition_packed = false;
  MS_LOG(ERROR) << dst_node->name;
  MS_LOG(DEBUG) << dst_node->name;
  if (dst_node->quantType == schema::QuantType_QUANT_WEIGHT) {
    if (bit_num <= 8) {
      repetition_packed = PackRepetition<int8_t>(bit_num, tensor_input);
@@ -32,8 +32,6 @@
namespace mindspore {
namespace lite {
static const char *DELIM_COLON = ":";
static const char *DELIM_COMMA = ",";
static const char *DELIM_SLASH = "/";
namespace {
@@ -81,8 +79,8 @@ int NetTrain::GenerateRandomData(size_t size, void *data) {
  return RET_OK;
}
int NetTrain::GenerateInputData() {
  for (auto tensor : ms_inputs_) {
int NetTrain::GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
  for (auto tensor : *ms_inputs) {
    MS_ASSERT(tensor != nullptr);
    auto input_data = tensor->MutableData();
    if (input_data == nullptr) {
@@ -100,16 +98,16 @@ int NetTrain::GenerateInputData() {
  return RET_OK;
}

int NetTrain::LoadInput() {
int NetTrain::LoadInput(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
  if (flags_->in_data_file_.empty()) {
    auto status = GenerateInputData();
    auto status = GenerateInputData(ms_inputs);
    if (status != RET_OK) {
      std::cerr << "Generate input data error " << status << std::endl;
      MS_LOG(ERROR) << "Generate input data error " << status;
      return status;
    }
  } else {
    auto status = ReadInputFile();
    auto status = ReadInputFile(ms_inputs);
    if (status != RET_OK) {
      std::cerr << "ReadInputFile error, " << status << std::endl;
      MS_LOG(ERROR) << "ReadInputFile error, " << status;
@@ -119,8 +117,8 @@ int NetTrain::LoadInput() {
  return RET_OK;
}

int NetTrain::ReadInputFile() {
  if (ms_inputs_.empty()) {
int NetTrain::ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs) {
  if (ms_inputs->empty()) {
    return RET_OK;
  }
@@ -128,16 +126,12 @@
    MS_LOG(ERROR) << "Not supported image input";
    return RET_ERROR;
  } else {
    if (ms_inputs_.size() > flags_->input_data_list_.size()) {
      MS_LOG(ERROR) << "missing input files expecting " << ms_inputs_.size() << ", got "
                    << flags_->input_data_list_.size();
      return RET_ERROR;
    }
    for (size_t i = 0; i < ms_inputs_.size(); i++) {
      auto cur_tensor = ms_inputs_.at(i);
    for (size_t i = 0; i < ms_inputs->size(); i++) {
      auto cur_tensor = ms_inputs->at(i);
      MS_ASSERT(cur_tensor != nullptr);
      size_t size;
      char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
      std::string file_name = flags_->in_data_file_ + std::to_string(i + 1) + ".bin";
      char *bin_buf = ReadFile(file_name.c_str(), &size);
      if (bin_buf == nullptr) {
        MS_LOG(ERROR) << "ReadFile return nullptr";
        return RET_ERROR;
@@ -158,94 +152,12 @@ int NetTrain::ReadInputFile() {
  return RET_OK;
}
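Note the new input-file convention established here: a single --inDataFile value is now treated as a prefix and expanded to <prefix>1.bin, <prefix>2.bin, ... per model input, which is what the updated run scripts above pass (`..._input` instead of a comma-separated list). A tiny sketch of the expansion (the prefix is an assumed example):

```cpp
#include <cstdio>
#include <string>

// Sketch: how one --inDataFile prefix maps to per-input files.
int main() {
  const std::string prefix = "mini_alexnet_input";  // assumed model prefix
  const size_t num_inputs = 2;
  for (size_t i = 0; i < num_inputs; i++) {
    std::string file_name = prefix + std::to_string(i + 1) + ".bin";
    std::printf("%s\n", file_name.c_str());  // mini_alexnet_input1.bin, _input2.bin
  }
  return 0;
}
```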
int NetTrain::CompareOutput() {
  std::cout << "================ Comparing Output data ================" << std::endl;
  float total_bias = 0;
  int total_size = 0;
  bool has_error = false;
  auto tensors_list = session_->GetOutputs();
  if (tensors_list.empty()) {
    MS_LOG(ERROR) << "Cannot find output tensors, get model output failed";
    return RET_ERROR;
  }
  mindspore::tensor::MSTensor *tensor = nullptr;
  int i = 1;
  for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) {
    tensor = session_->GetOutputByTensorName(it->first);
    std::cout << "output is tensor " << it->first << "\n";
    auto outputs = tensor->MutableData();
    size_t size;
    std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
    auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
    if (bin_buf == nullptr) {
      MS_LOG(ERROR) << "ReadFile return nullptr";
      return RET_ERROR;
    }
    if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) {
      if (static_cast<int>(size / sizeof(float)) != tensor->ElementsNum()) {
        MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
                      << ", read size: " << size / sizeof(float);
        return RET_ERROR;
      }
    } else {
      if (size != tensor->Size()) {
        MS_LOG(ERROR) << "Output buffer and output file differ by size. Tensor size: " << tensor->Size()
                      << ", read size: " << size;
        return RET_ERROR;
      }
    }
    float bias = 0.f;
    if (flags_->enable_fp16_ && tensor->data_type() == kNumberTypeFloat16) {
#ifdef ENABLE_FP16
      bias = CompareData<float16_t>(bin_buf, tensor->ElementsNum(), reinterpret_cast<float16_t *>(outputs));
#endif
    } else {
      bias = CompareData<float>(bin_buf, tensor->ElementsNum(), reinterpret_cast<float *>(outputs));
    }
    if (bias >= 0) {
      total_bias += bias;
      total_size++;
    } else {
      has_error = true;
      break;
    }
    i++;
    delete[] bin_buf;
  }
  if (!has_error) {
    float mean_bias;
    if (total_size != 0) {
      mean_bias = total_bias / total_size * 100;
    } else {
      mean_bias = 0;
    }
    std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%"
              << " threshold is:" << this->flags_->accuracy_threshold_ << std::endl;
    std::cout << "=======================================================" << std::endl << std::endl;
    if (mean_bias > this->flags_->accuracy_threshold_) {
      MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%";
      std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl;
      return RET_ERROR;
    } else {
      return RET_OK;
    }
  } else {
    MS_LOG(ERROR) << "Error in CompareData";
    std::cerr << "Error in CompareData" << std::endl;
    std::cout << "=======================================================" << std::endl << std::endl;
    return RET_ERROR;
  }
}
int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lite_session) {
int NetTrain::CompareOutput(const session::LiteSession &lite_session) {
  std::cout << "================ Comparing Forward Output data ================" << std::endl;
  float total_bias = 0;
  int total_size = 0;
  bool has_error = false;
  auto tensors_list = lite_session->GetOutputs();
  auto tensors_list = lite_session.GetOutputs();
  if (tensors_list.empty()) {
    MS_LOG(ERROR) << "Cannot find output tensors, get model output failed";
    return RET_ERROR;
@@ -253,9 +165,9 @@ int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lit
  mindspore::tensor::MSTensor *tensor = nullptr;
  int i = 1;
  for (auto it = tensors_list.begin(); it != tensors_list.end(); ++it) {
    tensor = lite_session->GetOutputByTensorName(it->first);
    tensor = lite_session.GetOutputByTensorName(it->first);
    std::cout << "output is tensor " << it->first << "\n";
    auto outputs = tensor->MutableData();
    auto outputs = tensor->data();
    size_t size;
    std::string output_file = flags_->data_file_ + std::to_string(i) + ".bin";
    auto *bin_buf = ReadFileBuf(output_file.c_str(), &size);
@@ -307,7 +219,7 @@ int NetTrain::CompareOutputLite(const std::unique_ptr<session::LiteSession> &lit
  }
}
int NetTrain::MarkPerformance() {
int NetTrain::MarkPerformance(session::TrainSession *session) {
  MS_LOG(INFO) << "Running train loops...";
  std::cout << "Running train loops..." << std::endl;
  uint64_t time_min = 0xFFFFFFFFFFFFFFFF;
@@ -315,10 +227,10 @@
  uint64_t time_avg = 0;

  for (int i = 0; i < flags_->epochs_; i++) {
    session_->BindThread(true);
    session->BindThread(true);
    auto start = GetTimeUs();
    auto status =
      flags_->time_profiling_ ? session_->RunGraph(before_call_back_, after_call_back_) : session_->RunGraph();
      flags_->time_profiling_ ? session->RunGraph(before_call_back_, after_call_back_) : session->RunGraph();
    if (status != 0) {
      MS_LOG(ERROR) << "Inference error " << status;
      std::cerr << "Inference error " << status;
@@ -330,7 +242,7 @@
    time_min = std::min(time_min, time);
    time_max = std::max(time_max, time);
    time_avg += time;
    session_->BindThread(false);
    session->BindThread(false);
  }

  if (flags_->time_profiling_) {
@@ -352,10 +264,9 @@
  return RET_OK;
}

int NetTrain::MarkAccuracy() {
int NetTrain::MarkAccuracy(session::LiteSession *session) {
  MS_LOG(INFO) << "MarkAccuracy";
  std::cout << "MarkAccuracy" << std::endl;
  for (auto &msInput : ms_inputs_) {
  for (auto &msInput : session->GetInputs()) {
    switch (msInput->data_type()) {
      case TypeId::kNumberTypeFloat:
        PrintInputData<float>(msInput);
@@ -371,16 +282,14 @@
        return RET_ERROR;
    }
  }
  session_->Eval();
  auto status = session_->RunGraph(before_call_back_, after_call_back_);
  auto status = session->RunGraph();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Inference error " << status;
    std::cerr << "Inference error " << status << std::endl;
    return status;
  }

  status = CompareOutput();
  status = CompareOutput(*session);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Compare output error " << status;
    std::cerr << "Compare output error " << status << std::endl;
@@ -388,263 +297,107 @@
  }
  return RET_OK;
}
int NetTrain::MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite_session) {
  MS_LOG(INFO) << "MarkAccuracy";
  std::cout << "MarkAccuracy" << std::endl;
  for (auto &msInput : ms_inputs_) {
    switch (msInput->data_type()) {
      case TypeId::kNumberTypeFloat:
        PrintInputData<float>(msInput);
        break;
      case TypeId::kNumberTypeFloat32:
        PrintInputData<float>(msInput);
        break;
      case TypeId::kNumberTypeInt32:
        PrintInputData<int>(msInput);
        break;
      default:
        MS_LOG(ERROR) << "Datatype " << msInput->data_type() << " is not supported.";
        return RET_ERROR;
    }
  }
  auto status = lite_session->RunGraph();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Inference error " << status;
    std::cerr << "Inference error " << status << std::endl;
    return status;
  }
  status = CompareOutputLite(lite_session);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Compare output error " << status;
    std::cerr << "Compare output error " << status << std::endl;
    return status;
static CpuBindMode FlagToBindMode(int flag) {
  if (flag == 2) {
    return MID_CPU;
  }
  return RET_OK;
  if (flag == 1) {
    return HIGHER_CPU;
  }
  return NO_BIND;
}
int NetTrain::RunExportedNet() {
int NetTrain::CreateAndRunNetwork(const std::string &filename, int train_session, int epochs) {
  auto start_prepare_time = GetTimeUs();
  // Load graph
  std::string model_name = flags_->export_file_.substr(flags_->export_file_.find_last_of(DELIM_SLASH) + 1);
  MS_LOG(INFO) << "start reading exported model file";
  std::cout << "start reading exported model file" << std::endl;
  auto context = std::make_shared<Context>();
  if (context == nullptr) {
    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->cpu_bind_mode_ == 2) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
  } else if (flags_->cpu_bind_mode_ == 1) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
  } else {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
  }
  context->thread_num_ = flags_->num_threads_;
  auto *model = mindspore::lite::Model::Import(flags_->export_file_.c_str());
  std::string model_name = filename.substr(filename.find_last_of(DELIM_SLASH) + 1);
  Context context;
  context.device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = FlagToBindMode(flags_->cpu_bind_mode_);
  context.device_list_[0].device_info_.cpu_device_info_.enable_float16_ = flags_->enable_fp16_;
  context.device_list_[0].device_type_ = mindspore::lite::DT_CPU;
  context.thread_num_ = flags_->num_threads_;
  MS_LOG(INFO) << "start reading model file " << filename.c_str();
  std::cout << "start reading model file " << filename.c_str() << std::endl;
  auto *model = mindspore::lite::Model::Import(filename.c_str());
  if (model == nullptr) {
    MS_LOG(ERROR) << "create model for train session failed";
    return RET_ERROR;
  }
  session_ = session::TrainSession::CreateSession(model, context.get());
  if (session_ == nullptr) {
    MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
    std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->loss_name_ != "") {
    session_->SetLossName(flags_->loss_name_);
  }
  ms_inputs_ = session_->GetInputs();
  auto end_prepare_time = GetTimeUs();
  MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
  std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
  // Load input
  MS_LOG(INFO) << "start generate input data";
  auto status = LoadInput();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Generate input data error";
    return status;
  }
  if (!flags_->data_file_.empty()) {
    MS_LOG(INFO) << "Check accuracy for exported model";
    std::cout << "Check accuracy for exported model " << std::endl;
    status = MarkAccuracy();
    for (auto &data : data_) {
      data.second->shape.clear();
      data.second->data.clear();
      delete data.second;
    }
    data_.clear();
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
      std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
      return status;
  session::LiteSession *session = nullptr;
  session::TrainSession *t_session = nullptr;
  if (train_session) {
    t_session = session::TrainSession::CreateSession(model, &context);
    if (t_session == nullptr) {
      MS_LOG(ERROR) << "RunNetTrain CreateSession failed while running " << model_name.c_str();
      std::cout << "RunNetTrain CreateSession failed while running " << model_name.c_str() << std::endl;
      delete model;
      return RET_ERROR;
    }
  }
  return RET_OK;
}
int NetTrain::RunExportedNetLite(std::string file_name) {
  auto start_prepare_time = GetTimeUs();
  // Load graph
  std::string model_name = file_name.substr(file_name.find_last_of(DELIM_SLASH) + 1);
  MS_LOG(INFO) << "start reading exported model file";
  std::cout << "reading " << file_name << std::endl;
  auto context = std::make_shared<Context>();
  if (context == nullptr) {
    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->cpu_bind_mode_ == 2) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
  } else if (flags_->cpu_bind_mode_ == 1) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
    if (flags_->loss_name_ != "") {
      t_session->SetLossName(flags_->loss_name_);
    }
    if (epochs > 0) {
      t_session->Train();
    }
    session = t_session;
  } else {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
  }
  context->thread_num_ = flags_->num_threads_;
  auto *model = mindspore::lite::Model::Import(file_name.c_str());
  if (model == nullptr) {
    MS_LOG(ERROR) << "create model for lite session failed";
    return RET_ERROR;
  }
  auto lite_session = std::unique_ptr<session::LiteSession>(session::LiteSession::CreateSession(context.get()));
  if (lite_session == nullptr) {
    MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
    std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (lite_session->CompileGraph(model) != RET_OK) {
    MS_LOG(ERROR) << "Cannot compile model";
    delete model;
    return RET_ERROR;
  }
  ms_inputs_ = lite_session->GetInputs();
  auto end_prepare_time = GetTimeUs();
  MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
  std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
  // Load input
  MS_LOG(INFO) << "start generate input data";
  auto status = LoadInput();
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Generate input data error";
    delete model;
    return status;
  }
  if (!flags_->data_file_.empty()) {
    MS_LOG(INFO) << "Check accuracy for exported model";
    std::cout << "Check accuracy for exported model " << std::endl;
    status = MarkAccuracyLite(lite_session);
    for (auto &data : data_) {
      data.second->shape.clear();
      data.second->data.clear();
      delete data.second;
    }
    data_.clear();
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
      std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
    session = session::LiteSession::CreateSession(&context);
    if (session == nullptr) {
      MS_LOG(ERROR) << "ExportedFile CreateSession failed while running " << model_name.c_str();
      std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
      delete model;
      return status;
      return RET_ERROR;
    }
    if (session->CompileGraph(model) != RET_OK) {
      MS_LOG(ERROR) << "Cannot compile model";
      delete model;
      return RET_ERROR;
    }
    delete model;
  }
  delete model;
  return RET_OK;
}
int NetTrain::RunNetTrain() {
  auto start_prepare_time = GetTimeUs();
  // Load graph
  std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
  MS_LOG(INFO) << "start reading model file";
  std::cout << "start reading model file" << std::endl;
  auto context = std::make_shared<Context>();
  if (context == nullptr) {
    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->cpu_bind_mode_ == 2) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
  } else if (flags_->cpu_bind_mode_ == 1) {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
  } else {
    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
  }
  context->device_list_[0].device_info_.cpu_device_info_.enable_float16_ = flags_->enable_fp16_;
  layer_checksum_ = flags_->layer_checksum_;
  context->thread_num_ = flags_->num_threads_;
  auto *model = mindspore::lite::Model::Import(flags_->model_file_.c_str());
  if (model == nullptr) {
    MS_LOG(ERROR) << "create model for train session failed";
    return RET_ERROR;
  }
  session_ = session::TrainSession::CreateSession(model, context.get());
  if (session_ == nullptr) {
    MS_LOG(ERROR) << "RunNetTrain CreateSession failed while running " << model_name.c_str();
    std::cout << "RunNetTrain CreateSession failed while running " << model_name.c_str() << std::endl;
    return RET_ERROR;
  }
  if (flags_->loss_name_ != "") {
    session_->SetLossName(flags_->loss_name_);
  }
  session_->Train();
  ms_inputs_ = session_->GetInputs();
  auto end_prepare_time = GetTimeUs();
  MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
  std::cout << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
  // Load input
  MS_LOG(INFO) << "start generate input data";
  auto status = LoadInput();
  MS_LOG(INFO) << "Load input data";
  auto ms_inputs = session->GetInputs();
  auto status = LoadInput(&ms_inputs);
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Generate input data error";
    MS_LOG(ERROR) << "Load input data error";
    return status;
  }
  if (flags_->epochs_ > 0) {
    status = MarkPerformance();
  if ((epochs > 0) && (t_session != nullptr)) {
    status = MarkPerformance(t_session);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
      std::cout << "Run MarkPerformance error: " << status << std::endl;
      return status;
    }
    SaveModels(t_session, model);  // save file if flags are on
  }
  if (!flags_->data_file_.empty()) {
    status = MarkAccuracy();
    for (auto &data : data_) {
      data.second->shape.clear();
      data.second->data.clear();
      delete data.second;
    if (t_session != nullptr) {
      t_session->Eval();
    }
    data_.clear();
    status = MarkAccuracy(session);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
      std::cout << "Run MarkAccuracy error: " << status << std::endl;
      return status;
    }
  }
  status = CheckExecute(model);
  return RET_OK;
}

int NetTrain::RunNetTrain() {
  CreateAndRunNetwork(flags_->model_file_, true, flags_->epochs_);
  auto status = CheckExecutionOfSavedModels();  // re-initialize sessions according to flags
  if (status != RET_OK) {
    MS_LOG(ERROR) << "Run CheckExecute error: " << status;
    std::cout << "Run CheckExecute error: " << status << std::endl;
@@ -653,8 +406,7 @@ int NetTrain::RunNetTrain() {
  return RET_OK;
}
int NetTrain::CheckExecute(mindspore::lite::Model *model) {
  int status;
int NetTrain::SaveModels(session::TrainSession *session, mindspore::lite::Model *model) {
  if (!flags_->export_file_.empty()) {
    auto ret = Model::Export(model, flags_->export_file_.c_str());
    if (ret != RET_OK) {
@@ -662,67 +414,39 @@ int NetTrain::CheckExecute(mindspore::lite::Model *model) {
      std::cout << "Run SaveToFile error";
      return RET_ERROR;
    }
    delete session_;
    session_ = nullptr;
    status = RunExportedNet();
  }
  if (!flags_->inference_file_.empty()) {
    auto tick = GetTimeUs();
    auto status = session->ExportInference(flags_->inference_file_);
    if (status != RET_OK) {
      MS_LOG(ERROR) << "Run Exported model error: " << status;
      std::cout << "Run Exported model error: " << status << std::endl;
      MS_LOG(ERROR) << "Save model error: " << status;
      std::cout << "Save model error: " << status << std::endl;
      return status;
    }
  } else {
    if (!flags_->inference_file_.empty()) {
      auto tick = GetTimeUs();
      status = session_->ExportInference(flags_->inference_file_);
      if (status != RET_OK) {
        MS_LOG(ERROR) << "Save model error: " << status;
        std::cout << "Save model error: " << status << std::endl;
        return status;
      }
      std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n";
      delete session_;
      session_ = nullptr;
      status = RunExportedNetLite(flags_->inference_file_ + ".ms");
      if (status != RET_OK) {
        MS_LOG(ERROR) << "Running saved model error: " << status;
        std::cout << "Running saved model error: " << status << std::endl;
        return status;
      }
    }
    std::cout << "ExportInference() execution time is " << GetTimeUs() - tick << "us\n";
  }
  return RET_OK;
}
| void NetTrainFlags::InitInputDataList() { | |||
| char *saveptr1 = nullptr; | |||
| char *input_list = new char[this->in_data_file_.length() + 1]; | |||
| snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str()); | |||
| const char *split_c = ","; | |||
| char *cur_input = strtok_r(input_list, split_c, &saveptr1); | |||
| while (cur_input != nullptr) { | |||
| input_data_list_.emplace_back(cur_input); | |||
| cur_input = strtok_r(nullptr, split_c, &saveptr1); | |||
| } | |||
| delete[] input_list; | |||
| } | |||
| void NetTrainFlags::InitResizeDimsList() { | |||
| std::string content; | |||
| content = this->resize_dims_in_; | |||
| std::vector<int64_t> shape; | |||
| auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); | |||
| for (const auto &shape_str : shape_strs) { | |||
| shape.clear(); | |||
| auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); | |||
| std::cout << "Resize Dims: "; | |||
| for (const auto &dim_str : dim_strs) { | |||
| std::cout << dim_str << " "; | |||
| shape.emplace_back(static_cast<int64_t>(std::stoi(dim_str))); | |||
| } | |||
| std::cout << std::endl; | |||
| this->resize_dims_.emplace_back(shape); | |||
| } | |||
| } | |||
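For context, the flag parsed above uses DELIM_COLON between shapes and DELIM_COMMA between dimensions. A standalone illustration of the format (flag value hypothetical):

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// "1,32,32,3:1,10" -> {{1, 32, 32, 3}, {1, 10}}: one shape per input tensor.
int main() {
  const std::string flag = "1,32,32,3:1,10";
  std::vector<std::vector<int64_t>> dims;
  std::stringstream shapes(flag);
  std::string shape_str;
  while (std::getline(shapes, shape_str, ':')) {
    std::vector<int64_t> shape;
    std::stringstream ss(shape_str);
    std::string dim;
    while (std::getline(ss, dim, ',')) shape.push_back(std::stoll(dim));
    dims.push_back(shape);
  }
  std::cout << dims.size() << " shapes, first has " << dims[0].size() << " dims\n";
  return 0;
}
```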
| int NetTrain::CheckExecutionOfSavedModels() { | |||
| int status = RET_OK; | |||
| if (!flags_->export_file_.empty()) { | |||
| status = NetTrain::CreateAndRunNetwork(flags_->export_file_, true, 0); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "Run Exported model " << flags_->export_file_ << " error: " << status; | |||
| std::cout << "Run Exported model " << flags_->export_file_ << " error: " << status << std::endl; | |||
| return status; | |||
| } | |||
| } | |||
| if (!flags_->inference_file_.empty()) { | |||
| status = NetTrain::CreateAndRunNetwork(flags_->inference_file_ + ".ms", false, 0); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "Running saved model " << flags_->inference_file_ << ".ms error: " << status; | |||
| std::cout << "Running saved model " << flags_->inference_file_ << ".ms error: " << status << std::endl; | |||
| return status; | |||
| } | |||
| } | |||
| return status; | |||
| } | |||
| int NetTrain::InitCallbackParameter() { | |||
| @@ -766,7 +490,7 @@ int NetTrain::InitCallbackParameter() { | |||
| op_times_by_type_[call_param.node_type].second += cost; | |||
| op_times_by_name_[call_param.node_name].first++; | |||
| op_times_by_name_[call_param.node_name].second += cost; | |||
| - if (layer_checksum_) { | |||
| + if (flags_->layer_checksum_) { | |||
| auto out_tensor = after_outputs.at(0); | |||
| void *output = out_tensor->MutableData(); | |||
| int tensor_size = out_tensor->ElementsNum(); | |||
| @@ -841,13 +565,6 @@ int NetTrain::Init() { | |||
| std::cerr << "modelPath is required" << std::endl; | |||
| return 1; | |||
| } | |||
| flags_->InitInputDataList(); | |||
| flags_->InitResizeDimsList(); | |||
| if (!flags_->resize_dims_.empty() && flags_->resize_dims_.size() != flags_->input_data_list_.size()) { | |||
| MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath"; | |||
| std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl; | |||
| return RET_ERROR; | |||
| } | |||
| if (flags_->time_profiling_) { | |||
| auto status = InitCallbackParameter(); | |||
| @@ -925,14 +642,6 @@ int NetTrain::PrintResult(const std::vector<std::string> &title, | |||
| return RET_OK; | |||
| } | |||
| NetTrain::~NetTrain() { | |||
| for (auto iter : this->data_) { | |||
| delete (iter.second); | |||
| } | |||
| this->data_.clear(); | |||
| if (session_ != nullptr) delete (session_); | |||
| } | |||
| int RunNetTrain(int argc, const char **argv) { | |||
| NetTrainFlags flags; | |||
| Option<std::string> err = flags.ParseFlags(argc, argv); | |||
| @@ -42,15 +42,6 @@ enum MS_API DataType { kImage = 0, kBinary = 1 }; | |||
| constexpr float relativeTolerance = 1e-5; | |||
| constexpr float absoluteTolerance = 1e-8; | |||
| struct MS_API CheckTensor { | |||
| CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data) { | |||
| this->shape = shape; | |||
| this->data = data; | |||
| } | |||
| std::vector<size_t> shape; | |||
| std::vector<float> data; | |||
| }; | |||
| template <typename T> | |||
| float TensorSum(void *data, int size) { | |||
| T *typed_data = reinterpret_cast<T *>(data); | |||
| @@ -84,10 +75,6 @@ class MS_API NetTrainFlags : public virtual FlagParser { | |||
| ~NetTrainFlags() override = default; | |||
| void InitInputDataList(); | |||
| void InitResizeDimsList(); | |||
| public: | |||
| // common | |||
| std::string model_file_; | |||
| @@ -118,25 +105,22 @@ class MS_API NetTrainFlags : public virtual FlagParser { | |||
| class MS_API NetTrain { | |||
| public: | |||
| explicit NetTrain(NetTrainFlags *flags) : flags_(flags) {} | |||
| - virtual ~NetTrain(); | |||
| + virtual ~NetTrain() = default; | |||
| int Init(); | |||
| int RunNetTrain(); | |||
| - int RunExportedNet(); | |||
| private: | |||
| // call GenerateInputData or ReadInputFile to init inputTensors | |||
| - int LoadInput(); | |||
| + int LoadInput(Vector<tensor::MSTensor *> *ms_inputs); | |||
| // call GenerateRandomData to fill inputTensors | |||
| - int GenerateInputData(); | |||
| + int GenerateInputData(std::vector<mindspore::tensor::MSTensor *> *ms_inputs); | |||
| int GenerateRandomData(size_t size, void *data); | |||
| - int ReadInputFile(); | |||
| - int CompareOutput(); | |||
| + int ReadInputFile(std::vector<mindspore::tensor::MSTensor *> *ms_inputs); | |||
| + int CreateAndRunNetwork(const std::string &filename, int train_session, int epochs); | |||
| int InitCallbackParameter(); | |||
| @@ -208,22 +192,13 @@ class MS_API NetTrain { | |||
| return meanError; | |||
| } | |||
| - int MarkPerformance(); | |||
| - int MarkAccuracy(); | |||
| + int MarkPerformance(session::TrainSession *session); | |||
| private: | |||
| - int RunExportedNetLite(std::string file_name); | |||
| - int MarkAccuracyLite(const std::unique_ptr<session::LiteSession> &lite_session); | |||
| - int CompareOutputLite(const std::unique_ptr<session::LiteSession> &lite_session); | |||
| - int CheckExecute(mindspore::lite::Model *model); | |||
| + int MarkAccuracy(session::LiteSession *lite_session); | |||
| + int CompareOutput(const session::LiteSession &lite_session); | |||
| + int SaveModels(session::TrainSession *session, mindspore::lite::Model *model); | |||
| int CheckExecutionOfSavedModels(); | |||
| NetTrainFlags *flags_; | |||
| - session::TrainSession *session_ = nullptr; | |||
| - std::vector<mindspore::tensor::MSTensor *> ms_inputs_; | |||
| - std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> ms_outputs_; | |||
| - std::unordered_map<std::string, CheckTensor *> data_; | |||
| std::unordered_map<std::string, TypeId> data_type_map_{{"FLOAT", TypeId::kNumberTypeFloat}, | |||
| {"INT32", TypeId::kNumberTypeInt32}}; | |||
| // callback parameters | |||
| uint64_t op_begin_ = 0; | |||
| @@ -234,7 +209,6 @@ class MS_API NetTrain { | |||
| mindspore::KernelCallBack before_call_back_; | |||
| mindspore::KernelCallBack after_call_back_; | |||
| - bool layer_checksum_ = false; | |||
| }; | |||
| int MS_API RunNetTrain(int argc, const char **argv); | |||
| @@ -143,7 +143,7 @@ bool IndexingCompress(const std::set<T> &quant_data_set, const std::map<T, size_ | |||
| tensor->data.resize(new_data_str.size()); | |||
| tensor->weightQunatCompressType = schema::WeightQunatCompressType_INDEXING; | |||
| MS_LOG(ERROR) << "set WeightQunatCompressType_INDEXING"; | |||
| MS_LOG(DEBUG) << "set WeightQunatCompressType_INDEXING"; | |||
| return true; | |||
| } | |||
| @@ -285,21 +285,21 @@ bool PackRepetition(size_t bit_num, schema::TensorT *tensor) { | |||
| auto pack_sparsity_size_in_bit = | |||
| 1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt + unique_value_bit * nz_cnt + nz_cnt * coor_best_bit; | |||
| size_t pack_sparsity_size_in_byte = ceil(pack_sparsity_size_in_bit / 8.0); | |||
| MS_LOG(ERROR) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte | |||
| MS_LOG(DEBUG) << "coor_best_bit: " << coor_best_bit << " ori: " << origin_size_in_byte | |||
| << " indexing: " << pack_repetition_size_in_byte << " sparse: " << pack_sparsity_size_in_byte; | |||
| auto min_byte_need = std::min({origin_size_in_byte, pack_repetition_size_in_byte, pack_sparsity_size_in_byte}); | |||
| if (min_byte_need == origin_size_in_byte) { | |||
| return false; | |||
| } else if (min_byte_need == pack_repetition_size_in_byte) { | |||
| MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte; | |||
| MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_repetition_size_in_byte; | |||
| return IndexingCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt, | |||
| pack_repetition_size_in_byte, bit_num, tensor); | |||
| } else if (min_byte_need == pack_sparsity_size_in_byte) { | |||
| MS_LOG(ERROR) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte; | |||
| MS_LOG(DEBUG) << "from " << origin_size_in_byte << " to " << pack_sparsity_size_in_byte; | |||
| return SparsityCompress<T>(quant_data_set, unique_value_index_map, unique_value_bit, unique_value_cnt, | |||
| pack_sparsity_size_in_byte, nz_cnt, coor_best_bit, bit_num, tensor); | |||
| } else { | |||
| MS_LOG(ERROR) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " " | |||
| MS_LOG(DEBUG) << "unexpected: " << min_byte_need << " not in {" << origin_size_in_byte << " " | |||
| << pack_repetition_size_in_byte << " " << pack_sparsity_size_in_byte << "}"; | |||
| } | |||
| return false; | |||
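To make the three-way comparison above concrete, here is a minimal numeric check of the sparsity-size estimate (all counts hypothetical):

```cpp
#include <cmath>
#include <cstdio>

// Mirrors the expression above: 1*8 + 4*8 + bit_num + bit_num*unique_value_cnt
//   + unique_value_bit*nz_cnt + nz_cnt*coor_best_bit, then rounded up to bytes.
int main() {
  const int bit_num = 8, unique_value_cnt = 16, unique_value_bit = 4;
  const int nz_cnt = 100, coor_best_bit = 7;
  const double bits = 1 * 8 + 4 * 8 + bit_num + bit_num * unique_value_cnt +
                      unique_value_bit * nz_cnt + nz_cnt * coor_best_bit;
  // 8 + 32 + 8 + 128 + 400 + 700 = 1276 bits -> ceil(1276 / 8) = 160 bytes.
  std::printf("%d bytes\n", static_cast<int>(std::ceil(bits / 8.0)));
  return 0;
}
```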
| @@ -22,6 +22,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/graphdef_transform.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/optimizer.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/quant_utils.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../common/graph_util.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../common/node_util.cc | |||
| ${CMAKE_CURRENT_SOURCE_DIR}/../common/tensor_util.cc | |||
| @@ -14,14 +14,16 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "tools/converter/legacy_optimizer/graph/tensor_quant_pass.h" | |||
| #include <vector> | |||
| #include <cmath> | |||
| #include "tools/converter/legacy_optimizer/graph/tensor_quant_pass.h" | |||
| #include <algorithm> | |||
| #include "tools/converter/converter_context.h" | |||
| #include "tools/converter/quantizer/quantize_util.h" | |||
| #include "tools/common/tensor_util.h" | |||
| #include "tools/common/graph_util.h" | |||
| #include "tools/common/node_util.h" | |||
| #include "src/common/quant_utils.h" | |||
| namespace mindspore::lite { | |||
| namespace { | |||
| @@ -49,7 +51,7 @@ STATUS ComputeDataToInt8(const std::unique_ptr<TensorT> &tensor, int32_t index) | |||
| return RET_OK; | |||
| } | |||
| for (size_t j = 0; j < wShapeSize; j++) { | |||
| - qDatas[j] = quant::QuantizeData<int8_t>(weightData[j], weightQauntParam.get()); | |||
| + qDatas[j] = QuantizeData<int8_t>(weightData[j], weightQauntParam.get()); | |||
| } | |||
| } else { // convert uint8 to int8 | |||
| auto *weightData = static_cast<uint8_t *>(oriWeightData); | |||
| @@ -141,7 +143,7 @@ STATUS ComputeQuantTensorPerChannel(TensorT *tensor, const int &tensor_index, co | |||
| auto *dst_data_int32 = reinterpret_cast<int32_t *>(dst_data.data()); | |||
| dst_data_int32[index] = quant_data; | |||
| } else { | |||
| - auto quant_data = quant::QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get()); | |||
| + auto quant_data = QuantizeData<int8_t>(raw_data, tensor->quantParams.at(i).get()); | |||
| dst_data[index] = quant_data; | |||
| } | |||
| } | |||
| @@ -44,6 +44,7 @@ | |||
| #include "securec/include/securec.h" | |||
| #include "tools/common/tensor_util.h" | |||
| #include "src/common/file_utils.h" | |||
| #include "src/common/quant_utils.h" | |||
| #include "src/common/utils.h" | |||
| #include "tools/converter/quantizer/weight_quantizer.h" | |||
| @@ -1282,8 +1283,7 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) { | |||
| return status; | |||
| } | |||
| - if (calibrator_->config_param_.mixed) { | |||
| - // get opname_bit map | |||
| + if (calibrator_->config_param_.mixed) { // get opname_bit map | |||
| auto weight_quant_func_graph = CopyFuncGraph(func_graph); | |||
| if (weight_quant_func_graph == nullptr) { | |||
| MS_LOG(ERROR) << "CopyFuncGraph error"; | |||
| @@ -1315,7 +1315,6 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) { | |||
| MS_LOG(ERROR) << "create session failed!"; | |||
| return RET_ERROR; | |||
| } | |||
| MS_LOG(INFO) << "start to update divergence's max value"; | |||
| status = DoInference(); | |||
| if (status != RET_OK) { | |||
| @@ -1363,14 +1362,12 @@ STATUS PostTrainingQuantizer::DoQuantize(FuncGraphPtr func_graph) { | |||
| MS_LOG(ERROR) << "create session failed!"; | |||
| return RET_ERROR; | |||
| } | |||
| MS_LOG(INFO) << "do bias correction"; | |||
| status = BiasCorrection(func_graph); | |||
| if (status != RET_OK) { | |||
| MS_LOG(WARNING) << "BiasCorrection failed."; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -1477,7 +1474,7 @@ KernelCallBack PostTrainingQuantizer::GetBeforeCallBack(bool int8_op) { | |||
| quant_param_t.scale = quant_params[0].scale; | |||
| quant_param_t.zeroPoint = quant_params[0].zeroPoint; | |||
| for (auto float_data : fp32_op_input) { | |||
| - auto quant_data = QuantizeData<int8_t>(float_data, quant_param_t, quant_max, quant_min); | |||
| + auto quant_data = QuantizeData<int8_t>(float_data, &quant_param_t, quant_max, quant_min); | |||
| quant_datas.push_back(quant_data); | |||
| } | |||
| @@ -100,12 +100,12 @@ bool QuantStrategy::CanConvOpQuantized(const CNodePtr &node) const { | |||
| return true; | |||
| } | |||
| - bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const { | |||
| + bool QuantStrategy::CanOpPostQuantized(const AnfNodePtr &node) const { | |||
| MS_ASSERT(node != nullptr); | |||
| if (!node->isa<mindspore::CNode>()) { | |||
| return false; | |||
| } | |||
| - auto cnode = std::dynamic_pointer_cast<mindspore::CNode>(node); | |||
| + const auto cnode = std::dynamic_pointer_cast<mindspore::CNode>(node); | |||
| auto type = NodePrimitiveType(cnode); | |||
| static const std::vector<std::string> int8OpList = { | |||
| ops::kNameAddFusion, ops::kNameActivation, ops::kNameAvgPoolFusion, | |||
| @@ -268,67 +268,6 @@ bool TensorQuantParamsInited(const schema::TensorT &tensor) { | |||
| return true; | |||
| } | |||
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, | |||
| int quant_min, int num_bits) { | |||
| MS_ASSERT(quantParam != nullptr); | |||
| if (mMin > 0.0f) { | |||
| MS_LOG(DEBUG) << "min " << mMin << " is bigger then 0, set to 0, this may course low precision"; | |||
| mMin = 0.0f; | |||
| } | |||
| if (mMax < 0.0f) { | |||
| MS_LOG(DEBUG) << "mMax " << mMax << " is smaller than 0, set to 0, this may course low precision"; | |||
| mMax = 0.0f; | |||
| } | |||
| if (mMin > mMax) { | |||
| MS_LOG(ERROR) << "cal error while min" << mMin << ">" << mMax; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| if (mMin == mMax) { | |||
| if (mMin != 0.0f) { | |||
| MS_LOG(ERROR) << "min and max should both be zero if they are equal to each other"; | |||
| return RET_ERROR; | |||
| } | |||
| quantParam->inited = true; | |||
| quantParam->min = mMin; | |||
| quantParam->max = mMax; | |||
| quantParam->scale = 0.0f; | |||
| quantParam->zeroPoint = 0; | |||
| quantParam->narrowRange = narrowRange; | |||
| quantParam->numBits = num_bits; | |||
| return RET_OK; | |||
| } | |||
| auto quantMinFloat = static_cast<double>(quant_min); | |||
| auto quantMaxFloat = static_cast<double>(quant_max); | |||
| if (fabs(quantMaxFloat - quantMinFloat) <= 0.0f) { | |||
| MS_LOG(ERROR) << "divisor cannot be 0"; | |||
| return RET_ERROR; | |||
| } | |||
| double scale = (mMax - mMin) / (quantMaxFloat - quantMinFloat); | |||
| if (fabs(scale) <= 0.0f) { | |||
| MS_LOG(ERROR) << "divisor 'scale' cannot be 0"; | |||
| return RET_ERROR; | |||
| } | |||
| const double zeroPointFromMin = quantMinFloat - mMin / scale; | |||
| int zeroPoint = static_cast<int32_t>(std::round(zeroPointFromMin)); | |||
| if (scale < SCALE_THREASHOLD) { | |||
| zeroPoint = 0; | |||
| } | |||
| // The zero point should always be in the range of quantized value, | |||
| // [qmin, qmax]. | |||
| MS_ASSERT(zeroPoint >= quant_min); | |||
| MS_ASSERT(zeroPoint <= quant_max); | |||
| quantParam->inited = true; | |||
| quantParam->min = mMin; | |||
| quantParam->max = mMax; | |||
| quantParam->scale = scale; | |||
| quantParam->zeroPoint = zeroPoint; | |||
| quantParam->narrowRange = narrowRange; | |||
| quantParam->numBits = num_bits; | |||
| return RET_OK; | |||
| } | |||
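The function removed here (it now lives in src/common/quant_utils.cc, per the new file added earlier in this PR) computes standard affine-quantization parameters. A self-contained worked example, assuming a signed 8-bit range:

```cpp
#include <cmath>
#include <cstdio>

// scale = (max - min) / (qmax - qmin); zeroPoint = round(qmin - min / scale).
// Here the float range [-1.0, 3.0] is mapped onto [-128, 127].
int main() {
  const double mMin = -1.0, mMax = 3.0;
  const int quant_min = -128, quant_max = 127;
  const double scale = (mMax - mMin) / (quant_max - quant_min);                   // ~0.01569
  const int zeroPoint = static_cast<int>(std::round(quant_min - mMin / scale));   // -64
  // Round trip: dequant(q) = (q - zeroPoint) * scale recovers ~[-1.004, 2.996].
  std::printf("scale=%.5f zeroPoint=%d range=[%.3f, %.3f]\n", scale, zeroPoint,
              (quant_min - zeroPoint) * scale, (quant_max - zeroPoint) * scale);
  return 0;
}
```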
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int numBits) { | |||
| MS_ASSERT(quantParam != nullptr); | |||
| if (mMin > 0.0f) { | |||
| @@ -999,26 +938,6 @@ STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_data | |||
| return RET_OK; | |||
| } | |||
| void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas, | |||
| bool channel_at_first, float *desired_max, float *desired_min) { | |||
| float min = FLT_MAX; | |||
| float max = -FLT_MAX; | |||
| // find min and max | |||
| for (int j = 0; j < one_filter_size; j++) { | |||
| auto index = j + i * one_filter_size; | |||
| if (!channel_at_first) { | |||
| index = j * channels + i; | |||
| } | |||
| if (index >= elem_count) { | |||
| MS_LOG(ERROR) << "over flow!"; | |||
| } | |||
| min = std::min(min, raw_datas[index]); | |||
| max = std::max(max, raw_datas[index]); | |||
| } | |||
| *desired_max = max; | |||
| *desired_min = min; | |||
| } | |||
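The two index patterns in the loop above are easy to misread; a standalone walk over a toy buffer with channels=2 and one_filter_size=3:

```cpp
#include <cstdio>

// channel_at_first: layout [c0 c0 c0 c1 c1 c1] -> index = j + i * one_filter_size
// otherwise:        layout [c0 c1 c0 c1 c0 c1] -> index = j * channels + i
int main() {
  const int channels = 2, one_filter_size = 3;
  for (int at_first = 1; at_first >= 0; --at_first) {
    std::printf("channel_at_first=%d\n", at_first);
    for (int i = 0; i < channels; ++i) {
      std::printf("  channel %d indices:", i);
      for (int j = 0; j < one_filter_size; ++j) {
        std::printf(" %d", at_first ? j + i * one_filter_size : j * channels + i);
      }
      std::printf("\n");
    }
  }
  return 0;
}
```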
| int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first) { | |||
| auto channels = dims[0]; | |||
| if (!(*channel_at_first)) { | |||
| @@ -43,6 +43,7 @@ | |||
| #include "src/lite_session.h" | |||
| #include "tools/converter/graphdef_transform.h" | |||
| #include "src/common/file_utils.h" | |||
| #include "src/common/quant_utils.h" | |||
| namespace mindspore::lite::quant { | |||
| static constexpr size_t UINT8_QUANTIZATION = 8; | |||
| @@ -82,7 +83,7 @@ class QuantStrategy { | |||
| bool CanConvOpQuantized(const CNodePtr &node) const; | |||
| bool CanMulOpQuantized(const CNodePtr &node) const; | |||
| - bool CanOpPostQuantized(AnfNodePtr &node) const; | |||
| + bool CanOpPostQuantized(const AnfNodePtr &node) const; | |||
| bool CanTensorQuantized(const AnfNodePtr &inputNode) const; | |||
| size_t m_weight_size_; | |||
| @@ -100,9 +101,6 @@ constexpr int quant_param_size = 32 * 8; | |||
| QuantParamHolderPtr GetCNodeQuantHolder(const PrimitivePtr &primitive); | |||
| - STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, | |||
| - int quant_min, int num_bits); | |||
| STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false, | |||
| int numBits = UINT8_QUANTIZATION); | |||
| @@ -112,9 +110,6 @@ std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epoc | |||
| STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_datas, int new_size, TypeId new_data_type); | |||
| void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas, | |||
| bool channel_at_first, float *desired_max, float *desired_min); | |||
| int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first); | |||
| void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first, | |||
| @@ -123,193 +118,10 @@ void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, | |||
| void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index, | |||
| bool *channel_at_first, int *channel_cnt); | |||
| template <typename T> | |||
| T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { | |||
| MS_ASSERT(quantParam != nullptr); | |||
| MS_ASSERT(quantParam->inited); | |||
| const auto scale = quantParam->scale; | |||
| const auto zeroPoint = quantParam->zeroPoint; | |||
| const auto numBit = quantParam->numBits; | |||
| const auto narrowRange = quantParam->narrowRange; | |||
| double maxLimitTemp = static_cast<float>((1 << (unsigned int)numBit) - 1); | |||
| const double maxLimit = static_cast<float>(maxLimitTemp - zeroPoint + std::numeric_limits<T>::min()) * scale; | |||
| double minLimit; | |||
| if (narrowRange) { | |||
| minLimit = static_cast<float>(std::numeric_limits<T>::min() + 1 - zeroPoint) * scale; | |||
| } else { | |||
| minLimit = static_cast<float>(std::numeric_limits<T>::min() - zeroPoint) * scale; | |||
| } | |||
| return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] { | |||
| double tmp; | |||
| if (originData > maxLimit) { | |||
| tmp = maxLimit; | |||
| } else if (originData < minLimit) { | |||
| tmp = minLimit; | |||
| } else { | |||
| tmp = originData; | |||
| } | |||
| auto quantData = static_cast<T>(std::round(zeroPoint + tmp / scale)); | |||
| return quantData; | |||
| }(); | |||
| } | |||
| template <typename T> | |||
| T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quant_max, int quant_min) { | |||
| MS_ASSERT(quantParam.inited); | |||
| const auto scale = quantParam.scale; | |||
| const int zeroPoint = quantParam.zeroPoint; | |||
| const auto narrowRange = quantParam.narrowRange; | |||
| const int maxLimit = quant_max; | |||
| const int minLimit = quant_min; | |||
| if (scale <= SCALE_THREASHOLD) { | |||
| return 0; | |||
| } | |||
| return [maxLimit, minLimit, zeroPoint, scale, narrowRange, originData] { | |||
| auto quant_data = std::round(originData / scale + zeroPoint); | |||
| if (quant_data > maxLimit) { | |||
| quant_data = maxLimit; | |||
| } else if (quant_data < minLimit) { | |||
| quant_data = minLimit; | |||
| } | |||
| return static_cast<T>(quant_data); | |||
| }(); | |||
| } | |||
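A quick standalone sketch of the saturating overload above, reusing the scale and zero-point from the earlier worked example (values assumed):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Same round-then-clamp logic as the template above, outside the schema types.
int8_t QuantizeClamped(float x, double scale, int zp, int qmin, int qmax) {
  const double q = std::round(x / scale + zp);
  return static_cast<int8_t>(std::min<double>(qmax, std::max<double>(qmin, q)));
}

int main() {
  const double scale = 4.0 / 255;  // zeroPoint = -64, int8 range [-128, 127]
  std::printf("%d %d\n", QuantizeClamped(1.0f, scale, -64, -128, 127),    // 0
              QuantizeClamped(10.0f, scale, -64, -128, 127));             // saturates at 127
  return 0;
}
```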
| bool QuantParamEqual(const schema::QuantParamT &quant_param1, const schema::QuantParamT &quant_param2); | |||
| bool TensorQuantParamsInited(const schema::TensorT &tensor); | |||
| template <typename T> | |||
| STATUS DoPerChannelQuant(const tensor::TensorPtr &weight, const QuantType &quant_type, | |||
| std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min, | |||
| const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, | |||
| std::vector<float> *dequant_datas, TypeId quant_data_type, bool channel_at_first = true, | |||
| int channel_cnt = -1) { | |||
| auto dims = weight->shape(); | |||
| size_t elem_count = weight->DataSize(); | |||
| auto *raw_datas = static_cast<float *>(weight->data_c()); | |||
| auto channels = CalChannels(dims, channel_cnt, &channel_at_first); | |||
| if (channels == 0) { | |||
| MS_LOG(ERROR) << "channels is zero"; | |||
| return RET_ERROR; | |||
| } | |||
| size_t one_filter_size = elem_count / channels; | |||
| bool do_quant = quant_param_size / (sizeof(float) * 8 - bit_num) < one_filter_size; | |||
| if (!do_quant && quant_type == QuantType_WeightQuant) { | |||
| MS_LOG(INFO) << "too few elements in a filter, no need to quantize. " << one_filter_size; | |||
| return RET_CONTINUE; | |||
| } | |||
| for (int i = 0; i < channels; i++) { | |||
| float min = FLT_MAX; | |||
| float max = -FLT_MAX; | |||
| GetMaxMinPerchannel(channels, one_filter_size, i, elem_count, raw_datas, channel_at_first, &max, &min); | |||
| schema::QuantParamT quant_param; | |||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||
| return status; | |||
| } | |||
| // do quantization | |||
| double average_dequant = 0; | |||
| double average_raw = 0; | |||
| for (uint32_t j = 0; j < one_filter_size; j++) { | |||
| auto index = j + i * one_filter_size; | |||
| if (!channel_at_first) { | |||
| index = j * channels + i; | |||
| } | |||
| MS_ASSERT(index < elem_count); | |||
| float raw_data = raw_datas[index]; | |||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||
| (*quant_datas)[index] = quant_data; | |||
| if (quant_type == QuantType_WeightQuant) { | |||
| float dequant_data = quant_param.scale * (quant_data - quant_param.zeroPoint); | |||
| (*dequant_datas)[index] = dequant_data; | |||
| average_dequant += dequant_data; | |||
| average_raw += raw_data; | |||
| } | |||
| } | |||
| if (quant_type == QuantType_WeightQuant && !k_means) { | |||
| // mean | |||
| average_dequant = average_dequant / one_filter_size; | |||
| average_raw = average_raw / one_filter_size; | |||
| // std | |||
| double variance_dequant = 0; | |||
| double variance_raw = 0; | |||
| for (uint32_t j = 0; j < one_filter_size; j++) { | |||
| auto index = j + i * one_filter_size; | |||
| if (!channel_at_first) { | |||
| index = j * channels + i; | |||
| } | |||
| MS_ASSERT(index < elem_count); | |||
| variance_dequant += std::pow((*dequant_datas)[index] - average_dequant, 2); | |||
| variance_raw += std::pow(raw_datas[index] - average_raw, 2); | |||
| } | |||
| variance_dequant = std::sqrt(variance_dequant / one_filter_size); | |||
| variance_raw = std::sqrt(variance_raw / one_filter_size); | |||
| quant_param.varCorr = 1; | |||
| if (variance_raw != 0 && variance_dequant != 0) { | |||
| auto temp_var_corr = variance_raw / variance_dequant; | |||
| if (temp_var_corr > 0 && temp_var_corr < 10) { | |||
| quant_param.varCorr = temp_var_corr; | |||
| } else { | |||
| MS_LOG(WARNING) << "unexpected var_corr: " << temp_var_corr; | |||
| } | |||
| } | |||
| quant_param.meanCorr = average_raw - average_dequant * quant_param.varCorr; | |||
| } | |||
| quant_params->emplace_back(quant_param); | |||
| } | |||
| auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T), quant_data_type); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
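The varCorr/meanCorr block above is a mean/std bias correction for weight quantization: the dequantized channel is rescaled so its statistics match the float original. A toy illustration of how the factors are derived (data hypothetical):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Raw float weights of one channel and their quantize->dequantize images.
  std::vector<double> raw = {0.1, 0.5, -0.3, 0.9};
  std::vector<double> deq = {0.08, 0.52, -0.28, 0.88};
  auto stats = [](const std::vector<double> &v, double *mean, double *sd) {
    double m = 0.0, s = 0.0;
    for (double x : v) m += x;
    m /= v.size();
    for (double x : v) s += (x - m) * (x - m);
    *mean = m;
    *sd = std::sqrt(s / v.size());
  };
  double mean_raw, sd_raw, mean_deq, sd_deq;
  stats(raw, &mean_raw, &sd_raw);
  stats(deq, &mean_deq, &sd_deq);
  const double var_corr = (sd_deq != 0.0) ? sd_raw / sd_deq : 1.0;  // quant_param.varCorr
  const double mean_corr = mean_raw - mean_deq * var_corr;          // quant_param.meanCorr
  // Applying corrected = var_corr * dequant + mean_corr restores raw's mean/std.
  std::printf("varCorr=%.4f meanCorr=%.4f\n", var_corr, mean_corr);
  return 0;
}
```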
| template <typename T> | |||
| STATUS DoPerLayerQuant(const tensor::TensorPtr &weight, const QuantType &quant_type, | |||
| std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min, | |||
| const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, | |||
| TypeId quant_data_type) { | |||
| auto dims = weight->shape(); | |||
| size_t elem_count = weight->DataSize(); | |||
| auto *raw_datas = static_cast<float *>(weight->data_c()); | |||
| float min = FLT_MAX; | |||
| float max = -FLT_MAX; | |||
| for (uint32_t i = 0; i < elem_count; i++) { | |||
| // find max min | |||
| min = std::min(min, raw_datas[i]); | |||
| max = std::max(max, raw_datas[i]); | |||
| } | |||
| schema::QuantParamT quant_param; | |||
| if (!k_means) { | |||
| STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bit_num); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "CalQuantizationParams failed" << status; | |||
| return status; | |||
| } | |||
| } | |||
| quant_params->emplace_back(quant_param); | |||
| // update data and datatype | |||
| for (uint32_t i = 0; i < elem_count; i++) { | |||
| float raw_data = raw_datas[i]; | |||
| if (!k_means) { | |||
| auto quant_data = QuantizeData<T>(raw_data, quant_param, quant_max, quant_min); | |||
| (*quant_datas)[i] = quant_data; | |||
| } | |||
| } | |||
| auto status = UpdateTensorDataAndSize(weight, quant_datas->data(), quant_datas->size() * sizeof(T), quant_data_type); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| template <typename T> | |||
| STATUS DoBitPack(const tensor::TensorPtr &weight, const size_t &bit_num, const std::vector<T> &quant_datas) { | |||
| if (bit_num != 8 && bit_num != 16) { | |||
| @@ -363,15 +175,19 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv | |||
| } | |||
| std::vector<T> quant_data(elem_count); | |||
| std::vector<float> dequant_datas(elem_count); | |||
| int ret = RET_OK; | |||
| if (per_channel) { | |||
| bool channel_at_first = true; | |||
| int channel_cnt = -1; | |||
| CalQuantAssitInfo(primitive, dims, index, &channel_at_first, &channel_cnt); | |||
| // channel at first | |||
| - ret = DoPerChannelQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_data, | |||
| - &dequant_datas, quant_data_type, channel_at_first, channel_cnt); | |||
| + auto channels = CalChannels(dims, channel_cnt, &channel_at_first); | |||
| + if (channels == 0) { | |||
| + MS_LOG(ERROR) << "channels is zero"; | |||
| + return RET_ERROR; | |||
| + } | |||
| + ret = DoPerChannelQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(), | |||
| + static_cast<mindspore::schema::QuantType>(quant_type), &quant_params, quant_max, | |||
| + quant_min, bit_num, k_means, &quant_data, channels, channel_at_first); | |||
| if (ret == RET_CONTINUE) { | |||
| return ret; | |||
| } else if (ret != RET_OK) { | |||
| @@ -379,13 +195,18 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv | |||
| return ret; | |||
| } | |||
| } else { | |||
| - ret = DoPerLayerQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_data, | |||
| - quant_data_type); | |||
| + ret = DoPerLayerQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(), &quant_params, quant_max, | |||
| + quant_min, bit_num, k_means, &quant_data); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Do per layer quant failed."; | |||
| return ret; | |||
| } | |||
| } | |||
| auto status = UpdateTensorDataAndSize(weight, quant_data.data(), quant_data.size() * sizeof(T), quant_data_type); | |||
| if (status != RET_OK) { | |||
| MS_LOG(ERROR) << "UpdateTensorDataAndSize error"; | |||
| return RET_ERROR; | |||
| } | |||
| #ifdef HUFFMAN_ENCODE | |||
| auto huffman_encode = std::make_unique<lite::HuffmanEncode>(); | |||