Merge pull request !21205 from yonibaehr/export_yonitags/v1.4.0
@@ -133,6 +133,7 @@ set(TRAIN_SRC
        ${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/train/opt_allocator.cc
        ${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc
        )
if(ENABLE_V0)
@@ -316,7 +316,9 @@ void Tensor::FreeData() {
    this->data_ = nullptr;
  } else {
    allocator_->Free(this->data_);
    this->data_ = nullptr;
    if (!IS_STATIC_ALLOCATOR(allocator_) || (allocator_->RefCount(this->data_) != 0)) {
      this->data_ = nullptr;
    }
  }
}
@@ -34,12 +34,15 @@
namespace mindspore {
namespace lite {
#define STATIC_ALLOCATION -271964
#define IS_STATIC_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == STATIC_ALLOCATION))
struct LiteQuantParam {
  double scale;
  int32_t zeroPoint;
  float var_corr{1};
  float mean_corr{0};
  bool inited;
  bool inited{false};
  std::vector<float> clusters{};
  int bitNum;
  int roundType;
@@ -133,7 +136,6 @@ class Tensor : public mindspore::tensor::MSTensor {
  void set_format(mindspore::Format format) override { this->format_ = format; }
  mindspore::Format format() const override { return this->format_; }
  virtual int ref_count() const { return ref_count_; }
  virtual int init_ref_count() const { return this->init_ref_count_; }
@@ -0,0 +1,90 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/train/opt_allocator.h"
#include <limits>
#include "nnacl/op_base.h"

namespace mindspore {
size_t OptAllocator::FindFree(size_t size) {
  size_t min_size = std::numeric_limits<size_t>::max();
  size_t min_addr = std::numeric_limits<size_t>::max();
  for (auto const &itr : arena_) {
    // best fit
    if (itr.second >= size) {
      if (min_size > itr.second) {
        min_size = itr.second;
        min_addr = itr.first;
      }
    }
  }
  return min_addr;
}

void OptAllocator::Reorder(size_t addr) {
  size_t length = arena_[addr];
  size_t post = addr + length;
  // connect to upper block
  auto it = arena_.find(post);
  if (it != arena_.end()) {
    size_t post_size = it->second;
    arena_[addr] = length + post_size;
    arena_.erase(post);
  }
  // connect to lower block
  auto itr = arena_.lower_bound(addr);
  if (itr != arena_.begin()) {
    itr--;
    size_t last = itr->first;
    if ((last + arena_[last]) == addr) {
      arena_[last] = arena_[last] + arena_[addr];
      arena_.erase(addr);
    }
  }
}

size_t OptAllocator::Malloc(size_t size) {
  size = UP_DIV(size, align_size_) * align_size_;
  size_t addr = FindFree(size);
  // free block not found
  if (addr == std::numeric_limits<size_t>::max()) {
    if (!arena_.empty()) {
      addr = arena_.rbegin()->first;
      if (addr + arena_[addr] < heap_) {
        addr = heap_;
      } else {
        arena_.erase(addr);
      }
    } else {
      addr = heap_;
    }
    heap_ = addr + size;
  } else {
    if (arena_[addr] > size) {
      arena_[addr + size] = arena_[addr] - size;
    }
    arena_.erase(addr);
  }
  alloc_[addr] = size;
  return addr;
}

void OptAllocator::Free(size_t addr) {
  arena_[addr] = alloc_[addr];
  alloc_.erase(addr);
  Reorder(addr);
}
}  // namespace mindspore
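For orientation, a minimal usage sketch of the planner above (the sizes and the main() wrapper are invented for illustration): Malloc returns byte offsets rather than pointers, Free returns a block to the arena where Reorder coalesces it with its neighbours, and total_size() reports the peak extent that one real allocation must later cover.

#include <cassert>
#include "src/train/opt_allocator.h"

int main() {
  mindspore::OptAllocator planner;  // default 32-byte alignment
  size_t a = planner.Malloc(100);   // rounded up to 128, planned at offset 0
  size_t b = planner.Malloc(40);    // rounded up to 64, planned at offset 128
  planner.Free(a);                  // offsets 0..127 go back to the arena
  size_t c = planner.Malloc(60);    // 64 bytes: best fit reuses the freed block at offset 0
  assert(c == a);
  (void)b;
  // A single real buffer of planner.total_size() bytes (192 here) backs all planned offsets.
  assert(planner.total_size() == 192);
  return 0;
}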
@@ -0,0 +1,41 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_TRAIN_OPT_ALLOCATOR_H_
#define MINDSPORE_LITE_SRC_TRAIN_OPT_ALLOCATOR_H_
#include <map>
#include "include/api/allocator.h"

namespace mindspore {
class OptAllocator {
 public:
  explicit OptAllocator(size_t aligned_size = 32) : align_size_(aligned_size) {}
  ~OptAllocator() {}
  size_t Malloc(size_t size);
  void Free(size_t offset);
  size_t total_size() { return heap_; }

 private:
  size_t FindFree(size_t size);
  void Reorder(size_t addr);
  std::map<size_t, size_t> arena_;
  std::map<size_t, size_t> alloc_;
  size_t heap_ = 0;
  size_t align_size_;
};
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_TRAIN_OPT_ALLOCATOR_H_
@@ -0,0 +1,52 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_TRAIN_STATIC_ALLOCATOR_H_
#define MINDSPORE_LITE_SRC_TRAIN_STATIC_ALLOCATOR_H_
#include <cstddef>
#include <cstdlib>
#include "include/api/allocator.h"  // base Allocator interface (same header used by opt_allocator.h)
#include "src/tensor.h"             // assumed location of the STATIC_ALLOCATION sentinel added in this PR

namespace mindspore {
class StaticAllocator : public Allocator {
 public:
  void SetContex(void *buf, size_t size) {
    start_buf_ = buf;
    size_ = size;
  }
  int SetRefCount(void *ptr, int ref_count) override { return 0; }
  int DecRefCount(void *ptr, int ref_count) override { return 0; }
  int IncRefCount(void *ptr, int ref_count) override { return 0; }
  size_t total_size() { return total_size_; }
  void Clear() {}
  void *Malloc(size_t size) override {
    total_size_ += size;
    return malloc(size);
  }
  void Free(void *ptr) override {
    if (RefCount(ptr) != 0) free(ptr);
  }
  int RefCount(void *ptr) override {
    if (ptr == nullptr) return STATIC_ALLOCATION;
    char *ptrc = reinterpret_cast<char *>(ptr);
    char *bufc = reinterpret_cast<char *>(start_buf_);
    return ((ptrc < bufc) || (ptrc - bufc >= static_cast<ptrdiff_t>(size_)) ? 1 : 0);
  }

 private:
  void *start_buf_ = nullptr;
  size_t size_ = 0;
  size_t total_size_ = 0;
};
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_TRAIN_STATIC_ALLOCATOR_H_
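A note on the RefCount contract the rest of this change relies on: RefCount(nullptr) doubles as a type probe, answering the STATIC_ALLOCATION sentinel so that IS_STATIC_ALLOCATOR (added in the tensor header hunk above) can detect a StaticAllocator behind the generic Allocator interface, while for a real pointer it answers 0 inside the planned arena and 1 outside, which is what Free() and the new Tensor::FreeData branch key off. A minimal sketch of that contract, assuming the sentinel macros are visible via the tensor header; the helper name is invented:

#include <memory>
#include "src/tensor.h"                  // assumed location of STATIC_ALLOCATION / IS_STATIC_ALLOCATOR
#include "src/train/static_allocator.h"

// Hypothetical helper mirroring the FreeData branch added in this PR.
void FreeLikeTensor(const std::shared_ptr<mindspore::Allocator> &alloc, void **data) {
  alloc->Free(*data);  // a StaticAllocator only calls free() on pointers outside its arena
  if (!IS_STATIC_ALLOCATOR(alloc) || alloc->RefCount(*data) != 0) {
    *data = nullptr;   // pointers inside the planned arena keep their address for reuse
  }
}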
@@ -39,6 +39,8 @@
#include "src/train/optimizer_kernel.h"
#include "src/train/train_utils.h"
#include "src/train/train_export.h"
#include "src/train/opt_allocator.h"
#include "src/train/static_allocator.h"
#include "src/train/train_populate_parameter.h"
#include "src/train/train_populate_parameter_v0.h"
@@ -68,6 +70,7 @@ int TrainSession::Init(const Context *context, const TrainCfg *train_cfg) {
    }
    cfg_ = *train_cfg;
  }
  allocator_ = context->allocator;
  return lite::LiteSession::Init(context);
}
@@ -159,6 +162,51 @@ int TrainSession::InitCallBack() {
  return RET_OK;
}

int TrainSession::AllocTensors(const std::vector<kernel::LiteKernel *> &kernels) {
  if (!IS_STATIC_ALLOCATOR(allocator_)) return RET_OK;
  OptAllocator allocator;
  std::unordered_map<lite::Tensor *, int> ref_count;
  std::unordered_map<lite::Tensor *, size_t> offset_map;
  for (auto kernel : kernels) {
    for (auto tensor : kernel->out_tensors()) {
      size_t size = tensor->Size();
      size_t offset = allocator.Malloc(size);
      offset_map[tensor] = offset;
      ref_count[tensor] = tensor->init_ref_count();
    }
    for (auto tensor : kernel->in_tensors()) {
      if (tensor->category() == lite::Tensor::VAR) {
        int count = ref_count[tensor] - 1;
        ref_count[tensor] = count;
        if (count == 0) {
          allocator.Free(offset_map[tensor]);
        }
      }
    }
  }
  // Set Tensor data
  if (tensors_data_ == nullptr) {
    auto size = allocator.total_size();
    auto buf = malloc(size);
    if (buf == nullptr) {
      MS_LOG(ERROR) << "cannot allocate buffer of size " << size;
      return RET_ERROR;
    }
    StaticAllocator *alloc = reinterpret_cast<StaticAllocator *>(allocator_.get());
    alloc->SetContex(buf, size);
    tensors_data_ = buf;
  }
  for (auto kernel : train_kernels_) {
    for (auto tensor : kernel->out_tensors()) {
      auto it = offset_map.find(tensor);
      if (it != offset_map.end()) {
        tensor->set_data(reinterpret_cast<void *>(reinterpret_cast<char *>(tensors_data_) + it->second));
      }
    }
  }
  return RET_OK;
}

int TrainSession::CompileGraph(lite::Model *model) { return lite::RET_ERROR; }

int TrainSession::CompileTrainGraph(std::shared_ptr<Model> model) {
@@ -194,10 +242,21 @@ int TrainSession::CompileTrainGraph(std::shared_ptr<Model> model) {
    MS_LOG(ERROR) << "failed to allocate space";
    return RET_ERROR;
  }
  ret = AllocTensors(train_kernels_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "failed to allocate space";
    return RET_ERROR;
  }
  return RET_OK;
}

TrainSession::~TrainSession() { FreeWorkSpace(); }
TrainSession::~TrainSession() {
  FreeWorkSpace();
  if (tensors_data_ != nullptr) {
    free(tensors_data_);
    tensors_data_ = nullptr;
  }
}

int TrainSession::ExecKernels(const KernelCallBack &before, const KernelCallBack &after,
                              const std::vector<kernel::LiteKernel *> &run_kernels) {
@@ -420,6 +479,12 @@ int TrainSession::Train() {
      lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1);
    }
  }
  // allocate tensors
  auto ret = AllocTensors(train_kernels_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "failed to allocate tensor space";
    return RET_ERROR;
  }
  return RET_OK;
}
@@ -446,6 +511,11 @@ int TrainSession::Eval() {
      lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1);
    }
  }
  auto ret = AllocTensors(inference_kernels_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "failed to allocate space";
    return RET_ERROR;
  }
  return RET_OK;
}
@@ -781,7 +851,12 @@ session::LiteSession *session::TrainSession::CreateTrainSession(const std::strin
    MS_LOG(ERROR) << "create session failed";
    return nullptr;
  }
  if (context->allocator == nullptr) {
    const_cast<lite::Context *>(context)->allocator = std::shared_ptr<Allocator>(new (std::nothrow) StaticAllocator());
    if (context->allocator == nullptr) {
      MS_LOG(ERROR) << "cannot convert to static allocation";
    }
  }
  auto ret = session->Init(context, cfg);
  if (ret != mindspore::lite::RET_OK) {
    MS_LOG(ERROR) << "init session failed";
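End to end, the pieces compose as follows: CreateTrainSession installs a StaticAllocator when the caller's context carries no allocator, CompileTrainGraph / Train() / Eval() call AllocTensors to plan per-tensor offsets with OptAllocator, one malloc of the planned total is bound to the allocator via SetContex, and the session destructor releases that arena. A hedged usage sketch from the application side; the model path is a placeholder and the exact public header paths are assumptions:

#include "include/context.h"              // assumed header for lite::Context
#include "include/train/train_session.h"  // assumed header for session::TrainSession

int main() {
  mindspore::lite::Context context;  // no allocator supplied: the session installs a StaticAllocator
  auto *session =
      mindspore::session::TrainSession::CreateTrainSession("model.ms", &context, true, nullptr);
  if (session == nullptr) {
    return -1;
  }
  session->Train();  // switching modes re-plans the static arena via AllocTensors
  // ... set inputs, RunGraph(), Eval(), and so on.
  delete session;    // ~TrainSession frees the shared tensors_data_ buffer
  return 0;
}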
@@ -147,6 +147,7 @@ class TrainSession : virtual public lite::LiteSession {
  void FreeRestoreTensors();
  bool AllInputsNeedScale(kernel::LiteKernel *kernel);
  void FreeWorkSpace();
  int AllocTensors(const std::vector<kernel::LiteKernel *> &kernels);
  std::map<Tensor *, Tensor *> restored_origin_tensors_;
  int virtual_batch_idx_ = 0;
@@ -155,6 +156,8 @@ class TrainSession : virtual public lite::LiteSession {
  void *workspace_ = nullptr;
  SchedCallBack sched_mix_precision_callback_;
  bool train_mode_ = false;
  void *tensors_data_ = nullptr;
  std::shared_ptr<Allocator> allocator_;
};
}  // namespace lite
@@ -603,7 +603,7 @@ int NetTrain::InitCallbackParameter() {
    }
    op_call_times_total_++;
    op_begin_ = GetTimeUs();
    if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign")) {
    if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign") || (callParam.node_type == "SGD")) {
      for (auto tensor : before_outputs) {
        std::fill(reinterpret_cast<int8_t *>(tensor->MutableData()),
                  reinterpret_cast<int8_t *>(tensor->MutableData()) + tensor->Size(), 0);