/** * Copyright 2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef MIINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_DATA_H_ #define MIINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_DATA_H_ #include #include #include #include #include #include #include "distributed/persistent/storage/local_file.h" #include "utils/log_adapter.h" namespace mindspore { namespace distributed { namespace persistent { // The data class is used to save and manage the tensor in memory, and provides // interfaces for persistence and disaster recovery. template class Data { public: explicit Data(const std::shared_ptr> &data, const std::shared_ptr> &shape = nullptr) : data_(data), shape_(shape) {} virtual ~Data() = default; // Get the memory data of Data T *data() const { return data_->data(); } // Get the mutable memory data of Data std::shared_ptr> MutableData() const { return data_; } // Get the element number of Data size_t size() const { return data_->size(); } // Get the dimension information of Data. std::shared_ptr> shape() const { return shape_; } protected: // Container used to store continuous memory buffer of Data. std::shared_ptr> data_; // Container used to record the dimension information of Data which persists a tensor. std::shared_ptr> shape_; }; // Implementation of the class Data to complete the function of persistence and disaster tolerance. template class PersistentData : public Data { public: explicit PersistentData(const std::shared_ptr> &data, const std::shared_ptr> &shape = nullptr) : Data(data, shape) {} ~PersistentData() override = default; // Initialize storage module. // Custom storage config, you can choose different configurations according to different storage forms, // such as using file storage by configuring the file storage path, // and config can be like this: std::map config = {{kFileStoragePath, "real_path_of_dir"}}; void Initialize(const std::map &storage_config); // In disaster recovery mode, memory of tensor need to be saved into disk file periodically. void Persist(const storage::DirtyInfo &dirty_info) const; // In disaster recovery mode, server node or worker node need to restore persistent data when restart. void Restore() const; private: // The following variables are used in disaster recovery mode: // The threads used to execute persistence task. std::thread persist_thread_; // The file storage handle used to persist data. std::shared_ptr storage_; }; template void PersistentData::Initialize(const std::map &storage_config) { storage_ = std::make_shared(storage_config); } template void PersistentData::Persist(const storage::DirtyInfo &dirty_info) const { MS_EXCEPTION_IF_NULL(storage_); storage::InputData input = std::make_tuple(*Data::shape_, Data::data(), Data::size() * sizeof(T)); storage_->Write(input, dirty_info); } template void PersistentData::Restore() const { storage::OutputData output = std::make_pair(Data::data(), Data::size() * sizeof(T)); MS_EXCEPTION_IF_NULL(storage_); storage_->Read(output); } } // namespace persistent } // namespace distributed } // namespace mindspore #endif // MIINDSPORE_CCSRC_DISTRIBUTED_PERSISTENT_DATA_H_