Merge pull request !25092 from harshvardhangupta/auto_tune_apitags/v1.6.0
| @@ -151,8 +151,8 @@ Status DatasetIterator::FetchNextTensorRow(TensorRow *out_row) { | |||
| #ifndef ENABLE_SECURITY | |||
| if (tracing_ != nullptr) { | |||
| cur_batch_num_++; | |||
| RETURN_IF_NOT_OK(tracing_->Record(static_cast<int32_t>(CONNECTOR_DEPTH), cur_connector_capacity_, cur_batch_num_, | |||
| cur_connector_size_, ProfilingTime::GetCurMilliSecond())); | |||
| tracing_->Record(static_cast<int32_t>(CONNECTOR_DEPTH), cur_connector_capacity_, cur_batch_num_, | |||
| cur_connector_size_, ProfilingTime::GetCurMilliSecond()); | |||
| } | |||
| #endif | |||
| return Status::OK(); | |||
| @@ -389,7 +389,7 @@ Status DeviceQueueOp::LaunchParallelCopyThread() { | |||
| RETURN_IF_NOT_OK(CircularPool::CreateCircularPool(&pool, -1, kDeviceQueGpuThreadMemory, false, true)); | |||
| pool_.push_back(pool); | |||
| } | |||
| gpu_item_connector_ = std::make_unique<GpuItemConnector>(num_workers_, 1, queue_capacity_); | |||
| gpu_connector_ = std::make_unique<GpuConnector>(num_workers_, 1, queue_capacity_); | |||
| receive_queues_.Init(num_workers_, queue_capacity_); | |||
| RETURN_IF_NOT_OK(receive_queues_.Register(tree_->AllTasks())); | |||
| RETURN_IF_NOT_OK( | |||
| @@ -417,73 +417,78 @@ Status DeviceQueueOp::PushDataToGPU() { | |||
| RETURN_IF_NOT_OK(tree_->GetProfilingManager()->GetTracingNode(kDeviceQueueTracingName, &node)); | |||
| profiling_node = std::dynamic_pointer_cast<DeviceQueueTracing>(node); | |||
| batch_start_time = ProfilingTime::GetCurMilliSecond(); | |||
| connector_capacity = gpu_item_connector_->capacity(); | |||
| connector_capacity = gpu_connector_->capacity(); | |||
| } | |||
| #endif | |||
| #ifdef ENABLE_DUMP_IR | |||
| md_channel_info_->RecordBatchQueue(gpu_item_connector_->size()); | |||
| md_channel_info_->RecordBatchQueue(gpu_connector_->size()); | |||
| md_channel_info_->RecordPreprocessBatch(0); | |||
| #endif | |||
| std::vector<device::DataItemGpu> items; | |||
| RETURN_IF_NOT_OK(gpu_item_connector_->Pop(0, &items)); | |||
| GpuConnectorItem item; | |||
| RETURN_IF_NOT_OK(gpu_connector_->Pop(0, &item)); | |||
| auto items = std::move(item.data_item); | |||
| bool eoe_flag = item.eoe_flag; | |||
| int64_t send_batch = 0; | |||
| bool is_open = false; | |||
| uint32_t handle = INVALID_HANDLE; | |||
| auto release_function = std::bind(&DeviceQueueOp::ReleaseData, this, std::placeholders::_1, std::placeholders::_2); | |||
| while (!items.empty() && !GpuBufferMgr::GetInstance().IsClosed()) { | |||
| handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, {}, release_function); | |||
| if (handle == INVALID_HANDLE) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "[Internal ERROR] Failed to open channel for sending data."); | |||
| } | |||
| while (!(items.empty() && !eoe_flag) && !GpuBufferMgr::GetInstance().IsClosed()) { | |||
| if (!eoe_flag) { | |||
| #ifdef ENABLE_DUMP_IR | |||
| md_channel_info_->RecordBatchQueue(gpu_item_connector_->size()); | |||
| md_channel_info_->RecordPreprocessBatch(send_batch); | |||
| md_channel_info_->RecordPushStartTime(); | |||
| #endif | |||
| if (!is_open) { | |||
| std::vector<size_t> data_size; | |||
| for (int32_t index = 0; index < items.size(); index++) { | |||
| data_size.push_back(items[index].data_len_); | |||
| } | |||
| handle = GpuBufferMgr::GetInstance().Open(0, channel_name_, data_size, release_function); | |||
| if (handle == INVALID_HANDLE) { | |||
| return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, | |||
| "[Internal ERROR] Failed to open channel for sending data."); | |||
| md_channel_info_->RecordBatchQueue(gpu_connector_->size()); | |||
| md_channel_info_->RecordPreprocessBatch(send_batch); | |||
| md_channel_info_->RecordPushStartTime(); | |||
| #endif | |||
| // Data prefetch only when PS mode enables cache. | |||
| if (!ps::PsDataPrefetch::GetInstance().PrefetchData(channel_name_, items[0].data_ptr_, items[0].data_len_, | |||
| items[0].data_type_)) { | |||
| return Status(StatusCode::kMDTimeOut, __LINE__, __FILE__, | |||
| "Failed to prefetch data in current PS mode(cache data when sending)."); | |||
| } | |||
| is_open = true; | |||
| } | |||
| // Data prefetch only when PS mode enables cache. | |||
| if (!ps::PsDataPrefetch::GetInstance().PrefetchData(channel_name_, items[0].data_ptr_, items[0].data_len_, | |||
| items[0].data_type_)) { | |||
| return Status(StatusCode::kMDTimeOut, __LINE__, __FILE__, | |||
| "Failed to prefetch data in current PS mode(cache data when sending)."); | |||
| } | |||
| RETURN_IF_NOT_OK(RetryPushData(handle, items)); | |||
| send_batch++; | |||
| RETURN_IF_NOT_OK(RetryPushData(handle, items)); | |||
| send_batch++; | |||
| #ifndef ENABLE_SECURITY | |||
| if (is_profiling_enable) { | |||
| uint64_t end_time = ProfilingTime::GetCurMilliSecond(); | |||
| // record push data time | |||
| profiling_node->Record(TIME, TDT_PUSH_TIME, send_batch, push_cost, end_time); | |||
| int32_t batch_cost = (int32_t)(end_time - batch_start_time); | |||
| // record batch time | |||
| profiling_node->Record(TIME, BATCH_TIME, send_batch, batch_cost, end_time); | |||
| // record pipeline time | |||
| profiling_node->Record(TIME, PIPELINE_TIME, send_batch, batch_cost - push_cost, end_time); | |||
| batch_start_time = end_time; | |||
| // record connector depth | |||
| profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, send_batch, connector_size, end_time); | |||
| connector_size = gpu_item_connector_->size(); | |||
| connector_capacity = gpu_item_connector_->capacity(); | |||
| } | |||
| if (is_profiling_enable) { | |||
| uint64_t end_time = ProfilingTime::GetCurMilliSecond(); | |||
| // record push data time | |||
| profiling_node->Record(TIME, TDT_PUSH_TIME, send_batch, push_cost, end_time); | |||
| int32_t batch_cost = (int32_t)(end_time - batch_start_time); | |||
| // record batch time | |||
| profiling_node->Record(TIME, BATCH_TIME, send_batch, batch_cost, end_time); | |||
| // record pipeline time | |||
| profiling_node->Record(TIME, PIPELINE_TIME, send_batch, batch_cost - push_cost, end_time); | |||
| batch_start_time = end_time; | |||
| // record connector depth | |||
| profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, send_batch, connector_size, end_time); | |||
| connector_size = gpu_connector_->size(); | |||
| connector_capacity = gpu_connector_->capacity(); | |||
| } | |||
| #endif | |||
| #ifdef ENABLE_DUMP_IR | |||
| md_channel_info_->RecordBatchQueue(gpu_item_connector_->size()); | |||
| md_channel_info_->RecordPreprocessBatch(send_batch); | |||
| md_channel_info_->RecordPushEndTime(); | |||
| md_channel_info_->RecordBatchQueue(gpu_connector_->size()); | |||
| md_channel_info_->RecordPreprocessBatch(send_batch); | |||
| md_channel_info_->RecordPushEndTime(); | |||
| #endif | |||
| if (total_batch_ > 0 && send_batch >= total_batch_) { | |||
| break; | |||
| } | |||
| } else { | |||
| #ifndef ENABLE_SECURITY | |||
| if (is_profiling_enable) { | |||
| tree_->SetEpochEnd(); | |||
| tree_->GetProfilingManager()->RecordEndOfEpoch(send_batch); | |||
| } | |||
| #endif | |||
| if (total_batch_ > 0 && send_batch >= total_batch_) { | |||
| break; | |||
| } | |||
| if (!TaskManager::FindMe()->Interrupted() && !GpuBufferMgr::GetInstance().IsClosed()) { | |||
| auto rc = gpu_item_connector_->Pop(0, &items); | |||
| auto rc = gpu_connector_->Pop(0, &item); | |||
| items = std::move(item.data_item); | |||
| eoe_flag = item.eoe_flag; | |||
| // If the batches send by dataset are more than gpu calculate, gpu will core for no signal notify. | |||
| if (rc.IsError()) { | |||
| GpuBufferMgr::GetInstance().Close(handle); | |||
| @@ -543,25 +548,30 @@ Status DeviceQueueOp::WorkerEntry(int32_t worker_id) { | |||
| uint32_t batch_num = 0; | |||
| RETURN_IF_NOT_OK(receive_queues_[worker_id]->PopFront(¤t_row)); | |||
| while (!current_row.quit() && !GpuBufferMgr::GetInstance().IsClosed()) { | |||
| std::vector<device::DataItemGpu> items; | |||
| for (int i = 0; i < current_row.size(); i++) { | |||
| device::DataItemGpu data_item; | |||
| data_item.data_len_ = static_cast<size_t>(current_row[i]->SizeInBytes()); | |||
| data_item.data_ptr_ = nullptr; | |||
| data_item.worker_id_ = worker_id; | |||
| items.push_back(data_item); | |||
| GpuConnectorItem connector_item = {{}, current_row.eoe()}; | |||
| if (!connector_item.eoe_flag) { | |||
| std::vector<device::DataItemGpu> items; | |||
| for (auto &i : current_row) { | |||
| device::DataItemGpu data_item; | |||
| data_item.data_len_ = static_cast<size_t>(i->SizeInBytes()); | |||
| data_item.data_ptr_ = nullptr; | |||
| data_item.worker_id_ = worker_id; | |||
| items.push_back(data_item); | |||
| } | |||
| RETURN_IF_NOT_OK(MallocForGPUData(&items, current_row, worker_id)); | |||
| connector_item.data_item = std::move(items); | |||
| batch_num++; | |||
| } else { | |||
| MS_LOG(INFO) << "EOE Detected"; | |||
| } | |||
| RETURN_IF_NOT_OK(MallocForGPUData(&items, current_row, worker_id)); | |||
| RETURN_IF_NOT_OK(gpu_item_connector_->Add(worker_id, std::move(items))); | |||
| batch_num++; | |||
| RETURN_IF_NOT_OK(gpu_connector_->Add(worker_id, std::move(connector_item))); | |||
| RETURN_IF_NOT_OK(receive_queues_[worker_id]->PopFront(¤t_row)); | |||
| } | |||
| MS_LOG(INFO) << "Device queue worker id " << worker_id << "proc " << batch_num << "batch."; | |||
| // Add empty vector as quit flag. | |||
| std::vector<device::DataItemGpu> items; | |||
| RETURN_IF_NOT_OK(gpu_item_connector_->Add(worker_id, std::move(items))); | |||
| // Add empty data_item vector with eoe_flag=false as quit flag. | |||
| GpuConnectorItem connector_item = {{}, false}; | |||
| RETURN_IF_NOT_OK(gpu_connector_->Add(worker_id, std::move(connector_item))); | |||
| return Status::OK(); | |||
| } | |||
| @@ -599,12 +609,12 @@ Status DeviceQueueOp::SendDataToGPU() { | |||
| } | |||
| } | |||
| #ifndef ENABLE_SECURITY | |||
| if (current_row.eoe() && tree_->GetProfilingManager()->IsProfilingEnable()) { | |||
| tree_->SetEpochEnd(); | |||
| tree_->GetProfilingManager()->RecordEndOfEpoch(batch_num); | |||
| if (current_row.eoe()) { | |||
| MS_LOG(INFO) << "EOE Detected"; | |||
| TensorRow eoe_flag(TensorRow::kFlagEOE); | |||
| RETURN_IF_NOT_OK(receive_queues_[num_buf % num_workers_]->Add(std::move(eoe_flag))); | |||
| } | |||
| #endif | |||
| if (!TaskManager::FindMe()->Interrupted() && !GpuBufferMgr::GetInstance().IsClosed()) { | |||
| RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(¤t_row)); | |||
| } else { | |||
| @@ -613,6 +623,7 @@ Status DeviceQueueOp::SendDataToGPU() { | |||
| } | |||
| for (uint32_t index = 0; index < num_workers_; index++) { | |||
| MS_LOG(INFO) << "Adding quit flag to Workers"; | |||
| TensorRow quit_flag(TensorRow::kFlagQuit); | |||
| RETURN_IF_NOT_OK(receive_queues_[num_buf++ % num_workers_]->Add(std::move(quit_flag))); | |||
| } | |||
| @@ -152,7 +152,7 @@ class DeviceQueueOp : public PipelineOp { | |||
| QueueList<TensorRow> receive_queues_; | |||
| std::vector<std::shared_ptr<MemoryPool>> pool_; | |||
| std::unique_ptr<GpuItemConnector> gpu_item_connector_; | |||
| std::unique_ptr<GpuConnector> gpu_connector_; | |||
| const uint32_t kDeviceQueGpuNumThreads = 2; | |||
| const uint32_t kDeviceQueGpuQueueCapacity = 8; | |||
| const uint32_t kDeviceQueGpuThreadMemory = 1024; | |||
| @@ -235,7 +235,7 @@ void ExecutionTree::Iterator::PostOrderTraverse(const std::shared_ptr<DatasetOp> | |||
| ExecutionTree::Iterator::Iterator(const std::shared_ptr<DatasetOp> &root) : ind_(0) { | |||
| // post-order traverse the tree, if root is null, it return | |||
| PostOrderTraverse(root); | |||
| nodes_.emplace_back(nullptr); | |||
| (void)nodes_.emplace_back(nullptr); | |||
| } | |||
| // Given the number of workers, launches the worker entry function for each. Essentially a | |||
| @@ -30,34 +30,41 @@ using mindspore::device::DataItemGpu; | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class GpuItemConnector : public Connector<std::vector<device::DataItemGpu>> { | |||
| struct GpuConnectorItem { | |||
| std::vector<device::DataItemGpu> data_item; | |||
| bool eoe_flag; // flag to indicate an EOE item in the connector | |||
| }; | |||
| class GpuConnector : public Connector<GpuConnectorItem> { | |||
| public: | |||
| GpuItemConnector(int32_t num_producers, int32_t num_consumers, int32_t queue_capacity) | |||
| : Connector<std::vector<device::DataItemGpu>>(num_producers, num_consumers, queue_capacity) { | |||
| GpuConnector(int32_t num_producers, int32_t num_consumers, int32_t queue_capacity) | |||
| : Connector<GpuConnectorItem>(num_producers, num_consumers, queue_capacity) { | |||
| for (int i = 0; i < num_producers; i++) { | |||
| is_queue_finished_.push_back(false); | |||
| } | |||
| } | |||
| ~GpuItemConnector() = default; | |||
| ~GpuConnector() = default; | |||
| Status Add(int32_t worker_d, std::vector<device::DataItemGpu> &&element) noexcept { | |||
| return Connector<std::vector<device::DataItemGpu>>::Push(worker_d, std::move(element)); | |||
| Status Add(int32_t worker_d, GpuConnectorItem &&element) noexcept { | |||
| return Connector<GpuConnectorItem>::Push(worker_d, std::move(element)); | |||
| } | |||
| Status Pop(int32_t worker_id, std::vector<device::DataItemGpu> *result) noexcept override { | |||
| Status Pop(int32_t worker_id, GpuConnectorItem *result) noexcept override { | |||
| RETURN_UNEXPECTED_IF_NULL(result); | |||
| { | |||
| MS_ASSERT(worker_id < num_consumers_); | |||
| std::unique_lock<std::mutex> lock(m_); | |||
| RETURN_IF_NOT_OK(cv_.Wait(&lock, [this, worker_id]() { return expect_consumer_ == worker_id; })); | |||
| if (is_queue_finished_[pop_from_]) { | |||
| std::string errMsg = "ERROR: popping from a finished queue in GpuItemConnector"; | |||
| std::string errMsg = "ERROR: popping from a finished queue in GpuConnector"; | |||
| RETURN_STATUS_UNEXPECTED(errMsg); | |||
| } | |||
| RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result)); | |||
| if ((*result).empty()) { | |||
| // empty data_item and eoe_flag=false is EOF | |||
| if ((*result).data_item.empty() && !(*result).eoe_flag) { | |||
| is_queue_finished_[pop_from_] = true; | |||
| } | |||
| @@ -81,5 +88,5 @@ class GpuItemConnector : public Connector<std::vector<device::DataItemGpu>> { | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_GPU_ITEM_CONNECTOR_H_ | |||
| #endif // ENABLE_GPUQUE | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_GPU_ITEM_CONNECTOR_H_ | |||
| @@ -1,9 +1,9 @@ | |||
| add_library(engine-perf OBJECT | |||
| profiling.cc | |||
| monitor.cc | |||
| device_queue_tracing.cc | |||
| connector_size.cc | |||
| dataset_iterator_tracing.cc | |||
| connector_throughput.cc | |||
| cpu_sampling.cc | |||
| ) | |||
| add_library( | |||
| engine-perf OBJECT | |||
| profiling.cc | |||
| monitor.cc | |||
| device_queue_tracing.cc | |||
| connector_size.cc | |||
| dataset_iterator_tracing.cc | |||
| cpu_sampler.cc | |||
| ) | |||
| @@ -15,6 +15,8 @@ | |||
| */ | |||
| #include "minddata/dataset/engine/perf/connector_size.h" | |||
| #include <fstream> | |||
| #include <algorithm> | |||
| #include <memory> | |||
| #include "minddata/dataset/core/config_manager.h" | |||
| #include "minddata/dataset/engine/execution_tree.h" | |||
| #include "minddata/dataset/util/path.h" | |||
| @@ -27,10 +29,12 @@ using Qrow = std::vector<int>; | |||
| // Sample action | |||
| Status ConnectorSize::Sample() { | |||
| Qrow cur_row; | |||
| std::transform(tree_->begin(), tree_->end(), std::back_inserter(cur_row), | |||
| [](DatasetOp &op) { return op.ConnectorSize(); }); | |||
| (void)std::transform(tree_->begin(), tree_->end(), std::back_inserter(cur_row), | |||
| [](DatasetOp &op) { return op.ConnectorSize(); }); | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| // Push new row of sample | |||
| sample_table_.push_back(cur_row); | |||
| (void)ts_.emplace_back(ProfilingTime::GetCurMilliSecond()); | |||
| return Status::OK(); | |||
| } | |||
| @@ -70,8 +74,8 @@ Status ConnectorSize::SaveToFile() { | |||
| // Traverse the ExecutionTree for JSON node generation | |||
| for (auto &node : *tree_) { | |||
| std::vector<int32_t> cur_queue_size; | |||
| std::transform(sample_table_.begin(), sample_table_.end(), std::back_inserter(cur_queue_size), | |||
| [&](const ConnectorSizeSample &sample) { return sample[idx]; }); | |||
| (void)std::transform(sample_table_.begin(), sample_table_.end(), std::back_inserter(cur_queue_size), | |||
| [&](const ConnectorSizeSample &sample) { return sample[idx]; }); | |||
| if (!path.Exists()) { | |||
| json json_node = ParseOpInfo(node, cur_queue_size); | |||
| output["op_info"].push_back(json_node); | |||
| @@ -102,5 +106,37 @@ Status ConnectorSize::Init(const std::string &dir_path, const std::string &devic | |||
| } | |||
| Status ConnectorSize::Analyze() { return Status::OK(); } | |||
| Status ConnectorSize::GetOpConnectorSize(int32_t op_id, uint64_t start_time, uint64_t end_time, | |||
| std::vector<int32_t> *result) { | |||
| MS_LOG(DEBUG) << "Op_id: " << op_id << " start_ts: " << start_time << " end_ts: " << end_time; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_time < end_time, | |||
| "Expected start_time < end_time. Got start_ts: " + std::to_string(start_time) + | |||
| " end_ts: " + std::to_string(end_time)); | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| ts_.size() == sample_table_.size(), | |||
| "Expected ts_.size() == sample_table_.size(). Got ts_.size: " + std::to_string(ts_.size()) + | |||
| " sample_table_.size: " + std::to_string(sample_table_.size())); | |||
| // find first ts that is not less than start_ts | |||
| auto lower = std::lower_bound(ts_.begin(), ts_.end(), start_time); | |||
| // find first ts that is greater than end_ts | |||
| auto upper = std::upper_bound(ts_.begin(), ts_.end(), end_time); | |||
| // get ts_ indices | |||
| auto start_index = std::distance(ts_.begin(), lower); | |||
| auto end_index = std::distance(ts_.begin(), upper); | |||
| MS_LOG(INFO) << "start_index: " << start_index << " end_index: " << end_index; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_index < end_index, | |||
| "Expected start_index < end_index. Got start_index: " + std::to_string(start_index) + | |||
| " end_index: " + std::to_string(end_index)); | |||
| // convert indices to sample_table_ iterator | |||
| auto first_iter = sample_table_.begin() + start_index; | |||
| auto last_iter = sample_table_.begin() + end_index; | |||
| // op_id corresponds to the index in sample vector | |||
| (void)std::transform(first_iter, last_iter, std::back_inserter(*result), | |||
| [&](const ConnectorSizeSample &sample) { return sample[op_id]; }); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -40,6 +40,7 @@ class ConnectorSize : public Sampling { | |||
| // A circular buffer will be implemented in the future to make this table more flexible. | |||
| using ConnectorSizeSample = std::vector<int>; | |||
| using ConnectorSizeSampleTable = std::vector<ConnectorSizeSample>; | |||
| using Timestamps = std::vector<uint64_t>; | |||
| public: | |||
| explicit ConnectorSize(ExecutionTree *tree) : tree_(tree) {} | |||
| @@ -62,13 +63,17 @@ class ConnectorSize : public Sampling { | |||
| json ParseOpInfo(const DatasetOp &node, const std::vector<int32_t> &size); | |||
| // Change file mode after save throughput data | |||
| Status ChangeFileMode() { return Status::OK(); } | |||
| Status ChangeFileMode() override { return Status::OK(); } | |||
| Status Analyze() override; | |||
| // Get the vector of connector sizes of given op for samples taken between start and end time | |||
| Status GetOpConnectorSize(int32_t op_id, uint64_t start_time, uint64_t end_time, std::vector<int32_t> *result); | |||
| private: | |||
| ExecutionTree *tree_ = nullptr; // ExecutionTree pointer | |||
| ConnectorSizeSampleTable sample_table_; // Dataset structure to store all samples of connector size sampling | |||
| Timestamps ts_; // time of sample | |||
| }; | |||
| } // namespace dataset | |||
| @@ -1,154 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <sys/stat.h> | |||
| #include <iterator> | |||
| #include <algorithm> | |||
| #include <memory> | |||
| #include <string> | |||
| #include <nlohmann/json.hpp> | |||
| #include "minddata/dataset/engine/perf/connector_throughput.h" | |||
| #include "minddata/dataset/engine/execution_tree.h" | |||
| #include "minddata/dataset/util/path.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // temporary helper | |||
| int ConnectorThroughput::InitNodes() { | |||
| if (tree_ == nullptr) { | |||
| return 0; | |||
| } | |||
| auto it = (*tree_).begin(); | |||
| return it.NumNodes(); | |||
| } | |||
| // Sample action | |||
| Status ConnectorThroughput::Sample() { | |||
| std::vector<int64_t> out_row_count_row(n_nodes_); | |||
| std::vector<double> throughput_row(n_nodes_); | |||
| TimePoint cur_time; // initialised inside the loop, used outside the loop to update prev sample time. | |||
| auto col = 0; | |||
| for (const auto &node : *tree_) { | |||
| auto cur_out_rows_count = node.ConnectorOutRowsCount(); | |||
| out_row_count_row[col] = cur_out_rows_count; | |||
| auto sz = timestamps_.size(); | |||
| cur_time = std::chrono::steady_clock::now(); | |||
| double data_time = 0; | |||
| if (sz > 1) { | |||
| auto full_time = | |||
| std::chrono::duration_cast<std::chrono::microseconds>(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]); | |||
| data_time = std::chrono::duration<double>(full_time).count(); | |||
| } | |||
| auto prev_out_rows_count = out_row_count_table_[col][out_row_count_table_.size() - 1]; | |||
| if (data_time != 0) { | |||
| const int32_t multiplier = 1000; | |||
| auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * data_time); | |||
| throughput_row[col] = thr; | |||
| } else { | |||
| throughput_row[col] = 0; | |||
| } | |||
| col++; | |||
| } | |||
| std::vector<TimePoint> v = {cur_time}; // temporary fix | |||
| timestamps_.AddSample(v); | |||
| // Push new row of sample | |||
| out_row_count_table_.AddSample(out_row_count_row); | |||
| throughput_.AddSample(throughput_row); | |||
| return Status::OK(); | |||
| } | |||
| json ConnectorThroughput::ParseOpInfo(const DatasetOp &node, const std::vector<double> &thr) { | |||
| auto children = node.Children(); | |||
| std::vector<int32_t> children_id; | |||
| std::transform(children.begin(), children.end(), std::back_inserter(children_id), | |||
| [](const std::shared_ptr<DatasetOp> &op) -> int32_t { return op ? op->id() : 0; }); | |||
| json json_node; | |||
| json_node["op_id"] = node.id(); | |||
| json_node["op_type"] = node.Name(); | |||
| json_node["num_workers"] = node.NumWorkers(); | |||
| json metrics; | |||
| // DeviceQueueOp is a special op,it is not inlined but its output queue is invalid. | |||
| // So we should not output its connector throughput. | |||
| if (!node.inlined() && node.Name() != "DeviceQueueOp") { | |||
| metrics["output_queue"] = {{"throughput", thr}}; | |||
| } | |||
| json_node["metrics"] = metrics; | |||
| if (!children_id.empty()) { | |||
| json_node["children"] = children_id; | |||
| } | |||
| return json_node; | |||
| } | |||
| // Save profiling data to file | |||
| // If the file is already exist (created by other sampling node), simply add the data to metrics field. | |||
| Status ConnectorThroughput::SaveToFile() { | |||
| json output; | |||
| RETURN_IF_NOT_OK(ReadJson(&output)); | |||
| Path path = Path(file_path_); | |||
| // Traverse the ExecutionTree for JSON node generation | |||
| int col = 0; | |||
| for (auto &node : *tree_) { | |||
| std::vector<double> throughput; | |||
| if (throughput_.size() > col) { | |||
| for (auto i = 0; i < throughput_[col].size(); i++) { | |||
| throughput.push_back(throughput_[col][i]); | |||
| } | |||
| } | |||
| if (!path.Exists()) { | |||
| json json_node = ParseOpInfo(node, throughput); | |||
| output["op_info"].push_back(json_node); | |||
| } else { | |||
| if (!node.inlined() && node.Name() != "DeviceQueueOp") { | |||
| auto &ops_data = output["op_info"]; | |||
| ops_data[col]["metrics"]["output_queue"]["throughput"] = throughput; | |||
| } | |||
| } | |||
| col++; | |||
| } | |||
| // Discard the content of the file when opening. | |||
| std::ofstream os(file_path_, std::ios::trunc); | |||
| os << output; | |||
| os.close(); | |||
| return Status::OK(); | |||
| } | |||
| Status ConnectorThroughput::Init(const std::string &dir_path, const std::string &device_id) { | |||
| file_path_ = (Path(dir_path) / Path("pipeline_profiling_" + device_id + ".json")).ToString(); | |||
| Path path = Path(file_path_); | |||
| // Remove the file if it exists (from prior profiling usage) | |||
| RETURN_IF_NOT_OK(path.Remove()); | |||
| return Status::OK(); | |||
| } | |||
| Status ConnectorThroughput::ChangeFileMode() { | |||
| if (file_path_.empty()) { | |||
| return Status::OK(); | |||
| } | |||
| if (chmod(common::SafeCStr(file_path_), S_IRUSR | S_IWUSR) == -1) { | |||
| std::string err_str = "Change file mode failed," + file_path_; | |||
| return Status(StatusCode::kMDUnexpectedError, err_str); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status ConnectorThroughput::Analyze() { return Status::OK(); } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -1,92 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CONNECTOR_THROUGHPUT_H | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_CONNECTOR_THROUGHPUT_H | |||
| #include <vector> | |||
| #include <chrono> | |||
| #include <fstream> | |||
| #include <string> | |||
| #include <nlohmann/json.hpp> | |||
| #include "minddata/dataset/engine/perf/profiling.h" | |||
| #include "minddata/dataset/engine/perf/perf_data.h" | |||
| #include "minddata/dataset/engine/perf/cyclic_array.h" | |||
| #include "minddata/dataset/engine/datasetops/dataset_op.h" | |||
| #include "minddata/dataset/engine/execution_tree.h" | |||
| using json = nlohmann::json; | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| // Connector throughput samples the output connector size of each op in the pipeline. | |||
| // For the description of the data structure see perf_data.h | |||
| // It support JSON serialization for external usage. | |||
| class ConnectorThroughput : public Sampling { | |||
| using OutRowCount = PerfData<CyclicArray<int64_t>>; | |||
| using Throughput = PerfData<CyclicArray<double>>; | |||
| using TimePoint = std::chrono::time_point<std::chrono::steady_clock>; | |||
| using TimeStamps = PerfData<CyclicArray<TimePoint>>; | |||
| public: | |||
| explicit ConnectorThroughput(ExecutionTree *tree, int64_t max_rows = 1000000) | |||
| : tree_(tree), | |||
| max_rows_(max_rows), | |||
| n_nodes_(InitNodes()), | |||
| out_row_count_table_(OutRowCount(max_rows_, n_nodes_)), | |||
| throughput_(Throughput(max_rows_, n_nodes_)), | |||
| timestamps_(TimeStamps(max_rows_, 1)) { | |||
| timestamps_.AddSample(std::vector<TimePoint>(1)); | |||
| out_row_count_table_.AddSample(std::vector<int64_t>(n_nodes_)); | |||
| } | |||
| /// \brief Destructor | |||
| ~ConnectorThroughput() = default; | |||
| // Driver function for connector size sampling. | |||
| // This function samples the connector size of every nodes within the ExecutionTree | |||
| Status Sample() override; | |||
| // Traverse the tree nodes and count them | |||
| int InitNodes(); | |||
| std::string Name() const override { return name_; }; | |||
| // Save sampling data to file | |||
| // @return Status The status code returned | |||
| Status SaveToFile() override; | |||
| Status Init(const std::string &dir_path, const std::string &device_id) override; | |||
| json ParseOpInfo(const DatasetOp &node, const std::vector<double> &thr); | |||
| Status ChangeFileMode() override; | |||
| Status Analyze() override; | |||
| private: | |||
| ExecutionTree *tree_ = nullptr; // ExecutionTree pointer | |||
| int64_t max_rows_; | |||
| int32_t n_nodes_; | |||
| OutRowCount out_row_count_table_; | |||
| Throughput throughput_; | |||
| TimeStamps timestamps_; | |||
| std::string name_ = kConnectorThroughputSamplingName; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CONNECTOR_THROUGHPUT_H | |||
| @@ -0,0 +1,511 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/engine/perf/cpu_sampler.h" | |||
| #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) | |||
| #include <sys/syscall.h> | |||
| #endif | |||
| #include <cmath> | |||
| #include <cstdio> | |||
| #include <algorithm> | |||
| #include <utility> | |||
| #include <fstream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "minddata/dataset/api/python/pybind_conversion.h" | |||
| #include "minddata/dataset/core/config_manager.h" | |||
| #include "minddata/dataset/engine/execution_tree.h" | |||
| #include "minddata/dataset/util/path.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| using json = nlohmann::json; | |||
| #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) | |||
| #define USING_LINUX | |||
| #endif | |||
| #if defined(USING_LINUX) | |||
| int32_t SystemCpuInfo::num_cpu_ = get_nprocs_conf(); | |||
| #else | |||
| int32_t SystemCpuInfo::num_cpu_ = 0; | |||
| #endif | |||
| Status SystemCpuInfo::ParseCpuInfo(const std::string &str) { | |||
| SystemStat system_cpu_stat; | |||
| uint64_t nice = 0; | |||
| uint64_t irq = 0; | |||
| uint64_t softirq = 0; | |||
| if (sscanf_s(str.c_str(), "%*s %lu %lu %lu %lu %lu %lu %lu", &system_cpu_stat.user_stat, &nice, | |||
| &system_cpu_stat.sys_stat, &system_cpu_stat.idle_stat, &system_cpu_stat.io_stat, &irq, | |||
| &softirq) == EOF) { | |||
| return Status(StatusCode::kMDUnexpectedError, "Get System CPU failed."); | |||
| } | |||
| system_cpu_stat.total_stat = system_cpu_stat.user_stat + nice + system_cpu_stat.sys_stat + system_cpu_stat.idle_stat + | |||
| system_cpu_stat.io_stat + irq + softirq; | |||
| SystemUtil system_cpu_util = {0, 0, 0, 0}; | |||
| // Calculate the utilization from the second sampling | |||
| if (!first_sample_) { | |||
| system_cpu_util.user_utilization = round((system_cpu_stat.user_stat - prev_sys_stat_.user_stat) * 1.0 / | |||
| (system_cpu_stat.total_stat - prev_sys_stat_.total_stat) * 100); | |||
| system_cpu_util.sys_utilization = round((system_cpu_stat.sys_stat - prev_sys_stat_.sys_stat) * 1.0 / | |||
| (system_cpu_stat.total_stat - prev_sys_stat_.total_stat) * 100); | |||
| system_cpu_util.io_utilization = round((system_cpu_stat.io_stat - prev_sys_stat_.io_stat) * 1.0 / | |||
| (system_cpu_stat.total_stat - prev_sys_stat_.total_stat) * 100); | |||
| system_cpu_util.idle_utilization = round((system_cpu_stat.idle_stat - prev_sys_stat_.idle_stat) * 1.0 / | |||
| (system_cpu_stat.total_stat - prev_sys_stat_.total_stat) * 100); | |||
| } | |||
| // append the 0 util as well to maintain sys_cpu_util_.size == ts_.size | |||
| (void)sys_cpu_util_.emplace_back(system_cpu_util); | |||
| prev_sys_stat_ = system_cpu_stat; | |||
| return Status::OK(); | |||
| } | |||
| Status SystemCpuInfo::ParseCtxt(const std::string &str) { | |||
| uint64_t ctxt; | |||
| if (sscanf_s(str.c_str(), "%*s %lu", &ctxt) == EOF) { | |||
| return Status(StatusCode::kMDUnexpectedError, "Get context switch count failed."); | |||
| } | |||
| // first context switch count will be 0 | |||
| auto val = first_sample_ ? 0 : ctxt - prev_context_switch_count_; | |||
| context_switch_count_.push_back(val); | |||
| prev_context_switch_count_ = ctxt; | |||
| return Status::OK(); | |||
| } | |||
| Status SystemCpuInfo::ParseRunningProcess(const std::string &str) { | |||
| uint32_t running_process; | |||
| if (sscanf_s(str.c_str(), "%*s %ud", &running_process) == EOF) { | |||
| return Status(StatusCode::kMDUnexpectedError, "Get context switch count failed."); | |||
| } | |||
| running_process_.push_back(running_process); | |||
| return Status::OK(); | |||
| } | |||
| Status SystemCpuInfo::SampleAndGetCurrPrevStat(SystemStat *current_stat, SystemStat *previous_stat) { | |||
| std::ifstream file("/proc/stat"); | |||
| if (!file.is_open()) { | |||
| MS_LOG(INFO) << "Failed to open /proc/stat file"; | |||
| return {StatusCode::kMDUnexpectedError, "Failed to open /proc/stat file"}; | |||
| } | |||
| *previous_stat = prev_sys_stat_; | |||
| bool first_line = true; | |||
| std::string line; | |||
| while (getline(file, line)) { | |||
| if (first_line) { | |||
| first_line = false; | |||
| RETURN_IF_NOT_OK(ParseCpuInfo(line)); | |||
| } | |||
| if (line.find("ctxt") != std::string::npos) { | |||
| RETURN_IF_NOT_OK(ParseCtxt(line)); | |||
| } | |||
| if (line.find("procs_running") != std::string::npos) { | |||
| RETURN_IF_NOT_OK(ParseRunningProcess(line)); | |||
| } | |||
| } | |||
| // after the loop above, prev_sys_stat_ has the current value | |||
| *current_stat = prev_sys_stat_; | |||
| file.close(); | |||
| first_sample_ = false; | |||
| return Status::OK(); | |||
| } | |||
| Status SystemCpuInfo::GetUserCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint8_t> *result) const { | |||
| MS_LOG(DEBUG) << "start_index: " << start_index << " end_index: " << end_index | |||
| << " sys_cpu_util.size: " << sys_cpu_util_.size(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_index < end_index, | |||
| "Expected start_index < end_index. Got start_index: " + std::to_string(start_index) + | |||
| " end_index: " + std::to_string(end_index)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| end_index <= sys_cpu_util_.size(), | |||
| "Expected end_index <= sys_cpu_util_.size(). Got end_index: " + std::to_string(end_index) + | |||
| " sys_cpu_util_.size: " + std::to_string(sys_cpu_util_.size())); | |||
| (void)std::transform(sys_cpu_util_.begin() + start_index, sys_cpu_util_.begin() + end_index, | |||
| std::back_inserter(*result), [&](const SystemUtil &info) { return info.user_utilization; }); | |||
| return Status::OK(); | |||
| } | |||
| Status SystemCpuInfo::GetSysCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint8_t> *result) const { | |||
| MS_LOG(DEBUG) << "start_index: " << start_index << " end_index: " << end_index | |||
| << "sys_cpu_util.size: " << sys_cpu_util_.size(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_index < end_index, | |||
| "Expected start_index < end_index. Got start_index: " + std::to_string(start_index) + | |||
| " end_index: " + std::to_string(end_index)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| end_index <= sys_cpu_util_.size(), | |||
| "Expected end_index <= sys_cpu_util_.size(). Got end_index: " + std::to_string(end_index) + | |||
| " sys_cpu_util_.size: " + std::to_string(sys_cpu_util_.size())); | |||
| (void)std::transform(sys_cpu_util_.begin() + start_index, sys_cpu_util_.begin() + end_index, | |||
| std::back_inserter(*result), [&](const SystemUtil &info) { return info.sys_utilization; }); | |||
| return Status::OK(); | |||
| } | |||
| std::vector<uint8_t> SystemCpuInfo::GetIOCpuUtil() const { | |||
| std::vector<uint8_t> io_util; | |||
| (void)std::transform(sys_cpu_util_.begin(), sys_cpu_util_.end(), std::back_inserter(io_util), | |||
| [&](const SystemUtil &info) { return info.io_utilization; }); | |||
| return io_util; | |||
| } | |||
| std::vector<uint8_t> SystemCpuInfo::GetIdleCpuUtil() const { | |||
| std::vector<uint8_t> idle_util; | |||
| (void)std::transform(sys_cpu_util_.begin(), sys_cpu_util_.end(), std::back_inserter(idle_util), | |||
| [&](const SystemUtil &info) { return info.idle_utilization; }); | |||
| return idle_util; | |||
| } | |||
| std::vector<uint16_t> TaskCpuInfo::GetSysCpuUtil() const { | |||
| std::vector<uint16_t> sys_util; | |||
| (void)std::transform( | |||
| task_cpu_util_.begin(), task_cpu_util_.end(), std::back_inserter(sys_util), [&](const TaskUtil &info) { | |||
| return static_cast<uint16_t>(info.sys_utilization * static_cast<float>(SystemCpuInfo::num_cpu_)); | |||
| }); | |||
| return sys_util; | |||
| } | |||
| std::vector<uint16_t> TaskCpuInfo::GetUserCpuUtil() const { | |||
| std::vector<uint16_t> user_util; | |||
| (void)std::transform( | |||
| task_cpu_util_.begin(), task_cpu_util_.end(), std::back_inserter(user_util), [&](const TaskUtil &info) { | |||
| return static_cast<uint16_t>(info.user_utilization * static_cast<float>(SystemCpuInfo::num_cpu_)); | |||
| }); | |||
| return user_util; | |||
| } | |||
| TaskUtil TaskCpuInfo::GetLatestCpuUtil() const { | |||
| TaskUtil ret = {0, 0}; | |||
| if (!task_cpu_util_.empty() && !last_sampling_failed_) { | |||
| ret = task_cpu_util_.back(); | |||
| } | |||
| return ret; | |||
| } | |||
| Status ProcessCpuInfo::Sample(uint64_t total_time_elapsed) { | |||
| std::ifstream file("/proc/" + std::to_string(pid_) + "/stat"); | |||
| if (!file.is_open()) { | |||
| MS_LOG(INFO) << "Failed to open /proc/" << pid_ << "/stat/ file"; | |||
| last_sampling_failed_ = true; | |||
| return Status::OK(); | |||
| } | |||
| std::string str; | |||
| (void)getline(file, str); | |||
| uint64_t utime = 0, stime = 0; | |||
| if (sscanf_s(str.c_str(), "%*d %*s %*s %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %lu %lu", &utime, &stime) == | |||
| EOF) { | |||
| file.close(); | |||
| last_sampling_failed_ = true; | |||
| return Status(StatusCode::kMDUnexpectedError, "Get device CPU failed."); | |||
| } | |||
| file.close(); | |||
| last_sampling_failed_ = false; | |||
| if (!first_sample_) { | |||
| float user_util = (utime - prev_task_stat_.user_stat) * 1.0 / (total_time_elapsed)*100.0; | |||
| float sys_util = (stime - prev_task_stat_.sys_stat) * 1.0 / (total_time_elapsed)*100.0; | |||
| (void)task_cpu_util_.emplace_back(TaskUtil{user_util, sys_util}); | |||
| } | |||
| prev_task_stat_.user_stat = utime; | |||
| prev_task_stat_.sys_stat = stime; | |||
| first_sample_ = false; | |||
| return Status::OK(); | |||
| } | |||
| Status ThreadCpuInfo::Sample(uint64_t total_time_elapsed) { | |||
| std::ifstream file("/proc/" + std::to_string(pid_) + "/task/" + std::to_string(tid_) + "/stat"); | |||
| if (!file.is_open()) { | |||
| MS_LOG(INFO) << "Failed to open /proc/" << pid_ << "/task/" << tid_ << "/stat file"; | |||
| last_sampling_failed_ = true; | |||
| return Status::OK(); | |||
| } | |||
| std::string str; | |||
| (void)getline(file, str); | |||
| uint64_t utime; | |||
| uint64_t stime; | |||
| if (sscanf_s(str.c_str(), "%*d %*s %*s %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %lu %lu", &utime, &stime) == | |||
| EOF) { | |||
| file.close(); | |||
| last_sampling_failed_ = true; | |||
| return Status(StatusCode::kMDUnexpectedError, "Get thread CPU failed."); | |||
| } | |||
| file.close(); | |||
| last_sampling_failed_ = false; | |||
| if (!first_sample_) { | |||
| float user_util = ((utime - prev_task_stat_.user_stat) * 1.0 / total_time_elapsed) * 100.0; | |||
| float sys_util = ((stime - prev_task_stat_.sys_stat) * 1.0 / total_time_elapsed) * 100.0; | |||
| (void)task_cpu_util_.emplace_back(TaskUtil{user_util, sys_util}); | |||
| } | |||
| prev_task_stat_.user_stat = utime; | |||
| prev_task_stat_.sys_stat = stime; | |||
| first_sample_ = false; | |||
| return Status::OK(); | |||
| } | |||
| bool MDOperatorCpuInfo::TaskExists(pid_t id) const { return task_by_id_.find(id) != task_by_id_.end(); } | |||
| void MDOperatorCpuInfo::AddTask(const std::shared_ptr<TaskCpuInfo> &task_ptr) { | |||
| auto id = task_ptr->GetId(); | |||
| if (!TaskExists(id)) { | |||
| (void)task_by_id_.emplace(id, task_ptr); | |||
| } | |||
| } | |||
| void MDOperatorCpuInfo::CalculateOperatorUtilization() { | |||
| OpUtil op_util{0, 0}; | |||
| for (auto const &[task_id, task_ptr] : task_by_id_) { | |||
| MS_LOG(DEBUG) << "Processing task_id: " << task_id; | |||
| auto task_util = task_ptr->GetLatestCpuUtil(); | |||
| op_util.user_utilization += task_util.user_utilization; | |||
| op_util.sys_utilization += task_util.sys_utilization; | |||
| } | |||
| (void)op_cpu_util_.emplace_back(op_util); | |||
| } | |||
| Status MDOperatorCpuInfo::GetUserCpuUtil(uint64_t start_index, uint64_t end_index, | |||
| std::vector<uint16_t> *result) const { | |||
| MS_LOG(DEBUG) << "start_index: " << start_index << " end_index: " << end_index | |||
| << " op_cpu_util_.size: " << op_cpu_util_.size(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_index < end_index, | |||
| "Expected start_index < end_index. Got start_index: " + std::to_string(start_index) + | |||
| " end_index: " + std::to_string(end_index)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| end_index <= op_cpu_util_.size(), | |||
| "Expected end_index <= op_cpu_util_.size(). Got end_index: " + std::to_string(end_index) + | |||
| " op_cpu_util_.size: " + std::to_string(op_cpu_util_.size())); | |||
| auto first_iter = op_cpu_util_.begin() + start_index; | |||
| auto last_iter = op_cpu_util_.begin() + end_index; | |||
| (void)std::transform(first_iter, last_iter, std::back_inserter(*result), [&](const OpUtil &info) { | |||
| return static_cast<uint16_t>(info.user_utilization * static_cast<float>(SystemCpuInfo::num_cpu_)); | |||
| }); | |||
| return Status::OK(); | |||
| } | |||
| Status MDOperatorCpuInfo::GetSysCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint16_t> *result) const { | |||
| MS_LOG(DEBUG) << "start_index: " << start_index << " end_index: " << end_index | |||
| << " op_cpu_util_.size: " << op_cpu_util_.size(); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_index < end_index, | |||
| "Expected start_index < end_index. Got start_index: " + std::to_string(start_index) + | |||
| " end_index: " + std::to_string(end_index)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED( | |||
| end_index <= op_cpu_util_.size(), | |||
| "Expected end_index <= op_cpu_util_.size(). Got end_index: " + std::to_string(end_index) + | |||
| " op_cpu_util_.size: " + std::to_string(op_cpu_util_.size())); | |||
| auto first_iter = op_cpu_util_.begin() + start_index; | |||
| auto last_iter = op_cpu_util_.begin() + end_index; | |||
| (void)std::transform(first_iter, last_iter, std::back_inserter(*result), [&](const OpUtil &info) { | |||
| return static_cast<uint16_t>(info.sys_utilization * static_cast<float>(SystemCpuInfo::num_cpu_)); | |||
| }); | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampler::Sample() { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| // Function to Update TaskList | |||
| // Loop through all tasks to find any new threads | |||
| // Get all multi-processing Ops from Python only if fetched_all_process = False | |||
| // Create new TaskCpuInfo as required and update OpInfo | |||
| RETURN_IF_NOT_OK(UpdateTaskList()); | |||
| // Sample SystemInfo - Update current and move current to previous stat and calc Util | |||
| SystemStat current_sys_stat; | |||
| SystemStat previous_sys_stat; | |||
| RETURN_IF_NOT_OK(sys_cpu_info_.SampleAndGetCurrPrevStat(¤t_sys_stat, &previous_sys_stat)); | |||
| auto total_time_elapsed = current_sys_stat.total_stat - previous_sys_stat.total_stat; | |||
| // Call Sample on all | |||
| // Read /proc/ files and get stat, calculate util | |||
| for (auto &task_ptr : tasks_) { | |||
| (void)task_ptr->Sample(total_time_elapsed); | |||
| } | |||
| // Calculate OperatorCpuInfo | |||
| for (auto &[op_id, op_info] : op_info_by_id_) { | |||
| MS_LOG(DEBUG) << "Calculate operator cpu utilization for OpId: " << op_id; | |||
| op_info.CalculateOperatorUtilization(); | |||
| } | |||
| // Get sampling time. | |||
| (void)ts_.emplace_back(ProfilingTime::GetCurMilliSecond()); | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampler::UpdateTaskList() { | |||
| List<Task> allTasks = tree->AllTasks()->GetTask(); | |||
| for (auto &task : allTasks) { | |||
| int32_t op_id = task.get_operator_id(); | |||
| // check if the op_info was initialized in Init | |||
| auto iter = op_info_by_id_.find(op_id); | |||
| if (iter != op_info_by_id_.end()) { | |||
| int32_t tid = task.get_linux_id(); | |||
| if (!iter->second.TaskExists(tid)) { | |||
| auto task_cpu_info_ptr = std::make_shared<ThreadCpuInfo>(main_pid_, tid); | |||
| (void)tasks_.emplace_back(task_cpu_info_ptr); | |||
| iter->second.AddTask(task_cpu_info_ptr); | |||
| } | |||
| } | |||
| } | |||
| if (!fetched_all_python_multiprocesses_) { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| py::module ds = py::module::import("mindspore.dataset.engine.datasets"); | |||
| py::tuple process_info = ds.attr("_get_operator_process")(); | |||
| auto sub_process = py::reinterpret_borrow<py::dict>(process_info[0]); | |||
| fetched_all_python_multiprocesses_ = py::reinterpret_borrow<py::bool_>(process_info[1]); | |||
| // parse dict value | |||
| auto op_to_process = toIntMap(sub_process); | |||
| for (auto const &[op_id, process_list] : op_to_process) { | |||
| for (auto pid : process_list) { | |||
| auto iter = op_info_by_id_.find(op_id); | |||
| if (iter != op_info_by_id_.end()) { | |||
| if (!iter->second.TaskExists(pid)) { | |||
| auto task_cpu_info_ptr = std::make_shared<ProcessCpuInfo>(pid); | |||
| (void)tasks_.emplace_back(task_cpu_info_ptr); | |||
| iter->second.AddTask(task_cpu_info_ptr); | |||
| } | |||
| } | |||
| } | |||
| } | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampler::Init(const std::string &dir_path, const std::string &device_id) { | |||
| #if defined(USING_LINUX) | |||
| main_pid_ = syscall(SYS_getpid); | |||
| #endif | |||
| auto path = Path(dir_path) / Path("minddata_cpu_utilization_" + device_id + ".json"); | |||
| // remove file if it already exists | |||
| RETURN_IF_NOT_OK(path.Remove()); | |||
| file_path_ = path.ToString(); | |||
| for (auto iter = tree->begin(); iter != tree->end(); iter++) { | |||
| auto op_id = iter->id(); | |||
| (void)op_info_by_id_.emplace(std::make_pair(op_id, MDOperatorCpuInfo(op_id))); | |||
| } | |||
| // thread id of main thread is same as the process ID | |||
| main_thread_cpu_info_ = std::make_shared<ThreadCpuInfo>(main_pid_, main_pid_); | |||
| (void)tasks_.emplace_back(main_thread_cpu_info_); | |||
| main_process_cpu_info_ = std::make_shared<ProcessCpuInfo>(main_pid_); | |||
| (void)tasks_.emplace_back(main_process_cpu_info_); | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampler::ChangeFileMode() { | |||
| if (chmod(common::SafeCStr(file_path_), S_IRUSR | S_IWUSR) == -1) { | |||
| std::string err_str = "Change file mode failed," + file_path_; | |||
| return Status(StatusCode::kMDUnexpectedError, err_str); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampler::SaveToFile() { | |||
| // construct json obj to write to file | |||
| json output; | |||
| output["cpu_processor_num"] = SystemCpuInfo::num_cpu_; | |||
| std::vector<uint8_t> system_user_util, system_sys_util; | |||
| // end_index = ts_.size() essentially means to get all sampled points | |||
| (void)sys_cpu_info_.GetUserCpuUtil(0, ts_.size(), &system_user_util); | |||
| (void)sys_cpu_info_.GetSysCpuUtil(0, ts_.size(), &system_sys_util); | |||
| output["device_info"] = {{"context_switch_count", sys_cpu_info_.GetContextSwitchCount()}, | |||
| {"idle_utilization", sys_cpu_info_.GetIdleCpuUtil()}, | |||
| {"io_utilization", sys_cpu_info_.GetIOCpuUtil()}, | |||
| {"sys_utilization", system_sys_util}, | |||
| {"user_utilization", system_user_util}, | |||
| {"runnable_process", sys_cpu_info_.GetRunningProcess()}}; | |||
| // array of op_info json objects | |||
| json op_infos; | |||
| for (auto &[op_id, op_info] : op_info_by_id_) { | |||
| MS_LOG(INFO) << "Processing op_id: " << op_id; | |||
| std::vector<uint16_t> user_util, sys_util; | |||
| (void)op_info.GetSysCpuUtil(0, ts_.size(), &sys_util); | |||
| (void)op_info.GetUserCpuUtil(0, ts_.size(), &user_util); | |||
| json op_info_json = {{"metrics", {{"user_utilization", user_util}, {"sys_utilization", sys_util}}}, | |||
| {"op_id", op_id}}; | |||
| op_infos.emplace_back(op_info_json); | |||
| } | |||
| output["op_info"] = op_infos; | |||
| output["process_info"] = {{"user_utilization", main_process_cpu_info_->GetUserCpuUtil()}, | |||
| {"sys_utilization", main_process_cpu_info_->GetSysCpuUtil()}}; | |||
| output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval(); | |||
| output["time_stamp"] = ts_; | |||
| // Discard the content of the file when opening. | |||
| std::ofstream os(file_path_, std::ios::trunc); | |||
| os << output; | |||
| os.close(); | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampler::Analyze() { return Status::OK(); } | |||
| Status CpuSampler::GetOpUserCpuUtil(int32_t op_id, uint64_t start_ts, uint64_t end_ts, std::vector<uint16_t> *result) { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| // find first ts that is not less than start_ts | |||
| auto lower = std::lower_bound(ts_.begin(), ts_.end(), start_ts); | |||
| // find first ts that is greater than end_ts | |||
| auto upper = std::upper_bound(ts_.begin(), ts_.end(), end_ts); | |||
| // std::distance is O(1) since vector allows random access | |||
| auto start_index = std::distance(ts_.begin(), lower); | |||
| auto end_index = std::distance(ts_.begin(), upper); | |||
| auto op_info = op_info_by_id_.find(op_id); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(op_info != op_info_by_id_.end(), "Op Id: " + std::to_string(op_id) + " not found."); | |||
| return op_info->second.GetUserCpuUtil(start_index, end_index, result); | |||
| } | |||
| Status CpuSampler::GetOpSysCpuUtil(int32_t op_id, uint64_t start_ts, uint64_t end_ts, std::vector<uint16_t> *result) { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| // find first ts that is not less than start_ts | |||
| auto lower = std::lower_bound(ts_.begin(), ts_.end(), start_ts); | |||
| // find first ts that is greater than end_ts | |||
| auto upper = std::upper_bound(ts_.begin(), ts_.end(), end_ts); | |||
| // std::distance is O(1) since vector allows random access | |||
| auto start_index = std::distance(ts_.begin(), lower); | |||
| auto end_index = std::distance(ts_.begin(), upper); | |||
| auto op_info = op_info_by_id_.find(op_id); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(op_info != op_info_by_id_.end(), "Op Id: " + std::to_string(op_id) + " not found."); | |||
| return op_info->second.GetSysCpuUtil(start_index, end_index, result); | |||
| } | |||
| Status CpuSampler::GetSystemUserCpuUtil(uint64_t start_ts, uint64_t end_ts, std::vector<uint8_t> *result) { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| // find first ts that is not less than start_ts | |||
| auto lower = std::lower_bound(ts_.begin(), ts_.end(), start_ts); | |||
| // find first ts that is greater than end_ts | |||
| auto upper = std::upper_bound(ts_.begin(), ts_.end(), end_ts); | |||
| // std::distance is O(1) since vector allows random access | |||
| auto start_index = std::distance(ts_.begin(), lower); | |||
| auto end_index = std::distance(ts_.begin(), upper); | |||
| return sys_cpu_info_.GetUserCpuUtil(start_index, end_index, result); | |||
| } | |||
| Status CpuSampler::GetSystemSysCpuUtil(uint64_t start_ts, uint64_t end_ts, std::vector<uint8_t> *result) { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| // find first ts that is not less than start_ts | |||
| auto lower = std::lower_bound(ts_.begin(), ts_.end(), start_ts); | |||
| // find first ts that is greater than end_ts | |||
| auto upper = std::upper_bound(ts_.begin(), ts_.end(), end_ts); | |||
| // std::distance is O(1) since vector allows random access | |||
| auto start_index = std::distance(ts_.begin(), lower); | |||
| auto end_index = std::distance(ts_.begin(), upper); | |||
| return sys_cpu_info_.GetSysCpuUtil(start_index, end_index, result); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -0,0 +1,169 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_CPU_SAMPLER_H_ | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_CPU_SAMPLER_H_ | |||
| #include <memory> | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| #include <nlohmann/json.hpp> | |||
| #include "minddata/dataset/engine/perf/profiling.h" | |||
| #include "minddata/dataset/engine/datasetops/dataset_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class ExecutionTree; | |||
// Raw system-wide CPU counters as read from the cpu line of /proc/stat.
// Every member defaults to 0 so that a plainly declared SystemStat never holds
// indeterminate values.
struct SystemStat_s {
  uint64_t user_stat = 0;   // time spent in user mode
  uint64_t sys_stat = 0;    // time spent in kernel mode
  uint64_t io_stat = 0;     // time spent waiting for io
  uint64_t idle_stat = 0;   // idle time
  uint64_t total_stat = 0;  // sum of all parsed counters
};
using SystemStat = SystemStat_s;
// System-wide CPU utilization of one sampling interval, in percent.
// Every member defaults to 0 so that a plainly declared SystemUtil is well defined.
struct SystemUtil_s {
  uint8_t user_utilization = 0;
  uint8_t sys_utilization = 0;
  uint8_t io_utilization = 0;
  uint8_t idle_utilization = 0;
};
using SystemUtil = SystemUtil_s;
// Raw CPU counters of a single process or thread (utime and stime of its stat file).
// Both members default to 0 so that a plainly declared TaskStat is well defined.
struct TaskStat_s {
  uint64_t user_stat = 0;
  uint64_t sys_stat = 0;
};
using TaskStat = TaskStat_s;
// CPU utilization of one sampling interval for a task (TaskUtil) or an operator (OpUtil).
// Both members default to 0 so that a plainly declared instance is well defined.
struct TaskUtil_s {
  float user_utilization = 0.0f;
  float sys_utilization = 0.0f;
};
using TaskUtil = TaskUtil_s;
using OpUtil = TaskUtil_s;
| class SystemCpuInfo { | |||
| public: | |||
| SystemCpuInfo() : first_sample_(true), prev_context_switch_count_(0) {} | |||
| // Read in current stats and return previous and currently read stats | |||
| Status SampleAndGetCurrPrevStat(SystemStat *current_stat, SystemStat *previous_stat); | |||
| static int32_t num_cpu_; | |||
| const std::vector<uint32_t> &GetRunningProcess() const { return running_process_; } | |||
| const std::vector<uint64_t> &GetContextSwitchCount() const { return context_switch_count_; } | |||
| Status GetUserCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint8_t> *result) const; | |||
| Status GetSysCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint8_t> *result) const; | |||
| std::vector<uint8_t> GetIOCpuUtil() const; | |||
| std::vector<uint8_t> GetIdleCpuUtil() const; | |||
| private: | |||
| Status ParseCpuInfo(const std::string &str); | |||
| Status ParseCtxt(const std::string &str); | |||
| Status ParseRunningProcess(const std::string &str); | |||
| SystemStat prev_sys_stat_{}; // last read data /proc/stat file | |||
| std::vector<SystemUtil> sys_cpu_util_; // vector of system cpu utilization | |||
| std::vector<uint32_t> running_process_; // vector of running processes in system | |||
| std::vector<uint64_t> context_switch_count_; // vector of number of context switches between two sampling points | |||
| bool first_sample_; // flag to indicate first time sampling | |||
| uint64_t prev_context_switch_count_; // last read context switch count from /proc/stat file | |||
| }; | |||
| class TaskCpuInfo { | |||
| public: | |||
| explicit TaskCpuInfo(pid_t pid) : pid_(pid), first_sample_(true), last_sampling_failed_(false) {} | |||
| virtual ~TaskCpuInfo() = default; | |||
| virtual Status Sample(uint64_t total_time_elapsed) = 0; | |||
| virtual pid_t GetId() = 0; | |||
| TaskUtil GetLatestCpuUtil() const; | |||
| std::vector<uint16_t> GetSysCpuUtil() const; | |||
| std::vector<uint16_t> GetUserCpuUtil() const; | |||
| protected: | |||
| pid_t pid_; | |||
| TaskStat prev_task_stat_; | |||
| std::vector<TaskUtil> task_cpu_util_; | |||
| bool first_sample_; | |||
| bool last_sampling_failed_; | |||
| }; | |||
| class ProcessCpuInfo : public TaskCpuInfo { | |||
| public: | |||
| explicit ProcessCpuInfo(pid_t pid) : TaskCpuInfo(pid) {} | |||
| ~ProcessCpuInfo() override = default; | |||
| Status Sample(uint64_t total_time_elapsed) override; | |||
| pid_t GetId() override { return pid_; } | |||
| }; | |||
| class ThreadCpuInfo : public TaskCpuInfo { | |||
| public: | |||
| explicit ThreadCpuInfo(pid_t pid, pid_t tid) : TaskCpuInfo(pid), tid_(tid) {} | |||
| ~ThreadCpuInfo() override = default; | |||
| Status Sample(uint64_t total_time_elapsed) override; | |||
| pid_t GetId() override { return tid_; } | |||
| private: | |||
| pid_t tid_; | |||
| }; | |||
| class MDOperatorCpuInfo { | |||
| public: | |||
| void AddTask(const std::shared_ptr<TaskCpuInfo> &task_ptr); | |||
| bool TaskExists(pid_t id) const; | |||
| explicit MDOperatorCpuInfo(const int32_t op_id) : id_(op_id) {} | |||
| void CalculateOperatorUtilization(); | |||
| Status GetUserCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint16_t> *result) const; | |||
| Status GetSysCpuUtil(uint64_t start_index, uint64_t end_index, std::vector<uint16_t> *result) const; | |||
| private: | |||
| int32_t id_; | |||
| // tid is key for threadinfo, pid is key for processinfo | |||
| std::unordered_map<pid_t, std::shared_ptr<TaskCpuInfo>> task_by_id_; | |||
| std::vector<OpUtil> op_cpu_util_; | |||
| }; | |||
| class CpuSampler : public Sampling { | |||
| using Timestamps = std::vector<uint64_t>; | |||
| public: | |||
| explicit CpuSampler(ExecutionTree *tree) : fetched_all_python_multiprocesses_(false), tree(tree) {} | |||
| ~CpuSampler() = default; | |||
| Status Sample() override; | |||
| Status Init(const std::string &dir_path, const std::string &device_id) override; | |||
| Status ChangeFileMode() override; | |||
| Status SaveToFile() override; | |||
| std::string Name() const override { return kCpuSamplerName; } | |||
| Status Analyze() override; | |||
| Status GetSystemUserCpuUtil(uint64_t start_ts, uint64_t end_ts, std::vector<uint8_t> *result); | |||
| Status GetSystemSysCpuUtil(uint64_t start_ts, uint64_t end_ts, std::vector<uint8_t> *result); | |||
| Status GetOpUserCpuUtil(int32_t op_id, uint64_t start_ts, uint64_t end_ts, std::vector<uint16_t> *result); | |||
| Status GetOpSysCpuUtil(int32_t op_id, uint64_t start_ts, uint64_t end_ts, std::vector<uint16_t> *result); | |||
| private: | |||
| Status UpdateTaskList(); | |||
| bool fetched_all_python_multiprocesses_{}; | |||
| ExecutionTree *tree = nullptr; | |||
| pid_t main_pid_{}; | |||
| Timestamps ts_; | |||
| SystemCpuInfo sys_cpu_info_; // store the system cpu utilization | |||
| std::vector<std::shared_ptr<TaskCpuInfo>> tasks_; // vector of all process and thread tasks | |||
| std::shared_ptr<ThreadCpuInfo> main_thread_cpu_info_; | |||
| std::shared_ptr<ProcessCpuInfo> main_process_cpu_info_; | |||
| std::unordered_map<int32_t, MDOperatorCpuInfo> op_info_by_id_; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_PERF_CPU_SAMPLER_H_ | |||
| @@ -1,699 +0,0 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/engine/perf/cpu_sampling.h" | |||
| #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) | |||
| #include <sys/syscall.h> | |||
| #endif | |||
| #include <cmath> | |||
| #include <cstdio> | |||
| #include <algorithm> | |||
| #include <fstream> | |||
| #include <memory> | |||
| #include <string> | |||
| #include "minddata/dataset/api/python/pybind_conversion.h" | |||
| #include "minddata/dataset/core/config_manager.h" | |||
| #include "minddata/dataset/engine/execution_tree.h" | |||
| #include "minddata/dataset/util/path.h" | |||
| using json = nlohmann::json; | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| bool BaseCpu::fetched_all_process_shared_ = false; | |||
| std::unordered_map<int32_t, std::vector<pid_t>> BaseCpu::op_process_shared_ = {}; | |||
| #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) | |||
| #define USING_LINUX | |||
| #endif | |||
| BaseCpu::BaseCpu() { | |||
| pre_cpu_stat_.user_stat_ = 0; | |||
| pre_cpu_stat_.sys_stat_ = 0; | |||
| pre_cpu_stat_.io_stat_ = 0; | |||
| pre_cpu_stat_.idle_stat_ = 0; | |||
| pre_cpu_stat_.total_stat_ = 0; | |||
| fetched_all_process_ = false; | |||
| pre_fetched_state_ = false; | |||
| cpu_processor_num_ = 0; | |||
| } | |||
| Status BaseCpu::GetTotalCpuTime(uint64_t *total_stat) { | |||
| std::ifstream file("/proc/stat"); | |||
| if (!file.is_open()) { | |||
| MS_LOG(INFO) << "Open CPU file failed when collect CPU information"; | |||
| return Status::OK(); | |||
| } | |||
| std::string str; | |||
| getline(file, str); | |||
| uint64_t user = 0, sys = 0, idle = 0, iowait = 0, nice = 0, irq = 0, softirq = 0; | |||
| if (sscanf_s(str.c_str(), "%*s %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &sys, &idle, &iowait, &irq, &softirq) == | |||
| EOF) { | |||
| file.close(); | |||
| return Status(StatusCode::kMDUnexpectedError, "Get device CPU failed."); | |||
| } | |||
| file.close(); | |||
| *total_stat = user + nice + sys + idle + iowait + irq + softirq; | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceCpu::ParseCpuInfo(const std::string &str) { | |||
| CpuStat cpu_stat; | |||
| uint64_t nice = 0; | |||
| uint64_t irq = 0; | |||
| uint64_t softirq = 0; | |||
| if (sscanf_s(str.c_str(), "%*s %lu %lu %lu %lu %lu %lu %lu", &cpu_stat.user_stat_, &nice, &cpu_stat.sys_stat_, | |||
| &cpu_stat.idle_stat_, &cpu_stat.io_stat_, &irq, &softirq) == EOF) { | |||
| return Status(StatusCode::kMDUnexpectedError, "Get device CPU failed."); | |||
| } | |||
| cpu_stat.total_stat_ = | |||
| cpu_stat.user_stat_ + nice + cpu_stat.sys_stat_ + cpu_stat.idle_stat_ + cpu_stat.io_stat_ + irq + softirq; | |||
| // Calculate the utilization from the second sampling | |||
| if (!first_collect_) { | |||
| CpuUtil info; | |||
| info.user_utilization_ = round((cpu_stat.user_stat_ - pre_cpu_stat_.user_stat_) * 1.0 / | |||
| (cpu_stat.total_stat_ - pre_cpu_stat_.total_stat_) * 100); | |||
| info.sys_utilization_ = round((cpu_stat.sys_stat_ - pre_cpu_stat_.sys_stat_) * 1.0 / | |||
| (cpu_stat.total_stat_ - pre_cpu_stat_.total_stat_) * 100); | |||
| info.io_utilization_ = round((cpu_stat.io_stat_ - pre_cpu_stat_.io_stat_) * 1.0 / | |||
| (cpu_stat.total_stat_ - pre_cpu_stat_.total_stat_) * 100); | |||
| info.idle_utilization_ = round((cpu_stat.idle_stat_ - pre_cpu_stat_.idle_stat_) * 1.0 / | |||
| (cpu_stat.total_stat_ - pre_cpu_stat_.total_stat_) * 100); | |||
| cpu_util_.emplace_back(info); | |||
| } | |||
| pre_cpu_stat_.user_stat_ = cpu_stat.user_stat_; | |||
| pre_cpu_stat_.sys_stat_ = cpu_stat.sys_stat_; | |||
| pre_cpu_stat_.io_stat_ = cpu_stat.io_stat_; | |||
| pre_cpu_stat_.idle_stat_ = cpu_stat.idle_stat_; | |||
| pre_cpu_stat_.total_stat_ = cpu_stat.total_stat_; | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceCpu::ParseCtxt(const std::string &str) { | |||
| uint64_t ctxt; | |||
| if (sscanf_s(str.c_str(), "%*s %lu", &ctxt) == EOF) { | |||
| return Status(StatusCode::kMDUnexpectedError, "Get context switch count failed."); | |||
| } | |||
| // Calculate the utilization from the second sampling | |||
| if (!first_collect_) { | |||
| context_switch_count_.push_back(ctxt - pre_context_switch_count_); | |||
| } | |||
| pre_context_switch_count_ = ctxt; | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceCpu::ParseRunningProcess(const std::string &str) { | |||
| uint32_t running_process; | |||
| if (sscanf_s(str.c_str(), "%*s %ud", &running_process) == EOF) { | |||
| return Status(StatusCode::kMDUnexpectedError, "Get context switch count failed."); | |||
| } | |||
| // Drop the first value in order to collect same amount of CPU utilization | |||
| if (!first_collect_) { | |||
| running_process_.push_back(running_process); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceCpu::Collect(const ExecutionTree *tree) { | |||
| std::ifstream file("/proc/stat"); | |||
| if (!file.is_open()) { | |||
| MS_LOG(INFO) << "Open CPU file failed when collect CPU information"; | |||
| return Status::OK(); | |||
| } | |||
| bool first_line = true; | |||
| std::string line; | |||
| while (getline(file, line)) { | |||
| if (first_line) { | |||
| first_line = false; | |||
| RETURN_IF_NOT_OK(ParseCpuInfo(line)); | |||
| } | |||
| if (line.find("ctxt") != std::string::npos) { | |||
| RETURN_IF_NOT_OK(ParseCtxt(line)); | |||
| } | |||
| if (line.find("procs_running") != std::string::npos) { | |||
| RETURN_IF_NOT_OK(ParseRunningProcess(line)); | |||
| } | |||
| } | |||
| file.close(); | |||
| first_collect_ = false; | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { | |||
| RETURN_UNEXPECTED_IF_NULL(name); | |||
| name->clear(); | |||
| name->append("device_info"); | |||
| int total_samples = cpu_util_.size(); | |||
| int sum = 0; | |||
| // Only analyze the middle half of the samples | |||
| // Starting and ending may be impacted by startup or ending pipeline activities | |||
| int start_analyze = total_samples / 4; | |||
| int end_analyze = total_samples - start_analyze; | |||
| for (int i = start_analyze; i < end_analyze; i++) { | |||
| sum += cpu_util_[i].user_utilization_; | |||
| sum += cpu_util_[i].sys_utilization_; | |||
| } | |||
| // Note device utilization is already in range of 0-1, so don't | |||
| // need to divide by number of CPUS | |||
| if ((end_analyze - start_analyze) > 0) { | |||
| *utilization = sum / (end_analyze - start_analyze); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceCpu::SaveToFile(const std::string &file_path) { | |||
| Path path = Path(file_path); | |||
| json output; | |||
| if (path.Exists()) { | |||
| MS_LOG(DEBUG) << file_path << " exists already"; | |||
| try { | |||
| std::ifstream file(file_path); | |||
| file >> output; | |||
| } catch (const std::exception &err) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + file_path + | |||
| ", please delete it and try again!"); | |||
| } | |||
| } else { | |||
| output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval(); | |||
| } | |||
| std::vector<int8_t> user_util; | |||
| std::transform(cpu_util_.begin(), cpu_util_.end(), std::back_inserter(user_util), | |||
| [&](const CpuUtil &info) { return info.user_utilization_; }); | |||
| std::vector<int8_t> sys_util; | |||
| std::transform(cpu_util_.begin(), cpu_util_.end(), std::back_inserter(sys_util), | |||
| [&](const CpuUtil &info) { return info.sys_utilization_; }); | |||
| std::vector<int8_t> io_util; | |||
| std::transform(cpu_util_.begin(), cpu_util_.end(), std::back_inserter(io_util), | |||
| [&](const CpuUtil &info) { return info.io_utilization_; }); | |||
| std::vector<int8_t> idle_util; | |||
| std::transform(cpu_util_.begin(), cpu_util_.end(), std::back_inserter(idle_util), | |||
| [&](const CpuUtil &info) { return info.idle_utilization_; }); | |||
| output["device_info"] = {{"user_utilization", user_util}, | |||
| {"sys_utilization", sys_util}, | |||
| {"io_utilization", io_util}, | |||
| {"idle_utilization", idle_util}, | |||
| {"runable_processes", running_process_}, | |||
| {"context_switch_count", context_switch_count_}}; | |||
| // Discard the content of the file when opening. | |||
| std::ofstream os(file_path, std::ios::trunc); | |||
| os << output; | |||
| os.close(); | |||
| MS_LOG(INFO) << "Save device CPU success."; | |||
| return Status::OK(); | |||
| } | |||
| Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id, | |||
| std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat) { | |||
| RETURN_UNEXPECTED_IF_NULL(op_stat); | |||
| pid_t pid = 0; | |||
| #if defined(USING_LINUX) | |||
| pid = syscall(SYS_getpid); | |||
| #endif | |||
| std::string stat_path = "/proc/" + std::to_string(pid) + "/task/" + std::to_string(thread_id) + "/stat"; | |||
| // Judge whether file exist first | |||
| Path temp_path(stat_path); | |||
| if (!temp_path.Exists()) { | |||
| (*op_stat)[op_id][thread_id].user_stat_ = 0; | |||
| (*op_stat)[op_id][thread_id].sys_stat_ = 0; | |||
| return Status(StatusCode::kMDFileNotExist); | |||
| } | |||
| std::ifstream file(stat_path); | |||
| if (!file.is_open()) { | |||
| MS_LOG(INFO) << "Open CPU file failed when collect CPU information"; | |||
| return Status::OK(); | |||
| } | |||
| std::string str; | |||
| getline(file, str); | |||
| uint64_t utime; | |||
| uint64_t stime; | |||
| if (sscanf_s(str.c_str(), "%*d %*s %*s %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %lu %lu", &utime, &stime) == | |||
| EOF) { | |||
| file.close(); | |||
| return Status(StatusCode::kMDUnexpectedError, "Get device CPU failed."); | |||
| } | |||
| file.close(); | |||
| (*op_stat)[op_id][thread_id].user_stat_ = utime; | |||
| (*op_stat)[op_id][thread_id].sys_stat_ = stime; | |||
| return Status::OK(); | |||
| } | |||
| Status OperatorCpu::Collect(const ExecutionTree *tree) { | |||
| RETURN_UNEXPECTED_IF_NULL(tree); | |||
| if (first_collect_) { | |||
| for (auto iter = tree->begin(); iter != tree->end(); ++iter) { | |||
| id_count_++; | |||
| op_name_[iter->id()] = iter->NameWithID(); | |||
| op_parallel_workers_[iter->id()] = iter->NumWorkers(); | |||
| } | |||
| #if defined(USING_LINUX) | |||
| cpu_processor_num_ = get_nprocs_conf(); | |||
| #endif | |||
| } | |||
| // Obtain the op and thread mapping | |||
| op_thread_.clear(); | |||
| List<Task> allTasks = tree->AllTasks()->GetTask(); | |||
| for (auto &task1 : allTasks) { | |||
| int32_t op_id = task1.get_operator_id(); | |||
| op_thread_[op_id].emplace_back(task1.get_linux_id()); | |||
| } | |||
| // add process id into op_thread | |||
| if (!fetched_all_process_) { | |||
| { | |||
| py::gil_scoped_acquire gil_acquire; | |||
| py::module ds = py::module::import("mindspore.dataset.engine.datasets"); | |||
| py::tuple process_info = ds.attr("_get_operator_process")(); | |||
| py::dict sub_process = py::reinterpret_borrow<py::dict>(process_info[0]); | |||
| fetched_all_process_ = py::reinterpret_borrow<py::bool_>(process_info[1]); | |||
| // parse dict value | |||
| op_process_ = toIntMap(sub_process); | |||
| BaseCpu::op_process_shared_ = op_process_; | |||
| BaseCpu::fetched_all_process_shared_ = fetched_all_process_; | |||
| } | |||
| // judge whether there is device_que operator, if so operator id may need increase by one, temp use directly | |||
| for (auto item : op_process_) { | |||
| if (!item.second.empty()) { | |||
| if (op_thread_.find(item.first) != op_thread_.end()) { | |||
| op_thread_[item.first].insert(op_thread_[item.first].end(), item.second.begin(), item.second.end()); | |||
| } else { | |||
| op_thread_[item.first] = item.second; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| uint64_t total_stat_; | |||
| RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_)); | |||
| std::vector<CpuOpUtil> cpu_step_util_; | |||
| std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> op_stat_; | |||
| if (!first_collect_) { | |||
| // obtain all the op id in current tasks | |||
| std::vector<int32_t> total_op_id; | |||
| (void)std::transform(op_thread_.begin(), op_thread_.end(), std::back_inserter(total_op_id), | |||
| [](const auto &iter) { return iter.first; }); | |||
| // iter all the op, and obtain the CPU utilization of each operator | |||
| for (auto op_id = -1; op_id < id_count_; op_id++) { | |||
| float user_util = 0, sys_util = 0; | |||
| auto iter = std::find(total_op_id.begin(), total_op_id.end(), op_id); | |||
| if (iter != total_op_id.end()) { | |||
| for (auto thread_id : op_thread_[op_id]) { | |||
| if (ParseCpuInfo(op_id, thread_id, &op_stat_) == Status::OK()) { | |||
| user_util += (op_stat_[op_id][thread_id].user_stat_ - pre_op_stat_[op_id][thread_id].user_stat_) * 1.0 / | |||
| (total_stat_ - pre_total_stat_) * 100; | |||
| sys_util += (op_stat_[op_id][thread_id].sys_stat_ - pre_op_stat_[op_id][thread_id].sys_stat_) * 1.0 / | |||
| (total_stat_ - pre_total_stat_) * 100; | |||
| } | |||
| } | |||
| } | |||
| CpuOpUtil info; | |||
| info.op_id_ = op_id; | |||
| info.sys_utilization_ = sys_util; | |||
| info.user_utilization_ = user_util; | |||
| cpu_step_util_.emplace_back(info); | |||
| } | |||
| cpu_op_util_.emplace_back(cpu_step_util_); | |||
| } else { | |||
| // mainly obtain the init CPU execute time in first collect | |||
| for (const auto &iter : op_thread_) { | |||
| int32_t op_id = iter.first; | |||
| for (auto thread_id_ : iter.second) { | |||
| // ParseCpuInfo may execute failed for cpu data not ready, but we still get next thread cpu info | |||
| (void)ParseCpuInfo(op_id, thread_id_, &op_stat_); | |||
| } | |||
| } | |||
| } | |||
| // copy current op_stat into pre_op_stat | |||
| pre_op_stat_ = op_stat_; | |||
| pre_total_stat_ = total_stat_; | |||
| first_collect_ = false; | |||
| return Status::OK(); | |||
| } | |||
| Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { | |||
| RETURN_UNEXPECTED_IF_NULL(name); | |||
| RETURN_UNEXPECTED_IF_NULL(extra_message); | |||
| int total_samples = cpu_op_util_.size(); | |||
| // Only analyze the middle half of the samples | |||
| // Starting and ending may be impacted by startup or ending pipeline activities | |||
| constexpr int64_t sample_sections = 4; | |||
| int64 start_analyze = total_samples / sample_sections; | |||
| int64 end_analyze = total_samples - start_analyze; | |||
| double op_util = 0; | |||
| *utilization = 0; | |||
| // start loop from 0 was as don't want to analyze op -1 | |||
| for (auto op_id = 0; op_id < id_count_; op_id++) { | |||
| int64 sum = 0; | |||
| int64 index = op_id + 1; | |||
| for (int i = start_analyze; i < end_analyze; i++) { | |||
| sum += cpu_op_util_[i][index].user_utilization_; | |||
| sum += cpu_op_util_[i][index].sys_utilization_; | |||
| } | |||
| if ((end_analyze - start_analyze) > 0) { | |||
| op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers_[op_id] * (end_analyze - start_analyze)); | |||
| } | |||
| if (op_util > *utilization) { | |||
| *utilization = op_util; | |||
| name->clear(); | |||
| (void)name->append(op_name_[op_id]); | |||
| } | |||
| (void)extra_message->append(op_name_[op_id] + " utilization per thread: " + std::to_string(op_util) + "% (" + | |||
| std::to_string(op_parallel_workers_[op_id]) + " parallel_workers); "); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status OperatorCpu::SaveToFile(const std::string &file_path) { | |||
| Path path = Path(file_path); | |||
| json output; | |||
| if (path.Exists()) { | |||
| MS_LOG(DEBUG) << file_path << "already exist."; | |||
| try { | |||
| std::ifstream file(file_path); | |||
| file >> output; | |||
| } catch (const std::exception &err) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + file_path + | |||
| ", please delete it and try again!"); | |||
| } | |||
| } | |||
| uint8_t index = 0; | |||
| json OpWriter; | |||
| for (auto op_id = -1; op_id < id_count_; op_id++) { | |||
| std::vector<uint16_t> user_util; | |||
| std::vector<uint16_t> sys_util; | |||
| std::transform( | |||
| cpu_op_util_.begin(), cpu_op_util_.end(), std::back_inserter(user_util), | |||
| [&](const std::vector<CpuOpUtil> &info) { return int16_t(info[index].user_utilization_ * cpu_processor_num_); }); | |||
| std::transform( | |||
| cpu_op_util_.begin(), cpu_op_util_.end(), std::back_inserter(sys_util), | |||
| [&](const std::vector<CpuOpUtil> &info) { return int16_t(info[index].sys_utilization_ * cpu_processor_num_); }); | |||
| json per_op_info = {{"metrics", {{"user_utilization", user_util}, {"sys_utilization", sys_util}}}, | |||
| {"op_id", op_id}}; | |||
| OpWriter.emplace_back(per_op_info); | |||
| index++; | |||
| } | |||
| output["op_info"] = OpWriter; | |||
| // Discard the content of the file when opening. | |||
| std::ofstream os(file_path, std::ios::trunc); | |||
| os << output; | |||
| os.close(); | |||
| MS_LOG(INFO) << "Save device CPU success."; | |||
| return Status::OK(); | |||
| } | |||
| Status ProcessCpu::ParseCpuInfo() { | |||
| uint64_t total_stat_; | |||
| RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_)); | |||
| if (!pre_fetched_state_) { | |||
| process_id_.clear(); | |||
| pid_t main_pid = 0; | |||
| #if defined(USING_LINUX) | |||
| main_pid = syscall(SYS_getpid); | |||
| #endif | |||
| process_id_.emplace_back(main_pid); | |||
| op_process_ = BaseCpu::op_process_shared_; | |||
| fetched_all_process_ = BaseCpu::fetched_all_process_shared_; | |||
| for (const auto &item : op_process_) { | |||
| for (const auto &id : item.second) { | |||
| process_id_.emplace_back(id); | |||
| } | |||
| } | |||
| } | |||
| float user_util = 0, sys_util = 0; | |||
| for (const auto &pid : process_id_) { | |||
| std::string stat_path = "/proc/" + std::to_string(pid) + "/stat"; | |||
| std::ifstream file(stat_path); | |||
| if (!file.is_open()) { | |||
| MS_LOG(INFO) << "Open CPU file failed when collect CPU information"; | |||
| continue; | |||
| } | |||
| std::string str; | |||
| getline(file, str); | |||
| uint64_t user = 0, sys = 0; | |||
| if (sscanf_s(str.c_str(), "%*d %*s %*s %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %lu %lu", &user, &sys) == | |||
| EOF) { | |||
| file.close(); | |||
| return Status(StatusCode::kMDUnexpectedError, "Get device CPU failed."); | |||
| } | |||
| file.close(); | |||
| // Calculate the utilization from the second sampling | |||
| if (!first_collect_ && (pre_process_stat_.find(pid) != pre_process_stat_.end())) { | |||
| user_util += (user - pre_process_stat_[pid].user_stat_) * 1.0 / (total_stat_ - pre_total_stat_) * 100; | |||
| sys_util += (sys - pre_process_stat_[pid].sys_stat_) * 1.0 / (total_stat_ - pre_total_stat_) * 100; | |||
| } | |||
| pre_process_stat_[pid].user_stat_ = user; | |||
| pre_process_stat_[pid].sys_stat_ = sys; | |||
| } | |||
| if (!first_collect_) { | |||
| CpuProcessUtil info; | |||
| info.user_utilization_ = user_util; | |||
| info.sys_utilization_ = sys_util; | |||
| process_util_.emplace_back(info); | |||
| } | |||
| pre_total_stat_ = total_stat_; | |||
| first_collect_ = false; | |||
| pre_fetched_state_ = fetched_all_process_; | |||
| return Status::OK(); | |||
| } | |||
| Status ProcessCpu::Collect(const ExecutionTree *tree) { | |||
| RETURN_UNEXPECTED_IF_NULL(tree); | |||
| if (first_collect_) { | |||
| #if defined(USING_LINUX) | |||
| cpu_processor_num_ = get_nprocs_conf(); | |||
| #endif | |||
| } | |||
| RETURN_IF_NOT_OK(ParseCpuInfo()); | |||
| return Status::OK(); | |||
| } | |||
| Status ProcessCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { | |||
| RETURN_UNEXPECTED_IF_NULL(name); | |||
| RETURN_UNEXPECTED_IF_NULL(utilization); | |||
| RETURN_UNEXPECTED_IF_NULL(extra_message); | |||
| name->clear(); | |||
| name->append("process_info"); | |||
| int total_samples = process_util_.size(); | |||
| int64 sum = 0; | |||
| // Only analyze the middle half of the samples | |||
| // Starting and ending may be impacted by startup or ending pipeline activities | |||
| constexpr int64_t sample_sections = 4; | |||
| int64 start_analyze = total_samples / sample_sections; | |||
| int64 end_analyze = total_samples - start_analyze; | |||
| for (int i = start_analyze; i < end_analyze; i++) { | |||
| sum += process_util_[i].user_utilization_; | |||
| sum += process_util_[i].sys_utilization_; | |||
| } | |||
| if ((end_analyze - start_analyze) > 0) { | |||
| *utilization = sum / (end_analyze - start_analyze); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status ProcessCpu::SaveToFile(const std::string &file_path) { | |||
| Path path = Path(file_path); | |||
| json output; | |||
| if (path.Exists()) { | |||
| MS_LOG(DEBUG) << file_path << "already exist."; | |||
| try { | |||
| std::ifstream file(file_path); | |||
| file >> output; | |||
| } catch (const std::exception &err) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + file_path + | |||
| ", please delete it and try again!"); | |||
| } | |||
| } else { | |||
| output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval(); | |||
| } | |||
| std::vector<int16_t> user_util; | |||
| std::transform(process_util_.begin(), process_util_.end(), std::back_inserter(user_util), | |||
| [&](const CpuProcessUtil &info) { return uint16_t(info.user_utilization_ * cpu_processor_num_); }); | |||
| std::vector<int16_t> sys_util; | |||
| std::transform(process_util_.begin(), process_util_.end(), std::back_inserter(sys_util), | |||
| [&](const CpuProcessUtil &info) { return uint16_t(info.sys_utilization_ * cpu_processor_num_); }); | |||
| output["process_info"] = {{"user_utilization", user_util}, {"sys_utilization", sys_util}}; | |||
| output["cpu_processor_num"] = cpu_processor_num_; | |||
| // Discard the content of the file when opening. | |||
| std::ofstream os(file_path, std::ios::trunc); | |||
| os << output; | |||
| os.close(); | |||
| MS_LOG(INFO) << "Save process CPU success."; | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampling::CollectTimeStamp() { | |||
| time_stamp_.emplace_back(ProfilingTime::GetCurMilliSecond()); | |||
| return Status::OK(); | |||
| } | |||
| // Sample action | |||
| Status CpuSampling::Sample() { | |||
| // Collect cpu information | |||
| for (auto cpu : cpu_) { | |||
| RETURN_IF_NOT_OK(cpu->Collect(this->tree_)); | |||
| } | |||
| // Collect time stamp | |||
| RETURN_IF_NOT_OK(CollectTimeStamp()); | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampling::SaveTimeStampToFile() { | |||
| // Save time stamp to json file | |||
| // If the file is already exist, simply add the data to corresponding field. | |||
| Path path = Path(file_path_); | |||
| json output; | |||
| if (path.Exists()) { | |||
| try { | |||
| std::ifstream file(file_path_); | |||
| file >> output; | |||
| } catch (const std::exception &err) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + file_path_ + | |||
| ", please delete it and try again!"); | |||
| } | |||
| } | |||
| output["time_stamp"] = time_stamp_; | |||
| std::ofstream os(file_path_, std::ios::trunc); | |||
| os << output; | |||
| os.close(); | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampling::SaveSamplingItervalToFile() { | |||
| // If the file is already exist, simply add the data to corresponding field. | |||
| Path path = Path(file_path_); | |||
| json output; | |||
| if (path.Exists()) { | |||
| try { | |||
| std::ifstream file(file_path_); | |||
| file >> output; | |||
| } catch (const std::exception &err) { | |||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + file_path_ + | |||
| ", please delete it and try again!"); | |||
| } | |||
| } | |||
| output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval(); | |||
| std::ofstream os(file_path_, std::ios::trunc); | |||
| os << output; | |||
| os.close(); | |||
| return Status::OK(); | |||
| } | |||
| // Analyze profiling data and output warning messages | |||
| Status CpuSampling::Analyze() { | |||
| std::string name; | |||
| double utilization = 0; | |||
| constexpr double total_cpu_thold = 90; | |||
| constexpr double op_cpu_thold = 80; | |||
| // Keep track of specific information returned by differentn CPU sampling types | |||
| double total_utilization = 0; | |||
| double max_op_utilization = 0; | |||
| std::string max_op_name; | |||
| std::string detailed_op_cpu_message; | |||
| // Save cpu information to json file | |||
| for (auto cpu : cpu_) { | |||
| std::string extra_message; | |||
| RETURN_IF_NOT_OK(cpu->Analyze(&name, &utilization, &extra_message)); | |||
| if (name == "device_info") { | |||
| total_utilization = utilization; | |||
| } else if (name != "process_info") { | |||
| max_op_utilization = utilization; | |||
| max_op_name = name; | |||
| detailed_op_cpu_message = extra_message; | |||
| } | |||
| } | |||
| if ((total_utilization < total_cpu_thold) && (max_op_utilization > op_cpu_thold)) { | |||
| MS_LOG(WARNING) << "Operator " << max_op_name << " is using " << max_op_utilization << "% CPU per thread. " | |||
| << "This operator may benefit from increasing num_parallel_workers." | |||
| << "Full Operator CPU utiliization for all operators: " << detailed_op_cpu_message << std::endl; | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| // Save profiling data to file | |||
| Status CpuSampling::SaveToFile() { | |||
| // Save time stamp to json file | |||
| RETURN_IF_NOT_OK(SaveTimeStampToFile()); | |||
| // Save time stamp to json file | |||
| RETURN_IF_NOT_OK(SaveSamplingItervalToFile()); | |||
| // Save cpu information to json file | |||
| for (auto cpu : cpu_) { | |||
| RETURN_IF_NOT_OK(cpu->SaveToFile(file_path_)); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampling::Init(const std::string &dir_path, const std::string &device_id) { | |||
| file_path_ = (Path(dir_path) / Path("minddata_cpu_utilization_" + device_id + ".json")).ToString(); | |||
| std::shared_ptr<DeviceCpu> device_cpu = std::make_shared<DeviceCpu>(); | |||
| std::shared_ptr<OperatorCpu> operator_cpu = std::make_shared<OperatorCpu>(); | |||
| std::shared_ptr<ProcessCpu> process_cpu = std::make_shared<ProcessCpu>(); | |||
| cpu_.push_back(device_cpu); | |||
| cpu_.push_back(operator_cpu); | |||
| cpu_.push_back(process_cpu); | |||
| return Status::OK(); | |||
| } | |||
| Status CpuSampling::ChangeFileMode() { | |||
| if (chmod(common::SafeCStr(file_path_), S_IRUSR | S_IWUSR) == -1) { | |||
| std::string err_str = "Change file mode failed," + file_path_; | |||
| return Status(StatusCode::kMDUnexpectedError, err_str); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -1,210 +0,0 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H | |||
| #define MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H | |||
| #include <memory> | |||
| #include <string> | |||
| #include <unordered_map> | |||
| #include <vector> | |||
| #include <nlohmann/json.hpp> | |||
| #include "minddata/dataset/engine/perf/profiling.h" | |||
| #include "minddata/dataset/engine/datasetops/dataset_op.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| class ExecutionTree; | |||
// Raw CPU counters read from /proc/stat or /proc/pid/stat.
struct CpuStat_s {
  uint64_t user_stat_;   // user counter
  uint64_t sys_stat_;    // system counter
  uint64_t io_stat_;     // iowait counter
  uint64_t idle_stat_;   // idle counter
  uint64_t total_stat_;  // sum of the counters that were read
};
using CpuStat = CpuStat_s;
// Device CPU utilization of one sample, each field a rounded percentage.
struct CpuInfo_s {
  uint8_t user_utilization_;
  uint8_t sys_utilization_;
  uint8_t io_utilization_;
  uint8_t idle_utilization_;
};
using CpuUtil = CpuInfo_s;
// CPU utilization of one operator in one sample.
struct CpuOpInfo_s {
  float user_utilization_;
  float sys_utilization_;
  int32_t op_id_;  // id of the operator the two values belong to
};
using CpuOpUtil = CpuOpInfo_s;
// CPU utilization of the sampled processes in one sample.
struct CpuProcessInfo_s {
  float user_utilization_;
  float sys_utilization_;
};
using CpuProcessUtil = CpuProcessInfo_s;
// Raw user/system CPU counters of one operator thread or process.
struct CpuOpStat_s {
  uint64_t user_stat_;
  uint64_t sys_stat_;
};
using CpuOpStat = CpuOpStat_s;
| class BaseCpu { | |||
| public: | |||
| BaseCpu(); | |||
| ~BaseCpu() = default; | |||
| // Collect CPU information | |||
| virtual Status Collect(const ExecutionTree *tree) = 0; | |||
| virtual Status SaveToFile(const std::string &file_path) = 0; | |||
| virtual Status Analyze(std::string *name, double *utilization, std::string *extra_message) = 0; | |||
| // Get the total CPU time of device | |||
| Status GetTotalCpuTime(uint64_t *total_stat); | |||
| protected: | |||
| std::vector<CpuUtil> cpu_util_; | |||
| CpuStat pre_cpu_stat_; | |||
| static bool fetched_all_process_shared_; | |||
| static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared_; | |||
| bool fetched_all_process_; | |||
| bool pre_fetched_state_; | |||
| std::unordered_map<int32_t, std::vector<pid_t>> op_process_; | |||
| int32_t cpu_processor_num_; | |||
| }; | |||
| // Collect device CPU information | |||
| class DeviceCpu : public BaseCpu { | |||
| public: | |||
| DeviceCpu() : pre_running_process_(0), pre_context_switch_count_(0), first_collect_(true) {} | |||
| ~DeviceCpu() = default; | |||
| Status Collect(const ExecutionTree *tree) override; | |||
| Status SaveToFile(const std::string &file_path) override; | |||
| Status Analyze(std::string *name, double *utilization, std::string *extra_message) override; | |||
| private: | |||
| // Get CPU information, include use/sys/idle/io utilization | |||
| Status ParseCpuInfo(const std::string &str); | |||
| // Get context switch count | |||
| Status ParseCtxt(const std::string &str); | |||
| // Get running process count | |||
| Status ParseRunningProcess(const std::string &str); | |||
| std::vector<uint32_t> running_process_; | |||
| std::vector<uint64_t> context_switch_count_; | |||
| uint32_t pre_running_process_; | |||
| uint64_t pre_context_switch_count_; | |||
| bool first_collect_; | |||
| }; | |||
| // Collect operator CPU information | |||
| class OperatorCpu : public BaseCpu { | |||
| public: | |||
| OperatorCpu() : first_collect_(true), pre_total_stat_(0), id_count_(0) {} | |||
| ~OperatorCpu() = default; | |||
| Status Collect(const ExecutionTree *tree) override; | |||
| Status SaveToFile(const std::string &file_path) override; | |||
| // Analyze will output the name of the metric, the avg utiliization of highest | |||
| // object within the class and any extra message that would be useful for the user. | |||
| // The Higher level CPUSampling class will combine information from different classes | |||
| // to decide if warning should be output. | |||
| Status Analyze(std::string *name, double *utilization, std::string *extra_message) override; | |||
| private: | |||
| // Get cpu information, include use/sys/idle/io utilization | |||
| Status ParseCpuInfo(int32_t op_id, int64_t thread_id, | |||
| std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat); | |||
| // Store the CPU utilization of each operator | |||
| std::vector<std::vector<CpuOpUtil>> cpu_op_util_; | |||
| bool first_collect_; | |||
| // Store the id and its corresponding threads. | |||
| std::unordered_map<int32_t, std::vector<pid_t>> op_thread_; | |||
| std::unordered_map<int32_t, std::string> op_name_; | |||
| std::unordered_map<int32_t, int32_t> op_parallel_workers_; | |||
| std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_; | |||
| uint64_t pre_total_stat_; | |||
| int32_t id_count_; | |||
| }; | |||
| // Collect operator CPU information | |||
| class ProcessCpu : public BaseCpu { | |||
| public: | |||
| ProcessCpu() : first_collect_(true), pre_total_stat_(0) {} | |||
| ~ProcessCpu() = default; | |||
| Status Collect(const ExecutionTree *tree) override; | |||
| Status SaveToFile(const std::string &file_path) override; | |||
| Status Analyze(std::string *name, double *utilization, std::string *extra_message) override; | |||
| private: | |||
| // Get CPU information, include use/sys/idle/io utilization | |||
| Status ParseCpuInfo(); | |||
| bool first_collect_; | |||
| std::vector<CpuProcessUtil> process_util_; | |||
| uint64_t pre_total_stat_; | |||
| std::unordered_map<int64_t, CpuOpStat> pre_process_stat_; | |||
| std::vector<pid_t> process_id_; | |||
| }; | |||
| // Sampling CPU information | |||
| // It support JSON serialization for external usage. | |||
| class CpuSampling : public Sampling { | |||
| using TimeStamp = std::vector<uint32_t>; | |||
| public: | |||
| explicit CpuSampling(ExecutionTree *tree) : tree_(tree) {} | |||
| ~CpuSampling() = default; | |||
| // Driver function for CPU sampling. | |||
| // This function samples the CPU information of device/process/op | |||
| Status Sample() override; | |||
| std::string Name() const override { return kCpuSamplingName; } | |||
| // Save sampling data to file | |||
| // @return Status - The error code return | |||
| Status SaveToFile() override; | |||
| Status Init(const std::string &dir_path, const std::string &device_id) override; | |||
| // Change file mode after save CPU data | |||
| Status ChangeFileMode() override; | |||
| // Analyze sampling data and print message to log | |||
| Status Analyze() override; | |||
| private: | |||
| Status CollectTimeStamp(); | |||
| Status SaveTimeStampToFile(); | |||
| Status SaveSamplingItervalToFile(); | |||
| ExecutionTree *tree_ = nullptr; // ExecutionTree pointer | |||
| std::vector<std::shared_ptr<BaseCpu>> cpu_; // CPU information of device/process/op | |||
| TimeStamp time_stamp_; // Time stamp | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H | |||
| @@ -13,49 +13,66 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <sys/stat.h> | |||
| #include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" | |||
| #include <fstream> | |||
| #include <string> | |||
| #include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" | |||
| #ifndef ENABLE_ANDROID | |||
| #include "utils/log_adapter.h" | |||
| #else | |||
| #include "mindspore/lite/src/common/log_adapter.h" | |||
| #endif | |||
| #include "minddata/dataset/util/path.h" | |||
| #include "mindspore/core/utils/ms_utils.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| Status DatasetIteratorTracing::Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, | |||
| const int32_t value, const uint64_t time_stamp) { | |||
| // Format: "type extra-info batch-num value" | |||
| // type: 0: time, 1: connector size | |||
| // extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time | |||
| // if type is 1 - connector capacity | |||
| // batch-num: batch number | |||
| // value: if type is 0 - value is time(ms) | |||
| // if type is 1 - value is connector size | |||
| // Examples: | |||
| // 0 0 20 10 - The 20th batch took 10ms to get data from pipeline. | |||
| // 1 64 20 5 - Connector size is 5 when get the 20th batch.Connector capacity is 64. | |||
| std::string data = std::to_string(type) + " " + std::to_string(extra_info) + " " + std::to_string(batch_num) + " " + | |||
| std::to_string(value) + " " + std::to_string(time_stamp); | |||
| value_.emplace_back(data); | |||
| return Status::OK(); | |||
| } | |||
| constexpr int32_t CONNECTOR_CAPACITY_OFFSET = 0; | |||
| Status DatasetIteratorTracing::Init(const std::string &dir_path, const std::string &device_id) { | |||
| file_path_ = (Path(dir_path) / Path("dataset_iterator_profiling_" + device_id + ".txt")).ToString(); | |||
| return Status::OK(); | |||
| } | |||
| Status DatasetIteratorTracing::ChangeFileMode() { | |||
| if (value_.empty()) { | |||
| return Status::OK(); | |||
| } | |||
| Status DatasetIteratorTracing::GetPipelineTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) { | |||
| return {StatusCode::kMDUnexpectedError, "Dataset Iterator Tracing does not record pipeline time."}; | |||
| } | |||
| Status DatasetIteratorTracing::GetPushTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) { | |||
| return {StatusCode::kMDUnexpectedError, "Dataset Iterator Tracing does not record push time."}; | |||
| } | |||
| Status DatasetIteratorTracing::GetBatchTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) { | |||
| return {StatusCode::kMDUnexpectedError, "Dataset Iterator Tracing does not record batch time."}; | |||
| } | |||
| if (chmod(common::SafeCStr(file_path_), S_IRUSR | S_IWUSR) == -1) { | |||
| std::string err_str = "Change file mode failed," + file_path_; | |||
| return Status(StatusCode::kMDUnexpectedError, err_str); | |||
| Status DatasetIteratorTracing::GetConnectorSize(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) { | |||
| return GetRecordEntry(start_step, end_step, CONNECTOR_CAPACITY_OFFSET, result); | |||
| } | |||
| Status DatasetIteratorTracing::GetEmptyQueueFrequency(int32_t start_step, int32_t end_step, float_t *empty_queue_freq) { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| auto total_steps = records_.size() / records_per_step_; | |||
| MS_LOG(DEBUG) << "start_step: " << start_step << " end_step: " << end_step; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_step <= total_steps, | |||
| "Expected start_step <= total_steps. Got start_step: " + std::to_string(start_step) + | |||
| " total_steps: " + std::to_string(total_steps)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(end_step <= total_steps, | |||
| "Expected end_step <= total_steps. Got end_step: " + std::to_string(end_step) + | |||
| " total_steps: " + std::to_string(total_steps)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_step <= end_step, | |||
| "Expected start_step <= end_step. Got start_step: " + std::to_string(start_step) + | |||
| " end_step: " + std::to_string(end_step)); | |||
| uint32_t total = end_step - start_step + 1; | |||
| uint32_t count = 0U; | |||
| for (auto step_num = start_step; step_num <= end_step; step_num++) { | |||
| auto idx = (step_num - 1) * records_per_step_ + CONNECTOR_CAPACITY_OFFSET; | |||
| count += static_cast<uint32_t>(records_[idx].value == 0); | |||
| } | |||
| *empty_queue_freq = static_cast<float_t>(count) / static_cast<float_t>(total); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -23,24 +23,24 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| constexpr int32_t RECORDS_PER_STEP_DATASET_ITERATOR = 1; | |||
| class DatasetIteratorTracing : public Tracing { | |||
| public: | |||
| // Constructor | |||
| DatasetIteratorTracing() = default; | |||
| DatasetIteratorTracing() : Tracing(RECORDS_PER_STEP_DATASET_ITERATOR) {} | |||
| // Destructor | |||
| ~DatasetIteratorTracing() override = default; | |||
| // Record tracing data | |||
| // @return Status The status code returned | |||
| Status Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value, | |||
| const uint64_t time_stamp); | |||
| std::string Name() const override { return kDatasetIteratorTracingName; }; | |||
| Status Init(const std::string &dir_path, const std::string &device_id) override; | |||
| Status ChangeFileMode() override; | |||
| Status GetPipelineTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) override; | |||
| Status GetPushTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) override; | |||
| Status GetBatchTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) override; | |||
| Status GetConnectorSize(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) override; | |||
| Status GetEmptyQueueFrequency(int32_t start_step, int32_t end_step, float_t *empty_queue_freq) override; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -14,47 +14,67 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include <sys/stat.h> | |||
| #include "minddata/dataset/engine/perf/device_queue_tracing.h" | |||
| #include <fstream> | |||
| #include <string> | |||
| #include "minddata/dataset/engine/perf/device_queue_tracing.h" | |||
| #ifndef ENABLE_ANDROID | |||
| #include "utils/log_adapter.h" | |||
| #else | |||
| #include "mindspore/lite/src/common/log_adapter.h" | |||
| #endif | |||
| #include "minddata/dataset/util/path.h" | |||
| #include "mindspore/core/utils/ms_utils.h" | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| void DeviceQueueTracing::Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, | |||
| const int32_t value, const uint64_t time_stamp) { | |||
| // Format: "type extra-info batch-num value" | |||
| // type: 0: time, 1: connector size | |||
| // extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time | |||
| // if type is 1 - connector capacity | |||
| // batch-num: batch number | |||
| // value: if type is 0 - value is time(ms) | |||
| // if type is 1 - value is connector size | |||
| // time-stamp: time stamp | |||
| // Examples: | |||
| // 0 0 20 10 xxx- The 20th batch took 10ms to get data from pipeline. | |||
| // 1 64 20 5 xxx- Connector size is 5 when get the 20th batch.Connector capacity is 64. | |||
| std::string data = std::to_string(type) + " " + std::to_string(extra_info) + " " + std::to_string(batch_num) + " " + | |||
| std::to_string(value) + " " + std::to_string(time_stamp); | |||
| value_.emplace_back(data); | |||
| } | |||
| constexpr int32_t PUSH_TIME_OFFSET = 0; | |||
| constexpr int32_t BATCH_TIME_OFFSET = 1; | |||
| constexpr int32_t PIPELINE_TIME_OFFSET = 2; | |||
| constexpr int32_t CONNECTOR_CAPACITY_OFFSET = 3; | |||
| Status DeviceQueueTracing::Init(const std::string &dir_path, const std::string &device_id) { | |||
| file_path_ = (Path(dir_path) / Path("device_queue_profiling_" + device_id + ".txt")).ToString(); | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceQueueTracing::ChangeFileMode() { | |||
| if (value_.empty()) { | |||
| return Status::OK(); | |||
| } | |||
| Status DeviceQueueTracing::GetPipelineTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) { | |||
| return GetRecordEntry(start_step, end_step, PIPELINE_TIME_OFFSET, result); | |||
| } | |||
| Status DeviceQueueTracing::GetPushTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) { | |||
| return GetRecordEntry(start_step, end_step, PUSH_TIME_OFFSET, result); | |||
| } | |||
| Status DeviceQueueTracing::GetBatchTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) { | |||
| return GetRecordEntry(start_step, end_step, BATCH_TIME_OFFSET, result); | |||
| } | |||
| Status DeviceQueueTracing::GetConnectorSize(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) { | |||
| return GetRecordEntry(start_step, end_step, CONNECTOR_CAPACITY_OFFSET, result); | |||
| } | |||
| Status DeviceQueueTracing::GetEmptyQueueFrequency(int32_t start_step, int32_t end_step, float_t *empty_queue_freq) { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| auto total_steps = records_.size() / records_per_step_; | |||
| MS_LOG(DEBUG) << "start_step: " << start_step << " end_step: " << end_step; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_step <= total_steps, | |||
| "Expected start_step <= total_steps. Got start_step: " + std::to_string(start_step) + | |||
| " total_steps: " + std::to_string(total_steps)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(end_step <= total_steps, | |||
| "Expected end_step <= total_steps. Got end_step: " + std::to_string(end_step) + | |||
| " total_steps: " + std::to_string(total_steps)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_step <= end_step, | |||
| "Expected start_step <= end_step. Got start_step: " + std::to_string(start_step) + | |||
| " end_step: " + std::to_string(end_step)); | |||
| if (chmod(common::SafeCStr(file_path_), S_IRUSR | S_IWUSR) == -1) { | |||
| std::string err_str = "Change file mode failed," + file_path_; | |||
| return Status(StatusCode::kMDUnexpectedError, err_str); | |||
| uint32_t total = end_step - start_step + 1; | |||
| uint32_t count = 0U; | |||
| for (auto step_num = start_step; step_num <= end_step; step_num++) { | |||
| auto idx = (step_num - 1) * records_per_step_ + CONNECTOR_CAPACITY_OFFSET; | |||
| count += static_cast<uint32_t>(records_[idx].value == 0); | |||
| } | |||
| *empty_queue_freq = static_cast<float_t>(count) / static_cast<float_t>(total); | |||
| return Status::OK(); | |||
| } | |||
| } // namespace dataset | |||
| @@ -23,24 +23,24 @@ | |||
| namespace mindspore { | |||
| namespace dataset { | |||
| constexpr int32_t RECORDS_PER_STEP_DEVICE_QUEUE = 4; | |||
| class DeviceQueueTracing : public Tracing { | |||
| public: | |||
| // Constructor | |||
| DeviceQueueTracing() = default; | |||
| DeviceQueueTracing() : Tracing(RECORDS_PER_STEP_DEVICE_QUEUE) {} | |||
| // Destructor | |||
| ~DeviceQueueTracing() override = default; | |||
| // Record tracing data | |||
| // @return Status The status code returned | |||
| void Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value, | |||
| const uint64_t time_stamp); | |||
| std::string Name() const override { return kDeviceQueueTracingName; }; | |||
| Status Init(const std::string &dir_path, const std::string &device_id) override; | |||
| Status ChangeFileMode() override; | |||
| Status GetPipelineTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) override; | |||
| Status GetPushTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) override; | |||
| Status GetBatchTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) override; | |||
| Status GetConnectorSize(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) override; | |||
| Status GetEmptyQueueFrequency(int32_t start_step, int32_t end_step, float_t *empty_queue_freq) override; | |||
| }; | |||
| } // namespace dataset | |||
| } // namespace mindspore | |||
| @@ -60,7 +60,6 @@ Status Monitor::operator()() { | |||
| RETURN_IF_NOT_OK(tree_consumer_->GetProfilingManager()->Analyze()); | |||
| RETURN_IF_NOT_OK(tree_consumer_->GetProfilingManager()->SaveProfilingData()); | |||
| RETURN_IF_NOT_OK(tree_consumer_->GetProfilingManager()->ChangeFileMode()); | |||
| cfg->set_profiler_file_status(true); | |||
| return Status::OK(); | |||
| } | |||
| @@ -14,6 +14,7 @@ | |||
| * limitations under the License. | |||
| */ | |||
| #include "minddata/dataset/engine/perf/profiling.h" | |||
| #include <sys/stat.h> | |||
| #include <cstdlib> | |||
| #include <fstream> | |||
| #include "utils/ms_utils.h" | |||
| @@ -25,9 +26,9 @@ | |||
| #include "minddata/dataset/engine/perf/monitor.h" | |||
| #include "minddata/dataset/engine/perf/device_queue_tracing.h" | |||
| #include "minddata/dataset/engine/perf/connector_size.h" | |||
| #include "minddata/dataset/engine/perf/connector_throughput.h" | |||
| #include "minddata/dataset/engine/perf/cpu_sampling.h" | |||
| #include "minddata/dataset/engine/perf/cpu_sampler.h" | |||
| #include "minddata/dataset/engine/perf/dataset_iterator_tracing.h" | |||
| #include "minddata/dataset/engine/execution_tree.h" | |||
| #include "minddata/dataset/util/log_adapter.h" | |||
| namespace mindspore { | |||
| @@ -50,6 +51,63 @@ Status Tracing::SaveToFile() { | |||
| return Status::OK(); | |||
| } | |||
| Status Tracing::ChangeFileMode() { | |||
| if (value_.empty()) { | |||
| return Status::OK(); | |||
| } | |||
| if (chmod(common::SafeCStr(file_path_), S_IRUSR | S_IWUSR) == -1) { | |||
| std::string err_str = "Change file mode failed," + file_path_; | |||
| return Status(StatusCode::kMDUnexpectedError, err_str); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| void Tracing::Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value, | |||
| const uint64_t time_stamp) { | |||
| // Format: "type extra-info batch-num value" | |||
| // type: 0: time, 1: connector size | |||
| // extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time | |||
| // if type is 1 - connector capacity | |||
| // batch-num: batch number | |||
| // value: if type is 0 - value is time(ms) | |||
| // if type is 1 - value is connector size | |||
| // time-stamp: time stamp | |||
| // Examples: | |||
| // 0 0 20 10 xxx- The 20th batch took 10ms to get data from pipeline. | |||
| // 1 64 20 5 xxx- Connector size is 5 when get the 20th batch.Connector capacity is 64. | |||
| TracingRecord record = {type, extra_info, batch_num, value, time_stamp}; | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| (void)records_.emplace_back(record); | |||
| (void)value_.emplace_back(record.ToString()); | |||
| } | |||
| Status Tracing::GetRecordEntry(int32_t start_step, int32_t end_step, int32_t record_offset, | |||
| std::vector<int32_t> *result) { | |||
| std::lock_guard<std::mutex> guard(lock_); | |||
| auto total_steps = records_.size() / records_per_step_; | |||
| MS_LOG(DEBUG) << "start_step: " << start_step << " end_step: " << end_step; | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_step <= total_steps, | |||
| "Expected start_step <= total_steps. Got start_step: " + std::to_string(start_step) + | |||
| " total_steps: " + std::to_string(total_steps)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(end_step <= total_steps, | |||
| "Expected end_step <= total_steps. Got end_step: " + std::to_string(end_step) + | |||
| " total_steps: " + std::to_string(total_steps)); | |||
| CHECK_FAIL_RETURN_UNEXPECTED(start_step <= end_step, | |||
| "Expected start_step <= end_step. Got start_step: " + std::to_string(start_step) + | |||
| " end_step: " + std::to_string(end_step)); | |||
| for (auto step_num = start_step; step_num <= end_step; step_num++) { | |||
| // each step has 4 entries in device queue tracing | |||
| auto idx = (step_num - 1) * records_per_step_ + record_offset; | |||
| assert(idx < records_.size()); | |||
| (void)result->emplace_back(records_[idx].value); | |||
| } | |||
| return Status::OK(); | |||
| } | |||
| Tracing::Tracing(int32_t records_per_step) : records_per_step_(records_per_step) {} | |||
| Status Sampling::ReadJson(nlohmann::json *output) { | |||
| RETURN_UNEXPECTED_IF_NULL(output); | |||
| Path path = Path(file_path_); | |||
| @@ -134,13 +192,14 @@ Status ProfilingManager::Initialize(ExecutionTree *tree) { | |||
| std::shared_ptr<Sampling> connector_size_sampling = std::make_shared<ConnectorSize>(tree_); | |||
| RETURN_IF_NOT_OK(RegisterSamplingNode(connector_size_sampling)); | |||
| std::shared_ptr<Sampling> connector_thr_sampling = std::make_shared<ConnectorThroughput>(tree_); | |||
| RETURN_IF_NOT_OK(RegisterSamplingNode(connector_thr_sampling)); | |||
| #ifndef ENABLE_ANDROID | |||
| std::shared_ptr<Sampling> cpu_sampling = std::make_shared<CpuSampling>(tree_); | |||
| RETURN_IF_NOT_OK(RegisterSamplingNode(cpu_sampling)); | |||
| std::shared_ptr<Sampling> cpu_sampler = std::make_shared<CpuSampler>(tree_); | |||
| RETURN_IF_NOT_OK(RegisterSamplingNode(cpu_sampler)); | |||
| #endif | |||
| // can insert a correct timestamp so that we can ignore the samples that were taken | |||
| // during start up of the pipeline. | |||
| (void)epoch_end_ts_.emplace_back(0); | |||
| (void)epoch_end_step_.emplace_back(0); | |||
| return Status::OK(); | |||
| } | |||
| @@ -214,6 +273,7 @@ Status ProfilingManager::SaveProfilingData() { | |||
| MS_LOG(INFO) << "Save profiling data end."; | |||
| return Status::OK(); | |||
| } | |||
| Status ProfilingManager::Analyze() { | |||
| if (!IsProfilingEnable()) { | |||
| return Status::OK(); | |||
| @@ -240,8 +300,138 @@ Status ProfilingManager::ChangeFileMode() { | |||
| return Status::OK(); | |||
| } | |||
| #ifndef ENABLE_ANDROID | |||
| Status ProfilingManager::GetUserCpuUtil(int32_t epoch_num, std::vector<uint8_t> *result) { | |||
| std::shared_ptr<CpuSampler> cpu_node; | |||
| uint64_t start_ts, end_ts; | |||
| RETURN_IF_NOT_OK(PopulateCpuSamplerAPIInputs(epoch_num, &start_ts, &end_ts, &cpu_node)); | |||
| return cpu_node->GetSystemUserCpuUtil(start_ts, end_ts, result); | |||
| } | |||
| Status ProfilingManager::GetSysCpuUtil(int32_t epoch_num, std::vector<uint8_t> *result) { | |||
| std::shared_ptr<CpuSampler> cpu_node; | |||
| uint64_t start_ts, end_ts; | |||
| RETURN_IF_NOT_OK(PopulateCpuSamplerAPIInputs(epoch_num, &start_ts, &end_ts, &cpu_node)); | |||
| return cpu_node->GetSystemSysCpuUtil(start_ts, end_ts, result); | |||
| } | |||
| Status ProfilingManager::GetUserCpuUtil(int32_t op_id, int32_t epoch_num, std::vector<uint16_t> *result) { | |||
| std::shared_ptr<CpuSampler> cpu_node; | |||
| uint64_t start_ts, end_ts; | |||
| RETURN_IF_NOT_OK(PopulateCpuSamplerAPIInputs(epoch_num, &start_ts, &end_ts, &cpu_node)); | |||
| return cpu_node->GetOpUserCpuUtil(op_id, start_ts, end_ts, result); | |||
| } | |||
| Status ProfilingManager::GetSysCpuUtil(int32_t op_id, int32_t epoch_num, std::vector<uint16_t> *result) { | |||
| std::shared_ptr<CpuSampler> cpu_node; | |||
| uint64_t start_ts, end_ts; | |||
| RETURN_IF_NOT_OK(PopulateCpuSamplerAPIInputs(epoch_num, &start_ts, &end_ts, &cpu_node)); | |||
| return cpu_node->GetOpSysCpuUtil(op_id, start_ts, end_ts, result); | |||
| } | |||
| Status ProfilingManager::PopulateCpuSamplerAPIInputs(int32_t epoch_num, uint64_t *start_ts, uint64_t *end_ts, | |||
| std::shared_ptr<CpuSampler> *node) { | |||
| RETURN_IF_NOT_OK(EpochToTimeInterval(epoch_num, start_ts, end_ts)); | |||
| std::shared_ptr<Sampling> sampling_node; | |||
| RETURN_IF_NOT_OK(GetSamplingNode(kCpuSamplerName, &sampling_node)); | |||
| *node = std::dynamic_pointer_cast<CpuSampler>(sampling_node); | |||
| return Status::OK(); | |||
| } | |||
| #endif | |||
| Status ProfilingManager::EpochToTimeInterval(int32_t epoch_num, uint64_t *start_ts, uint64_t *end_ts) { | |||
| if (epoch_num <= 0 || epoch_num >= epoch_end_ts_.size()) { | |||
| std::string err = "Epoch: " + std::to_string(epoch_num) + " is invalid."; | |||
| MS_LOG(INFO) << err; | |||
| return {StatusCode::kMDUnexpectedError, err}; | |||
| } | |||
| *start_ts = epoch_end_ts_[epoch_num - 1]; | |||
| *end_ts = epoch_end_ts_[epoch_num]; | |||
| return Status::OK(); | |||
| } | |||
| Status ProfilingManager::EpochToStepInterval(int32_t epoch_num, uint32_t *start_step, uint32_t *end_step) { | |||
| if (epoch_num <= 0 || epoch_num >= epoch_end_step_.size()) { | |||
| std::string err = "Epoch: " + std::to_string(epoch_num) + " is invalid."; | |||
| MS_LOG(INFO) << err; | |||
| return {StatusCode::kMDUnexpectedError, err}; | |||
| } | |||
| *start_step = epoch_end_step_[epoch_num - 1] + 1; | |||
| *end_step = epoch_end_step_[epoch_num]; | |||
| return Status::OK(); | |||
| } | |||
| Status ProfilingManager::GetConnectorSize(int32_t op_id, int32_t epoch_num, std::vector<int32_t> *result) { | |||
| uint64_t start_ts, end_ts; | |||
| RETURN_IF_NOT_OK(EpochToTimeInterval(epoch_num, &start_ts, &end_ts)); | |||
| std::shared_ptr<Sampling> node; | |||
| RETURN_IF_NOT_OK(GetSamplingNode(kConnectorSizeSamplingName, &node)); | |||
| auto connector_node = std::dynamic_pointer_cast<ConnectorSize>(node); | |||
| return connector_node->GetOpConnectorSize(op_id, start_ts, end_ts, result); | |||
| } | |||
| Status ProfilingManager::GetPipelineTime(int32_t epoch_num, std::vector<int32_t> *result) { | |||
| uint32_t start_step, end_step; | |||
| RETURN_IF_NOT_OK(EpochToStepInterval(epoch_num, &start_step, &end_step)); | |||
| std::shared_ptr<Tracing> node; | |||
| if (GetTracingNode(kDeviceQueueTracingName, &node).IsOk() || | |||
| GetTracingNode(kDatasetIteratorTracingName, &node).IsOk()) { | |||
| return node->GetPipelineTime(start_step, end_step, result); | |||
| } else { | |||
| return {StatusCode::kMDUnexpectedError, "Cannot find appropriate tracing node"}; | |||
| } | |||
| } | |||
| Status ProfilingManager::GetPushTime(int32_t epoch_num, std::vector<int32_t> *result) { | |||
| uint32_t start_step, end_step; | |||
| RETURN_IF_NOT_OK(EpochToStepInterval(epoch_num, &start_step, &end_step)); | |||
| std::shared_ptr<Tracing> node; | |||
| if (GetTracingNode(kDeviceQueueTracingName, &node).IsOk() || | |||
| GetTracingNode(kDatasetIteratorTracingName, &node).IsOk()) { | |||
| return node->GetPushTime(start_step, end_step, result); | |||
| } else { | |||
| return {StatusCode::kMDUnexpectedError, "Cannot find appropriate tracing node"}; | |||
| } | |||
| } | |||
| Status ProfilingManager::GetBatchTime(int32_t epoch_num, std::vector<int32_t> *result) { | |||
| uint32_t start_step, end_step; | |||
| RETURN_IF_NOT_OK(EpochToStepInterval(epoch_num, &start_step, &end_step)); | |||
| std::shared_ptr<Tracing> node; | |||
| if (GetTracingNode(kDeviceQueueTracingName, &node).IsOk() || | |||
| GetTracingNode(kDatasetIteratorTracingName, &node).IsOk()) { | |||
| return node->GetBatchTime(start_step, end_step, result); | |||
| } else { | |||
| return {StatusCode::kMDUnexpectedError, "Cannot find appropriate tracing node"}; | |||
| } | |||
| } | |||
| Status ProfilingManager::GetConnectorSize(int32_t epoch_num, std::vector<int32_t> *result) { | |||
| uint32_t start_step, end_step; | |||
| RETURN_IF_NOT_OK(EpochToStepInterval(epoch_num, &start_step, &end_step)); | |||
| std::shared_ptr<Tracing> node; | |||
| if (GetTracingNode(kDeviceQueueTracingName, &node).IsOk() || | |||
| GetTracingNode(kDatasetIteratorTracingName, &node).IsOk()) { | |||
| return node->GetConnectorSize(start_step, end_step, result); | |||
| } else { | |||
| return {StatusCode::kMDUnexpectedError, "Cannot find appropriate tracing node"}; | |||
| } | |||
| } | |||
| Status ProfilingManager::GetEmptyQueueFrequency(int32_t epoch_num, float_t *result) { | |||
| uint32_t start_step, end_step; | |||
| RETURN_IF_NOT_OK(EpochToStepInterval(epoch_num, &start_step, &end_step)); | |||
| std::shared_ptr<Tracing> node; | |||
| if (GetTracingNode(kDeviceQueueTracingName, &node).IsOk() || | |||
| GetTracingNode(kDatasetIteratorTracingName, &node).IsOk()) { | |||
| return node->GetEmptyQueueFrequency(start_step, end_step, result); | |||
| } else { | |||
| return {StatusCode::kMDUnexpectedError, "Cannot find appropriate tracing node"}; | |||
| } | |||
| } | |||
| void ProfilingManager::RecordEndOfEpoch(uint32_t step_num) { | |||
| MS_LOG(INFO) << "Record end of epoch. step_num: " << step_num; | |||
| MS_LOG(INFO) << "Recording end of epoch. step_num: " << step_num; | |||
| (void)epoch_end_ts_.emplace_back(ProfilingTime::GetCurMilliSecond()); | |||
| (void)epoch_end_step_.emplace_back(step_num); | |||
| } | |||
| @@ -21,6 +21,7 @@ | |||
| #include <unordered_map> | |||
| #include <memory> | |||
| #include <chrono> | |||
| #include <mutex> | |||
| #include <nlohmann/json.hpp> | |||
| #include "minddata/dataset/util/status.h" | |||
| @@ -30,12 +31,12 @@ namespace dataset { | |||
| class Monitor; | |||
| class ExecutionTree; | |||
| class TreeConsumer; | |||
| class CpuSampler; | |||
| const char kDeviceQueueTracingName[] = "Device_Queue_Tracing"; | |||
| const char kDatasetIteratorTracingName[] = "Dataset_Iterator_Tracing"; | |||
| const char kConnectorSizeSamplingName[] = "Connector_Size_Sampling"; | |||
| const char kConnectorThroughputSamplingName[] = "Connector_Throughput_Sampling"; | |||
| const char kCpuSamplingName[] = "Cpu_Sampling"; | |||
| const char kCpuSamplerName[] = "Cpu_Sampler"; | |||
| // Profiling is a class of basic unit of profiling action | |||
| // This base class encapsulate the serialization output logic | |||
| @@ -59,6 +60,7 @@ class Profiling : std::enable_shared_from_this<Profiling> { | |||
| protected: | |||
| std::string file_path_; | |||
| std::mutex lock_; | |||
| }; | |||
| // Sampling is a class of profiling which generate samples periodically. | |||
| @@ -72,15 +74,40 @@ class Sampling : public Profiling { | |||
| Status ReadJson(nlohmann::json *output); | |||
| }; | |||
// One tracing sample as kept by Tracing.
struct TracingRecord_s {
  int32_t type;        // 0: time, 1: connector size
  int32_t extra_info;  // type 0 - 0: pipeline time, 1: push tdt time, 2: batch time; type 1 - connector capacity
  int32_t batch_num;   // batch number
  int32_t value;       // type 0 - time(ms); type 1 - connector size
  uint64_t ts;         // time stamp

  // Text form "type extra-info batch-num value time-stamp", fields separated by single spaces.
  // const-qualified so it can be called on const records and through const references.
  std::string ToString() const {
    return std::to_string(type) + " " + std::to_string(extra_info) + " " + std::to_string(batch_num) + " " +
           std::to_string(value) + " " + std::to_string(ts);
  }
};
// Both names of the original typedef remain available.
using TracingRecord = TracingRecord_s;
| // Tracing is class of profiling which record samples upon request. | |||
| class Tracing : public Profiling { | |||
| public: | |||
| // Tracing has minimal interface to provide flexible on data recording. | |||
| // It only includes some common routines. | |||
| Status SaveToFile(); | |||
| Status SaveToFile() override; | |||
| Status ChangeFileMode() override; | |||
| virtual Status GetPipelineTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) = 0; | |||
| virtual Status GetPushTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) = 0; | |||
| virtual Status GetBatchTime(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) = 0; | |||
| virtual Status GetConnectorSize(int32_t start_step, int32_t end_step, std::vector<int32_t> *result) = 0; | |||
| virtual Status GetEmptyQueueFrequency(int32_t start_step, int32_t end_step, float_t *empty_queue_freq) = 0; | |||
| void Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value, | |||
| const uint64_t time_stamp); | |||
| protected: | |||
| explicit Tracing(int32_t records_per_step); | |||
| const int32_t records_per_step_; | |||
| std::vector<std::string> value_; | |||
| std::vector<TracingRecord> records_; | |||
| Status GetRecordEntry(int32_t start_step, int32_t end_step, int32_t record_offset, std::vector<int32_t> *result); | |||
| }; | |||
| // ProfilingManager is a class manages all profiling infrastructure | |||
| @@ -135,13 +162,80 @@ class ProfilingManager { | |||
| // Analyze profile data and print warning messages | |||
| Status Analyze(); | |||
| #ifndef ENABLE_ANDROID | |||
| /// \brief API to get User CPU utilization for the system | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with the sampled User CPU Utilization for the entire system | |||
| /// \return Status object with the error code | |||
| Status GetUserCpuUtil(int32_t epoch_num, std::vector<uint8_t> *result); | |||
| /// \brief API to get System CPU utilization for the system | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with the sampled System CPU Utilization for the entire system | |||
| /// \return Status object with the error code | |||
| Status GetSysCpuUtil(int32_t epoch_num, std::vector<uint8_t> *result); | |||
| /// \brief API to get User CPU Utilization of an MD operator | |||
| /// \param [in] op_id The id of the operator | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with the sampled User CPU Utilization of the operator. | |||
| /// \return Status object with the error code | |||
| Status GetUserCpuUtil(int32_t op_id, int32_t epoch_num, std::vector<uint16_t> *result); | |||
| /// \brief API to get System CPU Utilization of an MD operator | |||
| /// \param [in] op_id The id of the operator | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with the sampled System CPU Utilization of the operator. | |||
| /// \return Status object with the error code | |||
| Status GetSysCpuUtil(int32_t op_id, int32_t epoch_num, std::vector<uint16_t> *result); | |||
| #endif | |||
| /// \brief API to get the connector size of an MD operator | |||
| /// \param [in] op_id The id of the operator | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with the sampled connector sizes of the operator | |||
| /// \return Status object with the error code | |||
| Status GetConnectorSize(int32_t op_id, int32_t epoch_num, std::vector<int32_t> *result); | |||
| /// \brief API to get the connector size of DatasetIterator or DeviceQueueOp | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with connector size at each step | |||
| /// \return Status object with the error code | |||
| Status GetConnectorSize(int32_t epoch_num, std::vector<int32_t> *result); | |||
| /// \brief API to get the pipeline time of batches | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with the pipeline time for each step | |||
| /// \return Status object with the error code | |||
| Status GetPipelineTime(int32_t epoch_num, std::vector<int32_t> *result); | |||
| /// \brief API to get the push time of batches | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with the push time for each step | |||
| /// \return Status object with the error code | |||
| Status GetPushTime(int32_t epoch_num, std::vector<int32_t> *result); | |||
| /// \brief API to get the batch time of batches | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result A vector with the batch time for each step | |||
| /// \return Status object with the error code | |||
| Status GetBatchTime(int32_t epoch_num, std::vector<int32_t> *result); | |||
| /// \brief API to get the fraction of steps during which the DatasetIterator or DeviceQueueOp connector was empty | |||
| /// \param [in] epoch_num The epoch number for which results are requested | |||
| /// \param [out] result The empty queue frequency | |||
| /// \return Status object with the error code | |||
| Status GetEmptyQueueFrequency(int32_t epoch_num, float_t *result); | |||
| private: | |||
| std::unique_ptr<Monitor> perf_monitor_; | |||
| bool enabled_; | |||
| std::unordered_map<std::string, std::shared_ptr<Tracing>> tracing_nodes_; | |||
| std::unordered_map<std::string, std::shared_ptr<Sampling>> sampling_nodes_; | |||
| ExecutionTree *tree_; // ExecutionTree pointer | |||
| TreeConsumer *tree_consumer_; // TreeConsumer pointer | |||
| std::string dir_path_; // where to create profiling file | |||
| std::string device_id_; // used when create profiling file,filename_device_id.suffix | |||
| std::vector<uint64_t> epoch_end_ts_; // End of epoch timestamp | |||
| std::vector<uint32_t> epoch_end_step_; // End of epoch step number | |||
| @@ -155,10 +249,13 @@ class ProfilingManager { | |||
| // @return Status The status code returned | |||
| Status RegisterSamplingNode(std::shared_ptr<Sampling> node); | |||
| ExecutionTree *tree_; // ExecutionTree pointer | |||
| TreeConsumer *tree_consumer_; // TreeConsumer pointer | |||
| std::string dir_path_; // where to create profiling file | |||
| std::string device_id_; // used when create profiling file,filename_device_id.suffix | |||
| Status EpochToStepInterval(int32_t epoch_num, uint32_t *start_step, uint32_t *end_step); | |||
| // get start and ending timestamp of an epoch | |||
| Status EpochToTimeInterval(int32_t epoch_num, uint64_t *start_ts, uint64_t *end_ts); | |||
| #ifndef ENABLE_ANDROID | |||
| Status PopulateCpuSamplerAPIInputs(int32_t epoch_num, uint64_t *start_ts, uint64_t *end_ts, | |||
| std::shared_ptr<CpuSampler> *node); | |||
| #endif | |||
| }; | |||
| enum ProfilingType { TIME, CONNECTOR_DEPTH }; | |||
| @@ -257,8 +257,7 @@ Status TreeAdapter::GetNext(TensorRow *row) { | |||
| cur_batch_num_++; | |||
| cur_connector_size_ = tree_->root()->ConnectorSize(); | |||
| cur_connector_capacity_ = tree_->root()->ConnectorCapacity(); | |||
| RETURN_IF_NOT_OK( | |||
| tracing_->Record(CONNECTOR_DEPTH, cur_connector_capacity_, cur_batch_num_, cur_connector_size_, end_time)); | |||
| tracing_->Record(CONNECTOR_DEPTH, cur_connector_capacity_, cur_batch_num_, cur_connector_size_, end_time); | |||
| } | |||
| #endif | |||
| return Status::OK(); | |||
| @@ -180,7 +180,6 @@ if(BUILD_MINDDATA STREQUAL "full") | |||
| ${MINDDATA_DIR}/engine/perf/monitor.cc | |||
| ${MINDDATA_DIR}/engine/perf/device_queue_tracing.cc | |||
| ${MINDDATA_DIR}/engine/perf/connector_size.cc | |||
| ${MINDDATA_DIR}/engine/perf/connector_throughput.cc | |||
| ${MINDDATA_DIR}/engine/perf/dataset_iterator_tracing.cc | |||
| ${MINDDATA_DIR}/engine/datasetops/source/sampler/sampler.cc | |||
| ${MINDDATA_DIR}/engine/datasetops/source/sampler/subset_sampler.cc | |||
| @@ -82,8 +82,7 @@ def confirm_cpuutil(num_pipeline_ops, cpu_uti_file): | |||
| with open(cpu_uti_file) as file1: | |||
| data = json.load(file1) | |||
| op_info = data["op_info"] | |||
| # Confirm <num_pipeline_ops>+1 ops in CPU util file (including op_id=-1 for monitor thread) | |||
| assert len(op_info) == num_pipeline_ops + 1 | |||
| assert len(op_info) == num_pipeline_ops | |||
| def confirm_ops_in_pipeline(num_ops, op_list, pipeline_file): | |||
| @@ -176,7 +175,6 @@ def test_profiling_complex_pipeline(): | |||
| if op_info[i]["op_type"] != "ZipOp": | |||
| assert "size" in op_info[i]["metrics"]["output_queue"] | |||
| assert "length" in op_info[i]["metrics"]["output_queue"] | |||
| assert "throughput" in op_info[i]["metrics"]["output_queue"] | |||
| else: | |||
| # Note: Zip is an inline op and hence does not have metrics information | |||
| assert op_info[i]["metrics"] is None | |||
| @@ -243,7 +241,6 @@ def test_profiling_inline_ops_pipeline1(): | |||
| else: | |||
| assert "size" in op_info[i]["metrics"]["output_queue"] | |||
| assert "length" in op_info[i]["metrics"]["output_queue"] | |||
| assert "throughput" in op_info[i]["metrics"]["output_queue"] | |||
| # Confirm CPU util JSON file content, when 4 ops are in the pipeline JSON file | |||
| confirm_cpuutil(4, cpu_util_file) | |||
| @@ -294,7 +291,6 @@ def test_profiling_inline_ops_pipeline2(): | |||
| else: | |||
| assert "size" in op_info[i]["metrics"]["output_queue"] | |||
| assert "length" in op_info[i]["metrics"]["output_queue"] | |||
| assert "throughput" in op_info[i]["metrics"]["output_queue"] | |||
| # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file | |||
| confirm_cpuutil(5, cpu_util_file) | |||
| @@ -384,7 +380,6 @@ def test_profiling_basic_pipeline(): | |||
| else: | |||
| assert "size" in op_info[i]["metrics"]["output_queue"] | |||
| assert "length" in op_info[i]["metrics"]["output_queue"] | |||
| assert "throughput" in op_info[i]["metrics"]["output_queue"] | |||
| # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file | |||
| confirm_cpuutil(5, cpu_util_file) | |||
| @@ -441,7 +436,6 @@ def test_profiling_cifar10_pipeline(): | |||
| else: | |||
| assert "size" in op_info[i]["metrics"]["output_queue"] | |||
| assert "length" in op_info[i]["metrics"]["output_queue"] | |||
| assert "throughput" in op_info[i]["metrics"]["output_queue"] | |||
| # Confirm CPU util JSON file content, when 5 ops are in the pipeline JSON file | |||
| confirm_cpuutil(5, cpu_util_file) | |||