| @@ -68,8 +68,13 @@ Status ConnectorSize::SaveToFile() { | |||||
| json output; | json output; | ||||
| if (path.Exists()) { | if (path.Exists()) { | ||||
| MS_LOG(DEBUG) << file_path_ << " exists"; | MS_LOG(DEBUG) << file_path_ << " exists"; | ||||
| std::ifstream file(file_path_); | |||||
| file >> output; | |||||
| try { | |||||
| std::ifstream file(file_path_); | |||||
| file >> output; | |||||
| } catch (const std::exception &err) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + file_path_ + | |||||
| ", please delete it and try again!"); | |||||
| } | |||||
| } else { | } else { | ||||
| output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval(); | output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval(); | ||||
| } | } | ||||
| @@ -97,8 +97,13 @@ Status ConnectorThroughput::SaveToFile() { | |||||
| json output; | json output; | ||||
| if (path.Exists()) { | if (path.Exists()) { | ||||
| MS_LOG(DEBUG) << file_path_ << " exists"; | MS_LOG(DEBUG) << file_path_ << " exists"; | ||||
| std::ifstream file(file_path_); | |||||
| file >> output; | |||||
| try { | |||||
| std::ifstream file(file_path_); | |||||
| file >> output; | |||||
| } catch (const std::exception &err) { | |||||
| RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + file_path_ + | |||||
| ", please delete it and try again!"); | |||||
| } | |||||
| } else { | } else { | ||||
| output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval(); | output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval(); | ||||
| } | } | ||||
| @@ -37,7 +37,7 @@ Status Monitor::operator()() { | |||||
| // 2) Iterator has not received EOF | // 2) Iterator has not received EOF | ||||
| while (!this_thread::is_interrupted() && !(tree_->isFinished())) { | while (!this_thread::is_interrupted() && !(tree_->isFinished())) { | ||||
| if (tree_->IsEpochEnd()) { | if (tree_->IsEpochEnd()) { | ||||
| tree_->GetProfilingManager()->SaveProfilingData(); | |||||
| RETURN_IF_NOT_OK(tree_->GetProfilingManager()->SaveProfilingData()); | |||||
| tree_->SetExecuting(); | tree_->SetExecuting(); | ||||
| } | } | ||||
| for (auto &node : tree_->GetProfilingManager()->GetSamplingNodes()) { | for (auto &node : tree_->GetProfilingManager()->GetSamplingNodes()) { | ||||
| @@ -47,8 +47,8 @@ Status Monitor::operator()() { | |||||
| } | } | ||||
| // Output all profiling data upon request. | // Output all profiling data upon request. | ||||
| tree_->GetProfilingManager()->SaveProfilingData(); | |||||
| tree_->GetProfilingManager()->ChangeFileMode(); | |||||
| RETURN_IF_NOT_OK(tree_->GetProfilingManager()->SaveProfilingData()); | |||||
| RETURN_IF_NOT_OK(tree_->GetProfilingManager()->ChangeFileMode()); | |||||
| return Status::OK(); | return Status::OK(); | ||||
| } | } | ||||
| @@ -81,6 +81,8 @@ Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) { | |||||
| } | } | ||||
| } | } | ||||
| } catch (const py::error_already_set &e) { | } catch (const py::error_already_set &e) { | ||||
| MS_LOG(ERROR) << "Pyfunc error, " << e.what() << ". Under sink mode, progress will late exit after 30s " | |||||
| << "for resource release and thread safe"; | |||||
| ret = Status(StatusCode::kPyFuncException, e.what()); | ret = Status(StatusCode::kPyFuncException, e.what()); | ||||
| } | } | ||||
| } | } | ||||