You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

common.rs 7.6 kB

7 months ago
7 months ago
7 months ago
7 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. use core::fmt;
  2. use std::borrow::Cow;
  3. use aligned_vec::{AVec, ConstAlign};
  4. use eyre::Context as _;
  5. use uuid::Uuid;
  6. use crate::{daemon_to_daemon::InterDaemonEvent, id::NodeId, BuildId, DataflowId};
  7. pub use log::Level as LogLevel;
  8. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
  9. #[must_use]
  10. pub struct LogMessage {
  11. pub session_id: Option<BuildId>,
  12. pub dataflow_id: Option<DataflowId>,
  13. pub node_id: Option<NodeId>,
  14. pub daemon_id: Option<DaemonId>,
  15. pub level: LogLevel,
  16. pub target: Option<String>,
  17. pub module_path: Option<String>,
  18. pub file: Option<String>,
  19. pub line: Option<u32>,
  20. pub message: String,
  21. }
  22. #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
  23. pub struct NodeError {
  24. pub timestamp: uhlc::Timestamp,
  25. pub cause: NodeErrorCause,
  26. pub exit_status: NodeExitStatus,
  27. }
  28. impl std::fmt::Display for NodeError {
  29. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  30. if let NodeErrorCause::FailedToSpawn(err) = &self.cause {
  31. return write!(f, "failed to spawn node: {err}");
  32. }
  33. match &self.exit_status {
  34. NodeExitStatus::Success => write!(f, "<success>"),
  35. NodeExitStatus::IoError(err) => write!(f, "I/O error while reading exit status: {err}"),
  36. NodeExitStatus::ExitCode(code) => write!(f, "exited with code {code}"),
  37. NodeExitStatus::Signal(signal) => {
  38. let signal_str: Cow<_> = match signal {
  39. 1 => "SIGHUP".into(),
  40. 2 => "SIGINT".into(),
  41. 3 => "SIGQUIT".into(),
  42. 4 => "SIGILL".into(),
  43. 6 => "SIGABRT".into(),
  44. 8 => "SIGFPE".into(),
  45. 9 => "SIGKILL".into(),
  46. 11 => "SIGSEGV".into(),
  47. 13 => "SIGPIPE".into(),
  48. 14 => "SIGALRM".into(),
  49. 15 => "SIGTERM".into(),
  50. 22 => "SIGABRT".into(),
  51. 23 => "NSIG".into(),
  52. other => other.to_string().into(),
  53. };
  54. if matches!(self.cause, NodeErrorCause::GraceDuration) {
  55. write!(f, "node was killed by dora because it didn't react to a stop message in time ({signal_str})")
  56. } else {
  57. write!(f, "exited because of signal {signal_str}")
  58. }
  59. }
  60. NodeExitStatus::Unknown => write!(f, "unknown exit status"),
  61. }?;
  62. match &self.cause {
  63. NodeErrorCause::GraceDuration => {}, // handled above
  64. NodeErrorCause::Cascading { caused_by_node } => write!(
  65. f,
  66. ". This error occurred because node `{caused_by_node}` exited before connecting to dora."
  67. )?,
  68. NodeErrorCause::FailedToSpawn(_) => unreachable!(), // handled above
  69. NodeErrorCause::Other { stderr } if stderr.is_empty() => {}
  70. NodeErrorCause::Other { stderr } => {
  71. let line: &str = "---------------------------------------------------------------------------------\n";
  72. let stderr = stderr.trim_end();
  73. write!(f, " with stderr output:\n{line}{stderr}\n{line}")?
  74. },
  75. }
  76. Ok(())
  77. }
  78. }
  79. #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
  80. pub enum NodeErrorCause {
  81. /// Node was killed because it didn't react to a stop message in time.
  82. GraceDuration,
  83. /// Node failed because another node failed before,
  84. Cascading {
  85. caused_by_node: NodeId,
  86. },
  87. FailedToSpawn(String),
  88. Other {
  89. stderr: String,
  90. },
  91. }
  92. #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
  93. pub enum NodeExitStatus {
  94. Success,
  95. IoError(String),
  96. ExitCode(i32),
  97. Signal(i32),
  98. Unknown,
  99. }
  100. impl From<Result<std::process::ExitStatus, std::io::Error>> for NodeExitStatus {
  101. fn from(result: Result<std::process::ExitStatus, std::io::Error>) -> Self {
  102. match result {
  103. Ok(status) => {
  104. if status.success() {
  105. NodeExitStatus::Success
  106. } else if let Some(code) = status.code() {
  107. Self::ExitCode(code)
  108. } else {
  109. #[cfg(unix)]
  110. {
  111. use std::os::unix::process::ExitStatusExt;
  112. if let Some(signal) = status.signal() {
  113. return Self::Signal(signal);
  114. }
  115. }
  116. Self::Unknown
  117. }
  118. }
  119. Err(err) => Self::IoError(err.to_string()),
  120. }
  121. }
  122. }
  123. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
  124. pub struct Timestamped<T> {
  125. pub inner: T,
  126. pub timestamp: uhlc::Timestamp,
  127. }
  128. impl<T> Timestamped<T>
  129. where
  130. T: serde::Serialize,
  131. {
  132. pub fn serialize(&self) -> Vec<u8> {
  133. bincode::serialize(self).unwrap()
  134. }
  135. }
  136. impl Timestamped<InterDaemonEvent> {
  137. pub fn deserialize_inter_daemon_event(bytes: &[u8]) -> eyre::Result<Self> {
  138. bincode::deserialize(bytes).wrap_err("failed to deserialize InterDaemonEvent")
  139. }
  140. }
  /// Identifier of a shared memory region, represented as a plain string.
  pub type SharedMemoryId = String;
  142. #[derive(serde::Serialize, serde::Deserialize, Clone)]
  143. pub enum DataMessage {
  144. Vec(AVec<u8, ConstAlign<128>>),
  145. SharedMemory {
  146. shared_memory_id: String,
  147. len: usize,
  148. drop_token: DropToken,
  149. },
  150. }
  151. impl DataMessage {
  152. pub fn drop_token(&self) -> Option<DropToken> {
  153. match self {
  154. DataMessage::Vec(_) => None,
  155. DataMessage::SharedMemory { drop_token, .. } => Some(*drop_token),
  156. }
  157. }
  158. }
  159. impl fmt::Debug for DataMessage {
  160. fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
  161. match self {
  162. Self::Vec(v) => f
  163. .debug_struct("Vec")
  164. .field("len", &v.len())
  165. .finish_non_exhaustive(),
  166. Self::SharedMemory {
  167. shared_memory_id,
  168. len,
  169. drop_token,
  170. } => f
  171. .debug_struct("SharedMemory")
  172. .field("shared_memory_id", shared_memory_id)
  173. .field("len", len)
  174. .field("drop_token", drop_token)
  175. .finish(),
  176. }
  177. }
  178. }
  179. #[derive(
  180. Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, serde::Serialize, serde::Deserialize,
  181. )]
  182. pub struct DropToken(Uuid);
  183. impl DropToken {
  184. pub fn generate() -> Self {
  185. Self(Uuid::new_v7(uuid::Timestamp::now(uuid::NoContext)))
  186. }
  187. }
  188. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
  189. pub struct DaemonId {
  190. machine_id: Option<String>,
  191. uuid: Uuid,
  192. }
  193. impl DaemonId {
  194. pub fn new(machine_id: Option<String>) -> Self {
  195. DaemonId {
  196. machine_id,
  197. uuid: Uuid::new_v4(),
  198. }
  199. }
  200. pub fn matches_machine_id(&self, machine_id: &str) -> bool {
  201. self.machine_id
  202. .as_ref()
  203. .map(|id| id == machine_id)
  204. .unwrap_or_default()
  205. }
  206. pub fn machine_id(&self) -> Option<&str> {
  207. self.machine_id.as_deref()
  208. }
  209. }
  210. impl std::fmt::Display for DaemonId {
  211. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  212. if let Some(id) = &self.machine_id {
  213. write!(f, "{id}-")?;
  214. }
  215. write!(f, "{}", self.uuid)
  216. }
  217. }
  218. #[derive(Debug, serde::Deserialize, serde::Serialize, Clone, PartialEq, Eq)]
  219. pub struct GitSource {
  220. pub repo: String,
  221. pub commit_hash: String,
  222. }