@@ -185,6 +185,7 @@ pub fn attach_dataflow(
 pub fn print_log_message(log_message: LogMessage) {
     let LogMessage {
+        build_id,
         dataflow_id,
         node_id,
         daemon_id,
@@ -201,7 +202,16 @@ pub fn print_log_message(log_message: LogMessage) {
         log::Level::Info => "INFO ".green(),
         other => format!("{other:5}").normal(),
     };
-    let dataflow = format!(" dataflow `{dataflow_id}`").cyan();
+    let dataflow = if let Some(dataflow_id) = dataflow_id {
+        format!(" dataflow `{dataflow_id}`").cyan()
+    } else {
+        String::new().cyan()
+    };
+    let build = if let Some(build_id) = build_id {
+        format!(" build `{build_id}`").cyan()
+    } else {
+        String::new().cyan()
+    };
     let daemon = match daemon_id {
         Some(id) => format!(" on daemon `{id}`"),
         None => " on default daemon".to_string(),
@@ -216,7 +226,7 @@ pub fn print_log_message(log_message: LogMessage) {
         None => "".normal(),
     };
-    println!("{level}{dataflow}{daemon}{node}{target}: {message}");
+    println!("{level}{build}{dataflow}{daemon}{node}{target}: {message}");
 }

 enum AttachEvent {
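
For orientation only, a minimal self-contained sketch of the optional-prefix handling introduced above, using plain `String`s instead of the `colored` crate; the node and target parts are omitted and the IDs in the usage comment are made up:

```rust
// Simplified sketch of the new prefix logic in `print_log_message`:
// each optional ID contributes either " <label> `<id>`" or nothing.
fn format_line(
    level: &str,
    build_id: Option<&str>,
    dataflow_id: Option<&str>,
    daemon_id: Option<&str>,
    message: &str,
) -> String {
    let build = build_id
        .map(|id| format!(" build `{id}`"))
        .unwrap_or_default();
    let dataflow = dataflow_id
        .map(|id| format!(" dataflow `{id}`"))
        .unwrap_or_default();
    let daemon = match daemon_id {
        Some(id) => format!(" on daemon `{id}`"),
        None => " on default daemon".to_string(),
    };
    format!("{level}{build}{dataflow}{daemon}: {message}")
}

// format_line("INFO ", Some("b-42"), None, None, "compiling node")
//   == "INFO  build `b-42` on default daemon: compiling node"
```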
@@ -17,19 +17,18 @@ use dora_message::{
     cli_to_coordinator::ControlRequest,
     common::LogMessage,
     coordinator_to_cli::{ControlRequestReply, DataflowList, DataflowResult, DataflowStatus},
-    BuildId, SessionId,
+    BuildId,
 };
 #[cfg(feature = "tracing")]
 use dora_tracing::set_up_tracing;
 use dora_tracing::{set_up_tracing_opts, FileLogging};
 use duration_str::parse;
-use eyre::{bail, Context, ContextCompat};
+use eyre::{bail, Context};
 use formatting::FormatDataflowError;
 use std::{
     env::current_dir,
     io::Write,
     net::{SocketAddr, TcpStream},
-    path::Path,
 };
 use std::{
     net::{IpAddr, Ipv4Addr},
@@ -403,7 +402,7 @@ fn run(args: Args) -> eyre::Result<()> {
         } => template::create(args, internal_create_with_path_dependencies)?,
         Command::Run { dataflow, uv } => {
             let dataflow_path = resolve_dataflow(dataflow).context("could not resolve dataflow")?;
-            let dataflow_session = DataflowSession::read_session(&dataflow)
+            let dataflow_session = DataflowSession::read_session(&dataflow_path)
                 .context("failed to read DataflowSession")?;

             let rt = Builder::new_multi_thread()
@@ -647,7 +646,7 @@ fn build_dataflow(
     dataflow: String,
     coordinator_socket: SocketAddr,
     uv: bool,
-) -> eyre::Result<(Box<TcpRequestReplyConnection>, Uuid)> {
+) -> eyre::Result<(Box<TcpRequestReplyConnection>, BuildId)> {
     let dataflow = resolve_dataflow(dataflow).context("could not resolve dataflow")?;
     let dataflow_descriptor =
         Descriptor::blocking_read(&dataflow).wrap_err("Failed to read yaml dataflow")?;
@@ -669,13 +668,13 @@ fn build_dataflow(
     };
     let mut session = connect_to_coordinator(coordinator_socket)
         .wrap_err("failed to connect to dora coordinator")?;
-    let dataflow_id = {
+    let build_id = {
         let dataflow = dataflow_descriptor.clone();
         let session: &mut TcpRequestReplyConnection = &mut *session;
         let reply_raw = session
             .request(
                 &serde_json::to_vec(&ControlRequest::Build {
-                    session_id: dataflow_session.build_id,
+                    session_id: dataflow_session.session_id,
                     dataflow,
                     git_sources,
                     prev_git_sources: dataflow_session.git_sources.clone(),
@@ -697,15 +696,15 @@ fn build_dataflow(
             other => bail!("unexpected start dataflow reply: {other:?}"),
         }
     };
-    Ok((session, dataflow_id))
+    Ok((session, build_id))
 }

 fn wait_until_dataflow_built(
-    build_id: Uuid,
+    build_id: BuildId,
     session: &mut Box<TcpRequestReplyConnection>,
     coordinator_addr: SocketAddr,
     log_level: log::LevelFilter,
-) -> eyre::Result<()> {
+) -> eyre::Result<BuildId> {
     // subscribe to log messages
     let mut log_session = TcpConnection {
         stream: TcpStream::connect(coordinator_addr)
@@ -742,18 +741,16 @@ fn wait_until_dataflow_built(
     let result: ControlRequestReply =
         serde_json::from_slice(&reply_raw).wrap_err("failed to parse reply")?;
     match result {
-        ControlRequestReply::DataflowBuildFinished {
-            build_id,
-            session_id,
-            result,
-        } => match result {
-            Ok(()) => eprintln!("dataflow build finished successfully"),
+        ControlRequestReply::DataflowBuildFinished { build_id, result } => match result {
+            Ok(()) => {
+                eprintln!("dataflow build finished successfully");
+                Ok(build_id)
+            }
            Err(err) => bail!("{err}"),
        },
        ControlRequestReply::Error(err) => bail!("{err}"),
        other => bail!("unexpected start dataflow reply: {other:?}"),
     }
-    Ok(())
 }

 fn start_dataflow(
@@ -789,8 +786,8 @@ fn start_dataflow(
     let reply_raw = session
         .request(
             &serde_json::to_vec(&ControlRequest::Start {
-                build_id,
-                session_id,
+                build_id: dataflow_session.build_id,
+                session_id: dataflow_session.session_id,
                 dataflow,
                 name,
                 local_working_dir,
@@ -3,14 +3,13 @@ use std::{
     path::{Path, PathBuf},
 };

-use dora_message::{common::GitSource, id::NodeId, BuildId};
+use dora_message::{common::GitSource, id::NodeId, BuildId, SessionId};
 use eyre::{Context, ContextCompat};
-use uuid::Uuid;

 #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
 pub struct DataflowSession {
     pub build_id: Option<BuildId>,
-    pub session_id: Uuid,
+    pub session_id: SessionId,
     pub git_sources: BTreeMap<NodeId, GitSource>,
 }
@@ -18,7 +17,7 @@ impl Default for DataflowSession {
     fn default() -> Self {
         Self {
             build_id: None,
-            session_id: Uuid::new_v4(),
+            session_id: SessionId::generate(),
             git_sources: Default::default(),
         }
     }
@@ -29,7 +28,7 @@ impl DataflowSession {
         let session_file = session_file_path(dataflow_path)?;
         if session_file.exists() {
             if let Ok(parsed) = deserialize(&session_file) {
-                return Ok((parsed));
+                return Ok(parsed);
             } else {
                 tracing::warn!("failed to read dataflow session file, regenerating (you might need to run `dora build` again)");
             }
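
A hedged sketch of how a CLI command could consume `DataflowSession` after this change; the `record_build` function, the exact `read_session` signature, and the write-back step are assumptions for illustration, not part of the diff:

```rust
// Illustrative only: the session file keeps a stable SessionId across runs,
// while `build_id` is refreshed whenever a build completes.
use std::path::Path;

use dora_message::BuildId;
use eyre::Context;

fn record_build(dataflow_path: &Path, build_id: BuildId) -> eyre::Result<()> {
    // Falls back to `DataflowSession::default()` (a fresh `SessionId::generate()`)
    // if the session file is missing or unreadable, as shown above.
    let mut session = DataflowSession::read_session(dataflow_path)
        .context("failed to read DataflowSession")?;
    session.build_id = Some(build_id);
    // The updated session would then be written back next to the dataflow file,
    // so a later `ControlRequest::Start` can send `build_id: dataflow_session.build_id`
    // (see the start_dataflow hunk above); the write-back helper is not shown here.
    Ok(())
}
```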
@@ -200,7 +200,6 @@ async fn start_inner(
     let mut events = (abortable_events, daemon_events).merge();

     let mut running_builds: HashMap<BuildId, RunningBuild> = HashMap::new();
-    let mut build_log_subscribers: BTreeMap<BuildId, Vec<LogSubscriber>> = Default::default();
     let mut running_dataflows: HashMap<DataflowId, RunningDataflow> = HashMap::new();
     let mut dataflow_results: HashMap<DataflowId, BTreeMap<DaemonId, DataflowDaemonResult>> =
@@ -389,7 +388,7 @@ async fn start_inner(
                     }
                     if !matches!(
                         finished_dataflow.spawn_result,
-                        SpawnResult::Spawned { .. }
+                        CachedResult::Cached { .. }
                     ) {
                         log::error!("pending spawn result on dataflow finish");
                     }
@@ -432,7 +431,8 @@ async fn start_inner(
                         )
                         .await;
                         match result {
-                            Ok(()) => {
+                            Ok(build) => {
+                                running_builds.insert(build_id, build);
                                 let _ = reply_sender.send(Ok(
                                     ControlRequestReply::DataflowBuildTriggered { build_id },
                                 ));
@@ -444,7 +444,7 @@ async fn start_inner(
                     }
                     ControlRequest::WaitForBuild { build_id } => {
                         if let Some(build) = running_builds.get_mut(&build_id) {
-                            build.spawn_result.register(reply_sender);
+                            build.build_result.register(reply_sender);
                         } else {
                             let _ =
                                 reply_sender.send(Err(eyre!("unknown build id {build_id}")));
@@ -729,10 +729,11 @@ async fn start_inner(
                     level,
                     connection,
                 } => {
-                    build_log_subscribers
-                        .entry(build_id)
-                        .or_default()
-                        .push(LogSubscriber::new(level, connection));
+                    if let Some(build) = running_builds.get_mut(&build_id) {
+                        build
+                            .log_subscribers
+                            .push(LogSubscriber::new(level, connection));
+                    }
                 }
             },
             Event::DaemonHeartbeatInterval => {
@@ -795,8 +796,8 @@ async fn start_inner(
                     }
                 }
                 if let Some(build_id) = message.build_id {
-                    if let Some(subscribers) = build_log_subscribers.get_mut(&build_id) {
-                        send_log_message(subscribers, &message).await;
+                    if let Some(build) = running_builds.get_mut(&build_id) {
+                        send_log_message(&mut build.log_subscribers, &message).await;
                     }
                 }
             }
@@ -820,19 +821,15 @@ async fn start_inner(
                 };
                 if build.pending_build_results.is_empty() {
                     tracing::info!("dataflow build finished: `{build_id}`");
-                    let build = running_builds.remove(&build_id).unwrap();
+                    let mut build = running_builds.remove(&build_id).unwrap();
                     let result = if build.errors.is_empty() {
                         Ok(())
                     } else {
                         Err(format!("build failed: {}", build.errors.join("\n\n")))
                     };
-                    build.build_result_sender.send(Ok(
-                        ControlRequestReply::DataflowBuildFinished {
-                            build_id,
-                            session_id,
-                            result,
-                        },
+                    build.build_result.set_result(Ok(
+                        ControlRequestReply::DataflowBuildFinished { build_id, result },
                     ));
                 }
             }
@@ -960,12 +957,8 @@ async fn send_heartbeat_message(
 }

 struct RunningBuild {
-    build_id: BuildId,
-    /// The IDs of the daemons that the build is running on.
-    daemons: BTreeSet<DaemonId>,
     errors: Vec<String>,
-    build_result_sender: tokio::sync::oneshot::Sender<eyre::Result<ControlRequestReply>>,
+    build_result: CachedResult,
     log_subscribers: Vec<LogSubscriber>,
@@ -982,7 +975,7 @@ struct RunningDataflow {
     exited_before_subscribe: Vec<NodeId>,
     nodes: BTreeMap<NodeId, ResolvedNode>,
-    spawn_result: SpawnResult,
+    spawn_result: CachedResult,
     stop_reply_senders: Vec<tokio::sync::oneshot::Sender<eyre::Result<ControlRequestReply>>>,
     log_subscribers: Vec<LogSubscriber>,
@@ -990,16 +983,16 @@ struct RunningDataflow {
     pending_spawn_results: BTreeSet<DaemonId>,
 }

-pub enum SpawnResult {
+pub enum CachedResult {
     Pending {
         result_senders: Vec<tokio::sync::oneshot::Sender<eyre::Result<ControlRequestReply>>>,
     },
-    Spawned {
+    Cached {
         result: eyre::Result<ControlRequestReply>,
     },
 }

-impl Default for SpawnResult {
+impl Default for CachedResult {
     fn default() -> Self {
         Self::Pending {
             result_senders: Vec::new(),
@@ -1007,14 +1000,14 @@ impl Default for SpawnResult {
     }
 }

-impl SpawnResult {
+impl CachedResult {
     fn register(
         &mut self,
         reply_sender: tokio::sync::oneshot::Sender<eyre::Result<ControlRequestReply>>,
     ) {
         match self {
-            SpawnResult::Pending { result_senders } => result_senders.push(reply_sender),
-            SpawnResult::Spawned { result } => {
+            CachedResult::Pending { result_senders } => result_senders.push(reply_sender),
+            CachedResult::Cached { result } => {
                 Self::send_result_to(result, reply_sender);
             }
         }
@@ -1022,13 +1015,13 @@ impl SpawnResult {
     fn set_result(&mut self, result: eyre::Result<ControlRequestReply>) {
         match self {
-            SpawnResult::Pending { result_senders } => {
+            CachedResult::Pending { result_senders } => {
                 for sender in result_senders.drain(..) {
                     Self::send_result_to(&result, sender);
                 }
-                *self = SpawnResult::Spawned { result };
+                *self = CachedResult::Cached { result };
             }
-            SpawnResult::Spawned { .. } => {}
+            CachedResult::Cached { .. } => {}
         }
     }
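
For clarity, a simplified, self-contained analogue of the renamed `CachedResult` type, assuming a clonable payload and std `mpsc` channels instead of `eyre::Result<ControlRequestReply>` and tokio oneshot senders. It demonstrates the two behaviours the rename makes explicit: subscribers that register before the result arrives are queued and flushed by `set_result`, while later subscribers are answered immediately from the cache.

```rust
use std::sync::mpsc::{channel, Sender};

// Simplified analogue of `CachedResult` above (illustrative only).
enum Cached<T: Clone> {
    Pending { result_senders: Vec<Sender<T>> },
    Cached { result: T },
}

impl<T: Clone> Cached<T> {
    fn register(&mut self, sender: Sender<T>) {
        match self {
            // No result yet: queue the subscriber.
            Cached::Pending { result_senders } => result_senders.push(sender),
            // Result already known: answer immediately from the cache.
            Cached::Cached { result } => drop(sender.send(result.clone())),
        }
    }

    fn set_result(&mut self, result: T) {
        if let Cached::Pending { result_senders } = self {
            // Flush everyone who registered before the result arrived...
            for sender in result_senders.drain(..) {
                let _ = sender.send(result.clone());
            }
            // ...then keep the value for subscribers that register later.
            *self = Cached::Cached { result };
        }
        // A second `set_result` is ignored, matching the `Cached { .. } => {}` arm above.
    }
}

fn main() {
    let mut build_result = Cached::Pending { result_senders: Vec::new() };
    let (early_tx, early_rx) = channel();
    build_result.register(early_tx); // registered before the result
    build_result.set_result("build finished".to_string());
    let (late_tx, late_rx) = channel();
    build_result.register(late_tx); // registered after the result
    assert_eq!(early_rx.recv().unwrap(), "build finished");
    assert_eq!(late_rx.recv().unwrap(), "build finished");
}
```

This is why one type can now back both `RunningBuild::build_result` and `RunningDataflow::spawn_result`: the caching behaviour is identical, only the cached reply differs.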
@@ -1245,7 +1238,7 @@ async fn build_dataflow(
     clock: &HLC,
     uv: bool,
     daemon_connections: &mut DaemonConnections,
-) -> eyre::Result<()> {
+) -> eyre::Result<RunningBuild> {
     let nodes = dataflow.resolve_aliases_and_set_defaults()?;
     let mut git_sources_by_daemon = git_sources
@@ -1294,7 +1287,12 @@ async fn build_dataflow(
     tracing::info!("successfully triggered dataflow build `{session_id}`",);

-    Ok(())
+    Ok(RunningBuild {
+        errors: Vec::new(),
+        build_result: CachedResult::default(),
+        log_subscribers: Vec::new(),
+        pending_build_results: daemons,
+    })
 }

 async fn build_dataflow_on_machine(
@@ -1370,7 +1368,7 @@ async fn start_dataflow(
         exited_before_subscribe: Default::default(),
         daemons: daemons.clone(),
         nodes,
-        spawn_result: SpawnResult::default(),
+        spawn_result: CachedResult::default(),
         stop_reply_senders: Vec::new(),
         log_subscribers: Vec::new(),
         pending_spawn_results: daemons,
@@ -3,7 +3,7 @@ use std::collections::{BTreeMap, BTreeSet};
 use uuid::Uuid;

 pub use crate::common::{LogLevel, LogMessage, NodeError, NodeErrorCause, NodeExitStatus};
-use crate::{common::DaemonId, id::NodeId, BuildId, SessionId};
+use crate::{common::DaemonId, id::NodeId, BuildId};
@@ -14,7 +14,6 @@ pub enum ControlRequestReply {
     },
     DataflowBuildFinished {
         build_id: BuildId,
-        session_id: SessionId,
         result: Result<(), String>,
     },
     DataflowStartTriggered {