Make `dora-message` a dependency of `dora-core`, instead of the other way around. This way, we can continue to freely bump the version of `dora-core` with the other workspace crates, without introducing errors such as #708.tags/v0.3.7rc2
| @@ -2403,6 +2403,7 @@ dependencies = [ | |||
| name = "dora-core" | |||
| version = "0.3.6" | |||
| dependencies = [ | |||
| "dora-message", | |||
| "eyre", | |||
| "log", | |||
| "once_cell", | |||
| @@ -2413,7 +2414,6 @@ dependencies = [ | |||
| "serde_yaml 0.9.34+deprecated", | |||
| "tokio", | |||
| "tracing", | |||
| "uhlc", | |||
| "uuid", | |||
| "which", | |||
| ] | |||
| @@ -2486,12 +2486,16 @@ dependencies = [ | |||
| "aligned-vec", | |||
| "arrow-data", | |||
| "arrow-schema", | |||
| "dora-core", | |||
| "eyre", | |||
| "log", | |||
| "once_cell", | |||
| "schemars", | |||
| "semver", | |||
| "serde", | |||
| "serde-with-expand-env", | |||
| "serde_yaml 0.9.34+deprecated", | |||
| "tokio", | |||
| "uhlc", | |||
| "uuid", | |||
| ] | |||
| @@ -1,6 +1,6 @@ | |||
| use colored::Colorize; | |||
| use communication_layer_request_reply::{TcpConnection, TcpRequestReplyConnection}; | |||
| use dora_core::descriptor::{resolve_path, CoreNodeKind, Descriptor}; | |||
| use dora_core::descriptor::{resolve_path, CoreNodeKind, Descriptor, DescriptorExt}; | |||
| use dora_message::cli_to_coordinator::ControlRequest; | |||
| use dora_message::common::LogMessage; | |||
| use dora_message::coordinator_to_cli::ControlRequestReply; | |||
| @@ -1,6 +1,6 @@ | |||
| use dora_core::{ | |||
| config::OperatorId, | |||
| descriptor::{Descriptor, SINGLE_OPERATOR_DEFAULT_ID}, | |||
| descriptor::{Descriptor, DescriptorExt, NodeExt, SINGLE_OPERATOR_DEFAULT_ID}, | |||
| }; | |||
| use eyre::{eyre, Context}; | |||
| use std::{path::Path, process::Command}; | |||
| @@ -1,6 +1,6 @@ | |||
| use std::{fs::File, io::Write, path::Path}; | |||
| use dora_core::descriptor::Descriptor; | |||
| use dora_core::descriptor::{Descriptor, DescriptorExt}; | |||
| use eyre::Context; | |||
| const MERMAID_TEMPLATE: &str = include_str!("mermaid-template.html"); | |||
| @@ -3,7 +3,7 @@ use colored::Colorize; | |||
| use communication_layer_request_reply::{RequestReplyLayer, TcpLayer, TcpRequestReplyConnection}; | |||
| use dora_coordinator::Event; | |||
| use dora_core::{ | |||
| descriptor::{source_is_url, Descriptor}, | |||
| descriptor::{source_is_url, Descriptor, DescriptorExt}, | |||
| topics::{ | |||
| DORA_COORDINATOR_PORT_CONTROL_DEFAULT, DORA_COORDINATOR_PORT_DEFAULT, | |||
| DORA_DAEMON_LOCAL_LISTEN_PORT_DEFAULT, | |||
| @@ -5,7 +5,6 @@ use crate::{ | |||
| pub use control::ControlEvent; | |||
| use dora_core::{ | |||
| config::{NodeId, OperatorId}, | |||
| descriptor::{Descriptor, ResolvedNode}, | |||
| uhlc::{self, HLC}, | |||
| }; | |||
| use dora_message::{ | |||
| @@ -16,6 +15,7 @@ use dora_message::{ | |||
| }, | |||
| coordinator_to_daemon::{DaemonCoordinatorEvent, RegisterResult, Timestamped}, | |||
| daemon_to_coordinator::{DaemonCoordinatorReply, DataflowDaemonResult}, | |||
| descriptor::{Descriptor, ResolvedNode}, | |||
| }; | |||
| use eyre::{bail, eyre, ContextCompat, Result, WrapErr}; | |||
| use futures::{future::join_all, stream::FuturesUnordered, Future, Stream, StreamExt}; | |||
| @@ -3,13 +3,11 @@ use crate::{ | |||
| DaemonConnection, | |||
| }; | |||
| use dora_core::{ | |||
| descriptor::{Descriptor, ResolvedNode}, | |||
| uhlc::HLC, | |||
| }; | |||
| use dora_core::{descriptor::DescriptorExt, uhlc::HLC}; | |||
| use dora_message::{ | |||
| coordinator_to_daemon::{DaemonCoordinatorEvent, SpawnDataflowNodes, Timestamped}, | |||
| daemon_to_coordinator::DaemonCoordinatorReply, | |||
| descriptor::{Descriptor, ResolvedNode}, | |||
| }; | |||
| use eyre::{bail, eyre, ContextCompat, WrapErr}; | |||
| use std::{ | |||
| @@ -3,7 +3,10 @@ use coordinator::CoordinatorEvent; | |||
| use crossbeam::queue::ArrayQueue; | |||
| use dora_core::{ | |||
| config::{DataId, Input, InputMapping, NodeId, OperatorId}, | |||
| descriptor::{runtime_node_inputs, CoreNodeKind, Descriptor, ResolvedNode}, | |||
| descriptor::{ | |||
| read_as_descriptor, runtime_node_inputs, CoreNodeKind, Descriptor, DescriptorExt, | |||
| ResolvedNode, | |||
| }, | |||
| topics::LOCALHOST, | |||
| uhlc::{self, HLC}, | |||
| }; | |||
| @@ -162,7 +165,7 @@ impl Daemon { | |||
| .ok_or_else(|| eyre::eyre!("canonicalized dataflow path has no parent"))? | |||
| .to_owned(); | |||
| let descriptor = Descriptor::read(dataflow_path).await?; | |||
| let descriptor = read_as_descriptor(dataflow_path).await?; | |||
| descriptor.check(&working_dir)?; | |||
| let nodes = descriptor.resolve_aliases_and_set_defaults()?; | |||
| @@ -1,6 +1,6 @@ | |||
| use dora_coordinator::{ControlEvent, Event}; | |||
| use dora_core::{ | |||
| descriptor::Descriptor, | |||
| descriptor::{read_as_descriptor, DescriptorExt}, | |||
| topics::{DORA_COORDINATOR_PORT_CONTROL_DEFAULT, DORA_COORDINATOR_PORT_DEFAULT}, | |||
| }; | |||
| use dora_message::{ | |||
| @@ -115,7 +115,7 @@ async fn start_dataflow( | |||
| dataflow: &Path, | |||
| coordinator_events_tx: &Sender<Event>, | |||
| ) -> eyre::Result<Uuid> { | |||
| let dataflow_descriptor = Descriptor::read(dataflow) | |||
| let dataflow_descriptor = read_as_descriptor(dataflow) | |||
| .await | |||
| .wrap_err("failed to read yaml dataflow")?; | |||
| let working_dir = dataflow | |||
| @@ -10,6 +10,7 @@ repository.workspace = true | |||
| # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |||
| [dependencies] | |||
| dora-message = { workspace = true } | |||
| eyre = "0.6.8" | |||
| serde = { version = "1.0.136", features = ["derive"] } | |||
| serde_yaml = "0.9.11" | |||
| @@ -22,4 +23,3 @@ tokio = { version = "1.24.1", features = ["fs", "process", "sync"] } | |||
| schemars = "0.8.19" | |||
| serde_json = "1.0.117" | |||
| log = { version = "0.4.21", features = ["serde"] } | |||
| uhlc = "0.5.1" | |||
| @@ -1,6 +1,6 @@ | |||
| use std::{env, path::Path}; | |||
| use dora_core::descriptor::Descriptor; | |||
| use dora_message::descriptor::Descriptor; | |||
| use schemars::schema_for; | |||
| fn main() { | |||
| @@ -1,41 +1,43 @@ | |||
| use crate::config::{ | |||
| CommunicationConfig, DataId, Input, InputMapping, NodeId, NodeRunConfig, OperatorId, | |||
| use dora_message::{ | |||
| config::{Input, InputMapping, NodeRunConfig}, | |||
| id::{DataId, OperatorId}, | |||
| }; | |||
| use eyre::{bail, eyre, Context, OptionExt, Result}; | |||
| use schemars::JsonSchema; | |||
| use serde::{Deserialize, Serialize}; | |||
| use serde_with_expand_env::with_expand_envs; | |||
| use eyre::{bail, Context, OptionExt, Result}; | |||
| use std::{ | |||
| collections::{BTreeMap, BTreeSet, HashMap}, | |||
| collections::{BTreeMap, HashMap}, | |||
| env::consts::EXE_EXTENSION, | |||
| fmt, | |||
| path::{Path, PathBuf}, | |||
| }; | |||
| use tracing::warn; | |||
| // reexport for compatibility | |||
| pub use dora_message::descriptor::{ | |||
| runtime_node_inputs, CoreNodeKind, CustomNode, Descriptor, Node, OperatorConfig, | |||
| OperatorDefinition, OperatorSource, PythonSource, ResolvedDeploy, ResolvedNode, RuntimeNode, | |||
| SingleOperatorDefinition, DYNAMIC_SOURCE, SHELL_SOURCE, | |||
| }; | |||
| pub use visualize::collect_dora_timers; | |||
| mod validate; | |||
| mod visualize; | |||
| pub const SHELL_SOURCE: &str = "shell"; | |||
| pub const DYNAMIC_SOURCE: &str = "dynamic"; | |||
| /// Dataflow description | |||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | |||
| #[serde(deny_unknown_fields)] | |||
| #[schemars(title = "dora-rs specification")] | |||
| pub struct Descriptor { | |||
| #[schemars(skip)] | |||
| #[serde(default)] | |||
| pub communication: CommunicationConfig, | |||
| #[schemars(skip)] | |||
| #[serde(default, rename = "_unstable_deploy")] | |||
| pub deploy: Deploy, | |||
| pub nodes: Vec<Node>, | |||
| pub trait DescriptorExt { | |||
| fn resolve_aliases_and_set_defaults(&self) -> eyre::Result<Vec<ResolvedNode>>; | |||
| fn visualize_as_mermaid(&self) -> eyre::Result<String>; | |||
| fn blocking_read(path: &Path) -> eyre::Result<Descriptor>; | |||
| fn parse(buf: Vec<u8>) -> eyre::Result<Descriptor>; | |||
| fn check(&self, working_dir: &Path) -> eyre::Result<()>; | |||
| fn check_in_daemon( | |||
| &self, | |||
| working_dir: &Path, | |||
| remote_machine_id: &[&str], | |||
| coordinator_is_remote: bool, | |||
| ) -> eyre::Result<()>; | |||
| } | |||
| pub const SINGLE_OPERATOR_DEFAULT_ID: &str = "op"; | |||
| impl Descriptor { | |||
| pub fn resolve_aliases_and_set_defaults(&self) -> eyre::Result<Vec<ResolvedNode>> { | |||
| impl DescriptorExt for Descriptor { | |||
| fn resolve_aliases_and_set_defaults(&self) -> eyre::Result<Vec<ResolvedNode>> { | |||
| let default_op_id = OperatorId::from(SINGLE_OPERATOR_DEFAULT_ID.to_string()); | |||
| let single_operator_nodes: HashMap<_, _> = self | |||
| @@ -51,7 +53,7 @@ impl Descriptor { | |||
| let mut resolved = vec![]; | |||
| for mut node in self.nodes.clone() { | |||
| // adjust input mappings | |||
| let mut node_kind = node.kind_mut()?; | |||
| let mut node_kind = node_kind_mut(&mut node)?; | |||
| let input_mappings: Vec<_> = match &mut node_kind { | |||
| NodeKindMut::Standard { path: _, inputs } => inputs.values_mut().collect(), | |||
| NodeKindMut::Runtime(node) => node | |||
| @@ -102,7 +104,14 @@ impl Descriptor { | |||
| name: node.name, | |||
| description: node.description, | |||
| env: node.env, | |||
| deploy: ResolvedDeploy::new(node.deploy, self), | |||
| deploy: { | |||
| let default_machine = self.deploy.machine.as_deref().unwrap_or_default(); | |||
| let machine = match node.deploy.machine { | |||
| Some(m) => m, | |||
| None => default_machine.to_owned(), | |||
| }; | |||
| ResolvedDeploy { machine } | |||
| }, | |||
| kind, | |||
| }); | |||
| } | |||
| @@ -110,35 +119,28 @@ impl Descriptor { | |||
| Ok(resolved) | |||
| } | |||
| pub fn visualize_as_mermaid(&self) -> eyre::Result<String> { | |||
| fn visualize_as_mermaid(&self) -> eyre::Result<String> { | |||
| let resolved = self.resolve_aliases_and_set_defaults()?; | |||
| let flowchart = visualize::visualize_nodes(&resolved); | |||
| Ok(flowchart) | |||
| } | |||
| pub async fn read(path: &Path) -> eyre::Result<Descriptor> { | |||
| let buf = tokio::fs::read(path) | |||
| .await | |||
| .context("failed to open given file")?; | |||
| Descriptor::parse(buf) | |||
| } | |||
| pub fn blocking_read(path: &Path) -> eyre::Result<Descriptor> { | |||
| fn blocking_read(path: &Path) -> eyre::Result<Descriptor> { | |||
| let buf = std::fs::read(path).context("failed to open given file")?; | |||
| Descriptor::parse(buf) | |||
| } | |||
| pub fn parse(buf: Vec<u8>) -> eyre::Result<Descriptor> { | |||
| fn parse(buf: Vec<u8>) -> eyre::Result<Descriptor> { | |||
| serde_yaml::from_slice(&buf).context("failed to parse given descriptor") | |||
| } | |||
| pub fn check(&self, working_dir: &Path) -> eyre::Result<()> { | |||
| fn check(&self, working_dir: &Path) -> eyre::Result<()> { | |||
| validate::check_dataflow(self, working_dir, None, false) | |||
| .wrap_err("Dataflow could not be validated.") | |||
| } | |||
| pub fn check_in_daemon( | |||
| fn check_in_daemon( | |||
| &self, | |||
| working_dir: &Path, | |||
| remote_machine_id: &[&str], | |||
| @@ -154,53 +156,70 @@ impl Descriptor { | |||
| } | |||
| } | |||
| #[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] | |||
| #[serde(deny_unknown_fields)] | |||
| pub struct Deploy { | |||
| pub machine: Option<String>, | |||
| pub async fn read_as_descriptor(path: &Path) -> eyre::Result<Descriptor> { | |||
| let buf = tokio::fs::read(path) | |||
| .await | |||
| .context("failed to open given file")?; | |||
| Descriptor::parse(buf) | |||
| } | |||
| fn node_kind_mut(node: &mut Node) -> eyre::Result<NodeKindMut> { | |||
| match node.kind()? { | |||
| NodeKind::Standard(_) => node | |||
| .path | |||
| .as_ref() | |||
| .map(|path| NodeKindMut::Standard { | |||
| path, | |||
| inputs: &mut node.inputs, | |||
| }) | |||
| .ok_or_eyre("no path"), | |||
| NodeKind::Runtime(_) => node | |||
| .operators | |||
| .as_mut() | |||
| .map(NodeKindMut::Runtime) | |||
| .ok_or_eyre("no operators"), | |||
| NodeKind::Custom(_) => node | |||
| .custom | |||
| .as_mut() | |||
| .map(NodeKindMut::Custom) | |||
| .ok_or_eyre("no custom"), | |||
| NodeKind::Operator(_) => node | |||
| .operator | |||
| .as_mut() | |||
| .map(NodeKindMut::Operator) | |||
| .ok_or_eyre("no operator"), | |||
| } | |||
| } | |||
| /// Dora Node | |||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | |||
| #[serde(deny_unknown_fields)] | |||
| pub struct Node { | |||
| /// Node identifier | |||
| pub id: NodeId, | |||
| /// Node name | |||
| pub name: Option<String>, | |||
| /// Description of the node | |||
| pub description: Option<String>, | |||
| /// Environment variables | |||
| pub env: Option<BTreeMap<String, EnvValue>>, | |||
| pub fn source_is_url(source: &str) -> bool { | |||
| source.contains("://") | |||
| } | |||
| /// Unstable machine deployment configuration | |||
| #[schemars(skip)] | |||
| #[serde(default, rename = "_unstable_deploy")] | |||
| pub deploy: Deploy, | |||
| pub fn resolve_path(source: &str, working_dir: &Path) -> Result<PathBuf> { | |||
| let path = Path::new(&source); | |||
| let path = if path.extension().is_none() { | |||
| path.with_extension(EXE_EXTENSION) | |||
| } else { | |||
| path.to_owned() | |||
| }; | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| operators: Option<RuntimeNode>, | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| custom: Option<CustomNode>, | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| operator: Option<SingleOperatorDefinition>, | |||
| // Search path within current working directory | |||
| if let Ok(abs_path) = working_dir.join(&path).canonicalize() { | |||
| Ok(abs_path) | |||
| // Search path within $PATH | |||
| } else if let Ok(abs_path) = which::which(&path) { | |||
| Ok(abs_path) | |||
| } else { | |||
| bail!("Could not find source path {}", path.display()) | |||
| } | |||
| } | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| pub path: Option<String>, | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| pub args: Option<String>, | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| pub build: Option<String>, | |||
| #[serde(skip_serializing_if = "Option::is_none")] | |||
| pub send_stdout_as: Option<String>, | |||
| #[serde(default)] | |||
| pub inputs: BTreeMap<DataId, Input>, | |||
| #[serde(default)] | |||
| pub outputs: BTreeSet<DataId>, | |||
| pub trait NodeExt { | |||
| fn kind(&self) -> eyre::Result<NodeKind>; | |||
| } | |||
| impl Node { | |||
| pub fn kind(&self) -> eyre::Result<NodeKind> { | |||
| impl NodeExt for Node { | |||
| fn kind(&self) -> eyre::Result<NodeKind> { | |||
| match (&self.path, &self.operators, &self.custom, &self.operator) { | |||
| (None, None, None, None) => { | |||
| eyre::bail!( | |||
| @@ -220,34 +239,6 @@ impl Node { | |||
| } | |||
| } | |||
| } | |||
| fn kind_mut(&mut self) -> eyre::Result<NodeKindMut> { | |||
| match self.kind()? { | |||
| NodeKind::Standard(_) => self | |||
| .path | |||
| .as_ref() | |||
| .map(|path| NodeKindMut::Standard { | |||
| path, | |||
| inputs: &mut self.inputs, | |||
| }) | |||
| .ok_or_eyre("no path"), | |||
| NodeKind::Runtime(_) => self | |||
| .operators | |||
| .as_mut() | |||
| .map(NodeKindMut::Runtime) | |||
| .ok_or_eyre("no operators"), | |||
| NodeKind::Custom(_) => self | |||
| .custom | |||
| .as_mut() | |||
| .map(NodeKindMut::Custom) | |||
| .ok_or_eyre("no custom"), | |||
| NodeKind::Operator(_) => self | |||
| .operator | |||
| .as_mut() | |||
| .map(NodeKindMut::Operator) | |||
| .ok_or_eyre("no operator"), | |||
| } | |||
| } | |||
| } | |||
| #[derive(Debug)] | |||
| @@ -270,291 +261,3 @@ enum NodeKindMut<'a> { | |||
| Custom(&'a mut CustomNode), | |||
| Operator(&'a mut SingleOperatorDefinition), | |||
| } | |||
| #[derive(Debug, Clone, Serialize, Deserialize)] | |||
| pub struct ResolvedNode { | |||
| pub id: NodeId, | |||
| pub name: Option<String>, | |||
| pub description: Option<String>, | |||
| pub env: Option<BTreeMap<String, EnvValue>>, | |||
| #[serde(default)] | |||
| pub deploy: ResolvedDeploy, | |||
| #[serde(flatten)] | |||
| pub kind: CoreNodeKind, | |||
| } | |||
| impl ResolvedNode { | |||
| pub fn send_stdout_as(&self) -> Result<Option<String>> { | |||
| match &self.kind { | |||
| // TODO: Split stdout between operators | |||
| CoreNodeKind::Runtime(n) => { | |||
| let count = n | |||
| .operators | |||
| .iter() | |||
| .filter(|op| op.config.send_stdout_as.is_some()) | |||
| .count(); | |||
| if count == 1 && n.operators.len() > 1 { | |||
| warn!("All stdout from all operators of a runtime are going to be sent in the selected `send_stdout_as` operator.") | |||
| } else if count > 1 { | |||
| return Err(eyre!("More than one `send_stdout_as` entries for a runtime node. Please only use one `send_stdout_as` per runtime.")); | |||
| } | |||
| Ok(n.operators.iter().find_map(|op| { | |||
| op.config | |||
| .send_stdout_as | |||
| .clone() | |||
| .map(|stdout| format!("{}/{}", op.id, stdout)) | |||
| })) | |||
| } | |||
| CoreNodeKind::Custom(n) => Ok(n.send_stdout_as.clone()), | |||
| } | |||
| } | |||
| } | |||
| #[derive(Debug, Clone, Default, Serialize, Deserialize)] | |||
| pub struct ResolvedDeploy { | |||
| pub machine: String, | |||
| } | |||
| impl ResolvedDeploy { | |||
| fn new(deploy: Deploy, descriptor: &Descriptor) -> Self { | |||
| let default_machine = descriptor.deploy.machine.as_deref().unwrap_or_default(); | |||
| let machine = match deploy.machine { | |||
| Some(m) => m, | |||
| None => default_machine.to_owned(), | |||
| }; | |||
| Self { machine } | |||
| } | |||
| } | |||
| #[derive(Debug, Clone, Serialize, Deserialize)] | |||
| #[serde(rename_all = "lowercase")] | |||
| pub enum CoreNodeKind { | |||
| /// Dora runtime node | |||
| #[serde(rename = "operators")] | |||
| Runtime(RuntimeNode), | |||
| Custom(CustomNode), | |||
| } | |||
| pub fn runtime_node_inputs(n: &RuntimeNode) -> BTreeMap<DataId, Input> { | |||
| n.operators | |||
| .iter() | |||
| .flat_map(|operator| { | |||
| operator.config.inputs.iter().map(|(input_id, mapping)| { | |||
| ( | |||
| DataId::from(format!("{}/{input_id}", operator.id)), | |||
| mapping.clone(), | |||
| ) | |||
| }) | |||
| }) | |||
| .collect() | |||
| } | |||
| fn runtime_node_outputs(n: &RuntimeNode) -> BTreeSet<DataId> { | |||
| n.operators | |||
| .iter() | |||
| .flat_map(|operator| { | |||
| operator | |||
| .config | |||
| .outputs | |||
| .iter() | |||
| .map(|output_id| DataId::from(format!("{}/{output_id}", operator.id))) | |||
| }) | |||
| .collect() | |||
| } | |||
| impl CoreNodeKind { | |||
| pub fn run_config(&self) -> NodeRunConfig { | |||
| match self { | |||
| CoreNodeKind::Runtime(n) => NodeRunConfig { | |||
| inputs: runtime_node_inputs(n), | |||
| outputs: runtime_node_outputs(n), | |||
| }, | |||
| CoreNodeKind::Custom(n) => n.run_config.clone(), | |||
| } | |||
| } | |||
| pub fn dynamic(&self) -> bool { | |||
| match self { | |||
| CoreNodeKind::Runtime(_n) => false, | |||
| CoreNodeKind::Custom(n) => n.source == DYNAMIC_SOURCE, | |||
| } | |||
| } | |||
| } | |||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | |||
| #[serde(transparent)] | |||
| pub struct RuntimeNode { | |||
| pub operators: Vec<OperatorDefinition>, | |||
| } | |||
| #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] | |||
| pub struct OperatorDefinition { | |||
| pub id: OperatorId, | |||
| #[serde(flatten)] | |||
| pub config: OperatorConfig, | |||
| } | |||
| #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] | |||
| pub struct SingleOperatorDefinition { | |||
| /// ID is optional if there is only a single operator. | |||
| pub id: Option<OperatorId>, | |||
| #[serde(flatten)] | |||
| pub config: OperatorConfig, | |||
| } | |||
| #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] | |||
| pub struct OperatorConfig { | |||
| pub name: Option<String>, | |||
| pub description: Option<String>, | |||
| #[serde(default)] | |||
| pub inputs: BTreeMap<DataId, Input>, | |||
| #[serde(default)] | |||
| pub outputs: BTreeSet<DataId>, | |||
| #[serde(flatten)] | |||
| pub source: OperatorSource, | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| pub build: Option<String>, | |||
| #[serde(skip_serializing_if = "Option::is_none")] | |||
| pub send_stdout_as: Option<String>, | |||
| } | |||
| #[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)] | |||
| #[serde(rename_all = "kebab-case")] | |||
| pub enum OperatorSource { | |||
| SharedLibrary(String), | |||
| Python(PythonSource), | |||
| #[schemars(skip)] | |||
| Wasm(String), | |||
| } | |||
| #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] | |||
| #[serde( | |||
| deny_unknown_fields, | |||
| from = "PythonSourceDef", | |||
| into = "PythonSourceDef" | |||
| )] | |||
| pub struct PythonSource { | |||
| pub source: String, | |||
| pub conda_env: Option<String>, | |||
| } | |||
| #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] | |||
| #[serde(untagged)] | |||
| pub enum PythonSourceDef { | |||
| SourceOnly(String), | |||
| WithOptions { | |||
| source: String, | |||
| conda_env: Option<String>, | |||
| }, | |||
| } | |||
| impl From<PythonSource> for PythonSourceDef { | |||
| fn from(input: PythonSource) -> Self { | |||
| match input { | |||
| PythonSource { | |||
| source, | |||
| conda_env: None, | |||
| } => Self::SourceOnly(source), | |||
| PythonSource { source, conda_env } => Self::WithOptions { source, conda_env }, | |||
| } | |||
| } | |||
| } | |||
| impl From<PythonSourceDef> for PythonSource { | |||
| fn from(value: PythonSourceDef) -> Self { | |||
| match value { | |||
| PythonSourceDef::SourceOnly(source) => Self { | |||
| source, | |||
| conda_env: None, | |||
| }, | |||
| PythonSourceDef::WithOptions { source, conda_env } => Self { source, conda_env }, | |||
| } | |||
| } | |||
| } | |||
| pub fn source_is_url(source: &str) -> bool { | |||
| source.contains("://") | |||
| } | |||
| pub fn resolve_path(source: &str, working_dir: &Path) -> Result<PathBuf> { | |||
| let path = Path::new(&source); | |||
| let path = if path.extension().is_none() { | |||
| path.with_extension(EXE_EXTENSION) | |||
| } else { | |||
| path.to_owned() | |||
| }; | |||
| // Search path within current working directory | |||
| if let Ok(abs_path) = working_dir.join(&path).canonicalize() { | |||
| Ok(abs_path) | |||
| // Search path within $PATH | |||
| } else if let Ok(abs_path) = which::which(&path) { | |||
| Ok(abs_path) | |||
| } else { | |||
| bail!("Could not find source path {}", path.display()) | |||
| } | |||
| } | |||
| #[derive(Debug, Serialize, Deserialize, Clone)] | |||
| #[serde(deny_unknown_fields)] | |||
| pub struct PythonOperatorConfig { | |||
| pub path: PathBuf, | |||
| #[serde(default)] | |||
| pub inputs: BTreeMap<DataId, InputMapping>, | |||
| #[serde(default)] | |||
| pub outputs: BTreeSet<DataId>, | |||
| } | |||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | |||
| pub struct CustomNode { | |||
| /// Path of the source code | |||
| /// | |||
| /// If you want to use a specific `conda` environment. | |||
| /// Provide the python path within the source. | |||
| /// | |||
| /// source: /home/peter/miniconda3/bin/python | |||
| /// | |||
| /// args: some_node.py | |||
| /// | |||
| /// Source can match any executable in PATH. | |||
| pub source: String, | |||
| /// Args for the executable. | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| pub args: Option<String>, | |||
| /// Environment variables for the custom nodes | |||
| /// | |||
| /// Deprecated, use outer-level `env` field instead. | |||
| pub envs: Option<BTreeMap<String, EnvValue>>, | |||
| #[serde(default, skip_serializing_if = "Option::is_none")] | |||
| pub build: Option<String>, | |||
| /// Send stdout and stderr to another node | |||
| #[serde(skip_serializing_if = "Option::is_none")] | |||
| pub send_stdout_as: Option<String>, | |||
| #[serde(flatten)] | |||
| pub run_config: NodeRunConfig, | |||
| } | |||
| #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] | |||
| #[serde(untagged)] | |||
| pub enum EnvValue { | |||
| #[serde(deserialize_with = "with_expand_envs")] | |||
| Bool(bool), | |||
| #[serde(deserialize_with = "with_expand_envs")] | |||
| Integer(u64), | |||
| #[serde(deserialize_with = "with_expand_envs")] | |||
| String(String), | |||
| } | |||
| impl fmt::Display for EnvValue { | |||
| fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { | |||
| match self { | |||
| EnvValue::Bool(bool) => fmt.write_str(&bool.to_string()), | |||
| EnvValue::Integer(u64) => fmt.write_str(&u64.to_string()), | |||
| EnvValue::String(str) => fmt.write_str(str), | |||
| } | |||
| } | |||
| } | |||
| @@ -1,15 +1,19 @@ | |||
| use crate::{ | |||
| adjust_shared_library_path, | |||
| config::{DataId, Input, InputMapping, OperatorId, UserInputMapping}, | |||
| descriptor::{self, source_is_url, CoreNodeKind, OperatorSource}, | |||
| descriptor::{self, source_is_url}, | |||
| get_python_path, | |||
| }; | |||
| use dora_message::{ | |||
| config::{Input, InputMapping, UserInputMapping}, | |||
| descriptor::{CoreNodeKind, OperatorSource, DYNAMIC_SOURCE, SHELL_SOURCE}, | |||
| id::{DataId, OperatorId}, | |||
| }; | |||
| use eyre::{bail, eyre, Context}; | |||
| use std::{path::Path, process::Command}; | |||
| use tracing::info; | |||
| use super::{resolve_path, Descriptor, DYNAMIC_SOURCE, SHELL_SOURCE}; | |||
| use super::{resolve_path, Descriptor, DescriptorExt}; | |||
| const VERSION: &str = env!("CARGO_PKG_VERSION"); | |||
| pub fn check_dataflow( | |||
| @@ -1,5 +1,10 @@ | |||
| use super::{CoreNodeKind, CustomNode, OperatorDefinition, ResolvedNode, RuntimeNode}; | |||
| use crate::config::{format_duration, DataId, Input, InputMapping, NodeId, UserInputMapping}; | |||
| use dora_message::{ | |||
| config::{format_duration, Input, InputMapping, UserInputMapping}, | |||
| descriptor::{CoreNodeKind, OperatorDefinition}, | |||
| id::{DataId, NodeId}, | |||
| }; | |||
| use super::{CustomNode, ResolvedNode, RuntimeNode}; | |||
| use std::{ | |||
| collections::{BTreeMap, BTreeSet, HashMap}, | |||
| fmt::Write as _, | |||
| @@ -5,9 +5,8 @@ use std::{ | |||
| path::Path, | |||
| }; | |||
| pub use uhlc; | |||
| pub use dora_message::{config, uhlc}; | |||
| pub mod config; | |||
| pub mod descriptor; | |||
| pub mod topics; | |||
| @@ -17,8 +17,13 @@ serde = { version = "1.0.136", features = ["derive"] } | |||
| eyre = "0.6.8" | |||
| arrow-schema = { workspace = true, features = ["serde"] } | |||
| tokio = "1.39.2" | |||
| dora-core = { workspace = true } | |||
| # dora-core = { workspace = true } | |||
| uuid = { version = "1.7", features = ["serde", "v7"] } | |||
| log = { version = "0.4.21", features = ["serde"] } | |||
| aligned-vec = { version = "0.5.0", features = ["serde"] } | |||
| semver = { version = "1.0.23", features = ["serde"] } | |||
| schemars = "0.8.19" | |||
| uhlc = "0.5.1" | |||
| serde_yaml = "0.9.11" | |||
| once_cell = "1.13.0" | |||
| serde-with-expand-env = "1.1.0" | |||
| @@ -1,10 +1,11 @@ | |||
| use std::{path::PathBuf, time::Duration}; | |||
| use dora_core::{ | |||
| config::{NodeId, OperatorId}, | |||
| use uuid::Uuid; | |||
| use crate::{ | |||
| descriptor::Descriptor, | |||
| id::{NodeId, OperatorId}, | |||
| }; | |||
| use uuid::Uuid; | |||
| #[derive(Debug, serde::Deserialize, serde::Serialize)] | |||
| pub enum ControlRequest { | |||
| @@ -2,10 +2,9 @@ use core::fmt; | |||
| use std::borrow::Cow; | |||
| use aligned_vec::{AVec, ConstAlign}; | |||
| use dora_core::{config::NodeId, uhlc}; | |||
| use uuid::Uuid; | |||
| use crate::DataflowId; | |||
| use crate::{id::NodeId, DataflowId}; | |||
| pub use log::Level as LogLevel; | |||
| @@ -1,129 +1,87 @@ | |||
| use once_cell::sync::OnceCell; | |||
| use schemars::JsonSchema; | |||
| use serde::{Deserialize, Serialize}; | |||
| use core::fmt; | |||
| use std::{ | |||
| borrow::Borrow, | |||
| collections::{BTreeMap, BTreeSet}, | |||
| convert::Infallible, | |||
| fmt, | |||
| str::FromStr, | |||
| time::Duration, | |||
| }; | |||
| #[derive( | |||
| Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema, | |||
| )] | |||
| pub struct NodeId(String); | |||
| impl FromStr for NodeId { | |||
| type Err = Infallible; | |||
| fn from_str(s: &str) -> Result<Self, Self::Err> { | |||
| Ok(Self(s.to_owned())) | |||
| } | |||
| } | |||
| impl From<String> for NodeId { | |||
| fn from(id: String) -> Self { | |||
| Self(id) | |||
| } | |||
| } | |||
| impl std::fmt::Display for NodeId { | |||
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |||
| std::fmt::Display::fmt(&self.0, f) | |||
| } | |||
| } | |||
| impl AsRef<str> for NodeId { | |||
| fn as_ref(&self) -> &str { | |||
| &self.0 | |||
| } | |||
| } | |||
| #[derive( | |||
| Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema, | |||
| )] | |||
| pub struct OperatorId(String); | |||
| impl FromStr for OperatorId { | |||
| type Err = Infallible; | |||
| fn from_str(s: &str) -> Result<Self, Self::Err> { | |||
| Ok(Self(s.to_owned())) | |||
| } | |||
| } | |||
| impl From<String> for OperatorId { | |||
| fn from(id: String) -> Self { | |||
| Self(id) | |||
| } | |||
| } | |||
| impl std::fmt::Display for OperatorId { | |||
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |||
| std::fmt::Display::fmt(&self.0, f) | |||
| } | |||
| } | |||
| impl AsRef<str> for OperatorId { | |||
| fn as_ref(&self) -> &str { | |||
| &self.0 | |||
| } | |||
| } | |||
| #[derive( | |||
| Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema, | |||
| )] | |||
| pub struct DataId(String); | |||
| impl From<DataId> for String { | |||
| fn from(id: DataId) -> Self { | |||
| id.0 | |||
| } | |||
| } | |||
| impl From<String> for DataId { | |||
| fn from(id: String) -> Self { | |||
| Self(id) | |||
| } | |||
| } | |||
| impl std::fmt::Display for DataId { | |||
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |||
| std::fmt::Display::fmt(&self.0, f) | |||
| } | |||
| } | |||
| use once_cell::sync::OnceCell; | |||
| use schemars::JsonSchema; | |||
| use serde::{Deserialize, Serialize}; | |||
| impl std::ops::Deref for DataId { | |||
| type Target = String; | |||
| pub use crate::id::{DataId, NodeId, OperatorId}; | |||
| fn deref(&self) -> &Self::Target { | |||
| &self.0 | |||
| } | |||
| #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] | |||
| pub struct NodeRunConfig { | |||
| /// Inputs for the nodes as a map from input ID to `node_id/output_id`. | |||
| /// | |||
| /// e.g. | |||
| /// | |||
| /// inputs: | |||
| /// | |||
| /// example_input: example_node/example_output1 | |||
| /// | |||
| #[serde(default)] | |||
| pub inputs: BTreeMap<DataId, Input>, | |||
| /// List of output IDs. | |||
| /// | |||
| /// e.g. | |||
| /// | |||
| /// outputs: | |||
| /// | |||
| /// - output_1 | |||
| /// | |||
| /// - output_2 | |||
| #[serde(default)] | |||
| pub outputs: BTreeSet<DataId>, | |||
| } | |||
| impl AsRef<String> for DataId { | |||
| fn as_ref(&self) -> &String { | |||
| &self.0 | |||
| } | |||
| #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] | |||
| #[serde(deny_unknown_fields, from = "InputDef", into = "InputDef")] | |||
| pub struct Input { | |||
| pub mapping: InputMapping, | |||
| pub queue_size: Option<usize>, | |||
| } | |||
| impl AsRef<str> for DataId { | |||
| fn as_ref(&self) -> &str { | |||
| &self.0 | |||
| } | |||
| #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] | |||
| #[serde(untagged)] | |||
| pub enum InputDef { | |||
| MappingOnly(InputMapping), | |||
| WithOptions { | |||
| source: InputMapping, | |||
| queue_size: Option<usize>, | |||
| }, | |||
| } | |||
| impl Borrow<String> for DataId { | |||
| fn borrow(&self) -> &String { | |||
| &self.0 | |||
| impl From<Input> for InputDef { | |||
| fn from(input: Input) -> Self { | |||
| match input { | |||
| Input { | |||
| mapping, | |||
| queue_size: None, | |||
| } => Self::MappingOnly(mapping), | |||
| Input { | |||
| mapping, | |||
| queue_size, | |||
| } => Self::WithOptions { | |||
| source: mapping, | |||
| queue_size, | |||
| }, | |||
| } | |||
| } | |||
| } | |||
| impl Borrow<str> for DataId { | |||
| fn borrow(&self) -> &str { | |||
| &self.0 | |||
| impl From<InputDef> for Input { | |||
| fn from(value: InputDef) -> Self { | |||
| match value { | |||
| InputDef::MappingOnly(mapping) => Self { | |||
| mapping, | |||
| queue_size: None, | |||
| }, | |||
| InputDef::WithOptions { source, queue_size } => Self { | |||
| mapping: source, | |||
| queue_size, | |||
| }, | |||
| } | |||
| } | |||
| } | |||
| @@ -158,6 +116,22 @@ impl fmt::Display for InputMapping { | |||
| } | |||
| } | |||
/// Wrapper that renders a `Duration` in dora's textual timer format
/// (`secs/N` or `millis/N`).
pub struct FormattedDuration(pub Duration);

impl fmt::Display for FormattedDuration {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Whole-second intervals use the `secs/` form; anything with a
        // sub-second millisecond component falls back to `millis/`.
        match self.0.subsec_millis() {
            0 => write!(f, "secs/{}", self.0.as_secs()),
            _ => write!(f, "millis/{}", self.0.as_millis()),
        }
    }
}

/// Convenience constructor for [`FormattedDuration`].
pub fn format_duration(interval: Duration) -> FormattedDuration {
    FormattedDuration(interval)
}
| impl Serialize for InputMapping { | |||
| fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | |||
| where | |||
| @@ -233,97 +207,6 @@ pub struct UserInputMapping { | |||
| pub output: DataId, | |||
| } | |||
/// Wrapper that renders a `Duration` in dora's textual timer format.
pub struct FormattedDuration(pub Duration);

impl fmt::Display for FormattedDuration {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Whole-second intervals print as `secs/N`; otherwise the
        // millisecond form `millis/N` is used.
        if self.0.subsec_millis() == 0 {
            write!(f, "secs/{}", self.0.as_secs())
        } else {
            write!(f, "millis/{}", self.0.as_millis())
        }
    }
}

/// Convenience constructor for [`FormattedDuration`].
pub fn format_duration(interval: Duration) -> FormattedDuration {
    FormattedDuration(interval)
}
/// Input/output configuration of a node: the wiring that connects it to
/// the rest of the dataflow.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct NodeRunConfig {
    /// Inputs for the nodes as a map from input ID to `node_id/output_id`.
    ///
    /// e.g.
    ///
    /// inputs:
    ///
    ///   example_input: example_node/example_output1
    ///
    #[serde(default)]
    pub inputs: BTreeMap<DataId, Input>,
    /// List of output IDs.
    ///
    /// e.g.
    ///
    /// outputs:
    ///
    ///  - output_1
    ///
    ///  - output_2
    #[serde(default)]
    pub outputs: BTreeSet<DataId>,
}
/// A single node input: where the data comes from and an optional
/// queue-size override.
///
/// Serialized through [`InputDef`], so a plain mapping string and the
/// `{ source, queue_size }` form both deserialize into this type.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields, from = "InputDef", into = "InputDef")]
pub struct Input {
    pub mapping: InputMapping,
    pub queue_size: Option<usize>,
}
/// Serialized form of [`Input`]: either a bare mapping, or a mapping plus
/// options. `untagged` lets serde pick the matching variant from the shape
/// of the YAML/JSON value.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum InputDef {
    MappingOnly(InputMapping),
    WithOptions {
        source: InputMapping,
        queue_size: Option<usize>,
    },
}
| impl From<Input> for InputDef { | |||
| fn from(input: Input) -> Self { | |||
| match input { | |||
| Input { | |||
| mapping, | |||
| queue_size: None, | |||
| } => Self::MappingOnly(mapping), | |||
| Input { | |||
| mapping, | |||
| queue_size, | |||
| } => Self::WithOptions { | |||
| source: mapping, | |||
| queue_size, | |||
| }, | |||
| } | |||
| } | |||
| } | |||
| impl From<InputDef> for Input { | |||
| fn from(value: InputDef) -> Self { | |||
| match value { | |||
| InputDef::MappingOnly(mapping) => Self { | |||
| mapping, | |||
| queue_size: None, | |||
| }, | |||
| InputDef::WithOptions { source, queue_size } => Self { | |||
| mapping: source, | |||
| queue_size, | |||
| }, | |||
| } | |||
| } | |||
| } | |||
| #[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Clone)] | |||
| #[serde(deny_unknown_fields, rename_all = "lowercase")] | |||
| pub struct CommunicationConfig { | |||
| @@ -1,11 +1,9 @@ | |||
| use std::collections::{BTreeMap, BTreeSet}; | |||
| use dora_core::config::NodeId; | |||
| use dora_core::uhlc; | |||
| use uuid::Uuid; | |||
| pub use crate::common::LogMessage; | |||
| pub use crate::common::{NodeError, NodeErrorCause, NodeExitStatus}; | |||
| pub use crate::common::{LogMessage, NodeError, NodeErrorCause, NodeExitStatus}; | |||
| use crate::id::NodeId; | |||
| #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] | |||
| pub enum ControlRequestReply { | |||
| @@ -1,13 +1,11 @@ | |||
| use std::{collections::BTreeMap, net::SocketAddr, path::PathBuf, time::Duration}; | |||
| use dora_core::{ | |||
| config::{NodeId, OperatorId}, | |||
| // TODO: how should we version these? | |||
| use crate::{ | |||
| descriptor::{Descriptor, ResolvedNode}, | |||
| id::{NodeId, OperatorId}, | |||
| DataflowId, | |||
| }; | |||
| use crate::DataflowId; | |||
| pub use crate::common::Timestamped; | |||
| #[derive(Debug, serde::Serialize, serde::Deserialize)] | |||
| @@ -1,11 +1,9 @@ | |||
| use std::collections::BTreeMap; | |||
| use dora_core::{config::NodeId, uhlc}; | |||
| pub use crate::common::{ | |||
| DataMessage, LogLevel, LogMessage, NodeError, NodeErrorCause, NodeExitStatus, Timestamped, | |||
| }; | |||
| use crate::{current_crate_version, versions_compatible, DataflowId}; | |||
| use crate::{current_crate_version, id::NodeId, versions_compatible, DataflowId}; | |||
| #[derive(Debug, serde::Serialize, serde::Deserialize)] | |||
| pub enum CoordinatorRequest { | |||
| @@ -1,9 +1,12 @@ | |||
| use std::collections::BTreeSet; | |||
| use aligned_vec::{AVec, ConstAlign}; | |||
| use dora_core::config::{DataId, NodeId}; | |||
| use crate::{metadata::Metadata, DataflowId}; | |||
| use crate::{ | |||
| id::{DataId, NodeId}, | |||
| metadata::Metadata, | |||
| DataflowId, | |||
| }; | |||
| #[derive(Debug, serde::Deserialize, serde::Serialize)] | |||
| pub enum InterDaemonEvent { | |||
| @@ -1,12 +1,13 @@ | |||
| use std::{net::SocketAddr, path::PathBuf}; | |||
| use dora_core::{ | |||
| config::{DataId, NodeId, NodeRunConfig, OperatorId}, | |||
| use crate::{ | |||
| config::NodeRunConfig, | |||
| descriptor::{Descriptor, OperatorDefinition}, | |||
| id::{DataId, NodeId, OperatorId}, | |||
| metadata::Metadata, | |||
| DataflowId, | |||
| }; | |||
| use crate::{metadata::Metadata, DataflowId}; | |||
| pub use crate::common::{DataMessage, DropToken, SharedMemoryId, Timestamped}; | |||
| // Passed via env variable | |||
| @@ -0,0 +1,331 @@ | |||
| use crate::{ | |||
| config::{CommunicationConfig, Input, InputMapping, NodeRunConfig}, | |||
| id::{DataId, NodeId, OperatorId}, | |||
| }; | |||
| use eyre::{eyre, Result}; | |||
| use log::warn; | |||
| use schemars::JsonSchema; | |||
| use serde::{Deserialize, Serialize}; | |||
| use serde_with_expand_env::with_expand_envs; | |||
| use std::{ | |||
| collections::{BTreeMap, BTreeSet}, | |||
| fmt, | |||
| path::PathBuf, | |||
| }; | |||
// Sentinel values for a custom node's `source` field:
// `shell` runs the `args` through a shell; `dynamic` marks a node that
// attaches to the dataflow at runtime instead of being spawned by dora.
pub const SHELL_SOURCE: &str = "shell";
pub const DYNAMIC_SOURCE: &str = "dynamic";
/// Dataflow description
///
/// Top-level type of a dataflow YAML file: the list of nodes plus the
/// (unstable) communication and deployment settings.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
#[schemars(title = "dora-rs specification")]
pub struct Descriptor {
    // Hidden from the published JSON schema; defaults when omitted.
    #[schemars(skip)]
    #[serde(default)]
    pub communication: CommunicationConfig,
    // Unstable feature, hence the `_unstable_deploy` YAML key.
    #[schemars(skip)]
    #[serde(default, rename = "_unstable_deploy")]
    pub deploy: Deploy,
    pub nodes: Vec<Node>,
}
/// Unstable deployment configuration: optionally pins a dataflow or node
/// to a named machine.
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Deploy {
    pub machine: Option<String>,
}
/// Dora Node
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Node {
    /// Node identifier
    pub id: NodeId,
    /// Node name
    pub name: Option<String>,
    /// Description of the node
    pub description: Option<String>,
    /// Environment variables
    pub env: Option<BTreeMap<String, EnvValue>>,

    /// Unstable machine deployment configuration
    #[schemars(skip)]
    #[serde(default, rename = "_unstable_deploy")]
    pub deploy: Deploy,

    // The three node "flavors": a runtime node hosting several operators,
    // a custom executable node, or a single-operator shorthand.
    // NOTE(review): presumably mutually exclusive — confirm against the
    // descriptor resolution code, which is not visible in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub operators: Option<RuntimeNode>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub custom: Option<CustomNode>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub operator: Option<SingleOperatorDefinition>,

    // Inline shorthand fields mirroring `CustomNode` / `NodeRunConfig`,
    // for defining a custom node without the nested `custom:` map.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub args: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub send_stdout_as: Option<String>,
    #[serde(default)]
    pub inputs: BTreeMap<DataId, Input>,
    #[serde(default)]
    pub outputs: BTreeSet<DataId>,
}
/// A node after descriptor resolution: the various [`Node`] shorthand
/// forms have been normalized into a single [`CoreNodeKind`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResolvedNode {
    pub id: NodeId,
    pub name: Option<String>,
    pub description: Option<String>,
    pub env: Option<BTreeMap<String, EnvValue>>,
    #[serde(default)]
    pub deploy: ResolvedDeploy,
    // Flattened so the kind serializes at the same level as the other
    // fields (`operators:` / `custom:` keys, see `CoreNodeKind`).
    #[serde(flatten)]
    pub kind: CoreNodeKind,
}
| impl ResolvedNode { | |||
| pub fn send_stdout_as(&self) -> Result<Option<String>> { | |||
| match &self.kind { | |||
| // TODO: Split stdout between operators | |||
| CoreNodeKind::Runtime(n) => { | |||
| let count = n | |||
| .operators | |||
| .iter() | |||
| .filter(|op| op.config.send_stdout_as.is_some()) | |||
| .count(); | |||
| if count == 1 && n.operators.len() > 1 { | |||
| warn!("All stdout from all operators of a runtime are going to be sent in the selected `send_stdout_as` operator.") | |||
| } else if count > 1 { | |||
| return Err(eyre!("More than one `send_stdout_as` entries for a runtime node. Please only use one `send_stdout_as` per runtime.")); | |||
| } | |||
| Ok(n.operators.iter().find_map(|op| { | |||
| op.config | |||
| .send_stdout_as | |||
| .clone() | |||
| .map(|stdout| format!("{}/{}", op.id, stdout)) | |||
| })) | |||
| } | |||
| CoreNodeKind::Custom(n) => Ok(n.send_stdout_as.clone()), | |||
| } | |||
| } | |||
| } | |||
/// Deployment info after resolution: the machine is no longer optional.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ResolvedDeploy {
    pub machine: String,
}
/// The two normalized node kinds after descriptor resolution.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CoreNodeKind {
    /// Dora runtime node
    #[serde(rename = "operators")]
    Runtime(RuntimeNode),
    Custom(CustomNode),
}
| pub fn runtime_node_inputs(n: &RuntimeNode) -> BTreeMap<DataId, Input> { | |||
| n.operators | |||
| .iter() | |||
| .flat_map(|operator| { | |||
| operator.config.inputs.iter().map(|(input_id, mapping)| { | |||
| ( | |||
| DataId::from(format!("{}/{input_id}", operator.id)), | |||
| mapping.clone(), | |||
| ) | |||
| }) | |||
| }) | |||
| .collect() | |||
| } | |||
| fn runtime_node_outputs(n: &RuntimeNode) -> BTreeSet<DataId> { | |||
| n.operators | |||
| .iter() | |||
| .flat_map(|operator| { | |||
| operator | |||
| .config | |||
| .outputs | |||
| .iter() | |||
| .map(|output_id| DataId::from(format!("{}/{output_id}", operator.id))) | |||
| }) | |||
| .collect() | |||
| } | |||
| impl CoreNodeKind { | |||
| pub fn run_config(&self) -> NodeRunConfig { | |||
| match self { | |||
| CoreNodeKind::Runtime(n) => NodeRunConfig { | |||
| inputs: runtime_node_inputs(n), | |||
| outputs: runtime_node_outputs(n), | |||
| }, | |||
| CoreNodeKind::Custom(n) => n.run_config.clone(), | |||
| } | |||
| } | |||
| pub fn dynamic(&self) -> bool { | |||
| match self { | |||
| CoreNodeKind::Runtime(_n) => false, | |||
| CoreNodeKind::Custom(n) => n.source == DYNAMIC_SOURCE, | |||
| } | |||
| } | |||
| } | |||
/// A runtime node: a list of operators hosted by a single dora runtime
/// process. `transparent` serializes it as the bare operator list.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(transparent)]
pub struct RuntimeNode {
    pub operators: Vec<OperatorDefinition>,
}
/// An operator entry of a runtime node: its ID plus the flattened
/// [`OperatorConfig`] fields.
#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct OperatorDefinition {
    pub id: OperatorId,
    #[serde(flatten)]
    pub config: OperatorConfig,
}
/// Shorthand form used by the `operator:` field of a [`Node`], where the
/// operator ID may be omitted.
#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct SingleOperatorDefinition {
    /// ID is optional if there is only a single operator.
    pub id: Option<OperatorId>,
    #[serde(flatten)]
    pub config: OperatorConfig,
}
/// Configuration of a single operator: metadata, I/O wiring, its source
/// (shared library / Python / WASM), and optional build/stdout settings.
#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct OperatorConfig {
    pub name: Option<String>,
    pub description: Option<String>,

    #[serde(default)]
    pub inputs: BTreeMap<DataId, Input>,
    #[serde(default)]
    pub outputs: BTreeSet<DataId>,

    // Flattened so the `shared-library:` / `python:` / `wasm:` key appears
    // directly in the operator mapping.
    #[serde(flatten)]
    pub source: OperatorSource,

    /// Optional build command, run before starting the dataflow.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build: Option<String>,
    /// Redirect this operator's stdout to the named output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub send_stdout_as: Option<String>,
}
/// Where an operator's code comes from. Serialized with kebab-case tags,
/// e.g. `shared-library: …`, `python: …`.
#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
#[serde(rename_all = "kebab-case")]
pub enum OperatorSource {
    SharedLibrary(String),
    Python(PythonSource),
    // Hidden from the JSON schema (unstable/experimental).
    #[schemars(skip)]
    Wasm(String),
}
/// Source of a Python operator: the script path plus an optional conda
/// environment. Serialized through [`PythonSourceDef`], so a bare string
/// and the `{ source, conda_env }` form both deserialize into this type.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(
    deny_unknown_fields,
    from = "PythonSourceDef",
    into = "PythonSourceDef"
)]
pub struct PythonSource {
    pub source: String,
    pub conda_env: Option<String>,
}
/// Serialized form of [`PythonSource`]: either a bare source string or a
/// map with options. `untagged` lets serde pick the variant from the shape
/// of the value.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum PythonSourceDef {
    SourceOnly(String),
    WithOptions {
        source: String,
        conda_env: Option<String>,
    },
}
| impl From<PythonSource> for PythonSourceDef { | |||
| fn from(input: PythonSource) -> Self { | |||
| match input { | |||
| PythonSource { | |||
| source, | |||
| conda_env: None, | |||
| } => Self::SourceOnly(source), | |||
| PythonSource { source, conda_env } => Self::WithOptions { source, conda_env }, | |||
| } | |||
| } | |||
| } | |||
| impl From<PythonSourceDef> for PythonSource { | |||
| fn from(value: PythonSourceDef) -> Self { | |||
| match value { | |||
| PythonSourceDef::SourceOnly(source) => Self { | |||
| source, | |||
| conda_env: None, | |||
| }, | |||
| PythonSourceDef::WithOptions { source, conda_env } => Self { source, conda_env }, | |||
| } | |||
| } | |||
| } | |||
/// Configuration of a Python operator: script path plus its I/O wiring.
/// NOTE(review): unlike `OperatorConfig`, inputs map directly to
/// `InputMapping` (no queue-size options) — confirm this is intentional.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct PythonOperatorConfig {
    pub path: PathBuf,
    #[serde(default)]
    pub inputs: BTreeMap<DataId, InputMapping>,
    #[serde(default)]
    pub outputs: BTreeSet<DataId>,
}
/// A custom (executable) node: an arbitrary program spawned by dora.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct CustomNode {
    /// Path of the source code
    ///
    /// If you want to use a specific `conda` environment.
    /// Provide the python path within the source.
    ///
    /// source: /home/peter/miniconda3/bin/python
    ///
    /// args: some_node.py
    ///
    /// Source can match any executable in PATH.
    pub source: String,
    /// Args for the executable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub args: Option<String>,
    /// Environment variables for the custom nodes
    ///
    /// Deprecated, use outer-level `env` field instead.
    pub envs: Option<BTreeMap<String, EnvValue>>,
    /// Optional build command, run before starting the dataflow.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build: Option<String>,
    /// Send stdout and stderr to another node
    #[serde(skip_serializing_if = "Option::is_none")]
    pub send_stdout_as: Option<String>,
    // Flattened so `inputs:`/`outputs:` appear directly on the node.
    #[serde(flatten)]
    pub run_config: NodeRunConfig,
}
/// A single environment-variable value. `untagged` + `with_expand_envs`
/// means values are parsed by shape (bool, then integer, then string) with
/// `$VAR`-style references expanded from the environment.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum EnvValue {
    #[serde(deserialize_with = "with_expand_envs")]
    Bool(bool),
    #[serde(deserialize_with = "with_expand_envs")]
    Integer(u64),
    #[serde(deserialize_with = "with_expand_envs")]
    String(String),
}
| impl fmt::Display for EnvValue { | |||
| fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { | |||
| match self { | |||
| EnvValue::Bool(bool) => fmt.write_str(&bool.to_string()), | |||
| EnvValue::Integer(u64) => fmt.write_str(&u64.to_string()), | |||
| EnvValue::String(str) => fmt.write_str(str), | |||
| } | |||
| } | |||
| } | |||
| @@ -0,0 +1,121 @@ | |||
| use std::{borrow::Borrow, convert::Infallible, str::FromStr}; | |||
| use schemars::JsonSchema; | |||
| use serde::{Deserialize, Serialize}; | |||
/// Unique identifier of a node inside a dataflow.
///
/// Thin newtype over the node name; any string is a valid ID, so parsing
/// is infallible.
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct NodeId(pub(crate) String);

impl FromStr for NodeId {
    // Any string is a valid node ID.
    type Err = Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Self(s.to_owned()))
    }
}

impl From<String> for NodeId {
    fn from(id: String) -> Self {
        Self(id)
    }
}

impl std::fmt::Display for NodeId {
    // Displays as the bare inner string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}

impl AsRef<str> for NodeId {
    fn as_ref(&self) -> &str {
        &self.0
    }
}
/// Unique identifier of an operator inside a runtime node.
///
/// Thin newtype over the operator name; any string is a valid ID, so
/// parsing is infallible.
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct OperatorId(String);

impl FromStr for OperatorId {
    // Any string is a valid operator ID.
    type Err = Infallible;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(Self(s.to_owned()))
    }
}

impl From<String> for OperatorId {
    fn from(id: String) -> Self {
        Self(id)
    }
}

impl std::fmt::Display for OperatorId {
    // Displays as the bare inner string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}

impl AsRef<str> for OperatorId {
    fn as_ref(&self) -> &str {
        &self.0
    }
}
/// Identifier of a node input or output.
///
/// Thin newtype over a string. The extra `Deref`/`AsRef`/`Borrow` impls
/// let a `DataId` be used wherever a `&str`/`&String` is expected, e.g. as
/// a map lookup key.
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct DataId(String);

impl From<DataId> for String {
    fn from(id: DataId) -> Self {
        id.0
    }
}

impl From<String> for DataId {
    fn from(id: String) -> Self {
        Self(id)
    }
}

impl std::fmt::Display for DataId {
    // Displays as the bare inner string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}

impl std::ops::Deref for DataId {
    type Target = String;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl AsRef<String> for DataId {
    fn as_ref(&self) -> &String {
        &self.0
    }
}

impl AsRef<str> for DataId {
    fn as_ref(&self) -> &str {
        &self.0
    }
}

// `Borrow` impls allow map lookups keyed by `DataId` using `&String`/`&str`.
impl Borrow<String> for DataId {
    fn borrow(&self) -> &String {
        &self.0
    }
}

impl Borrow<str> for DataId {
    fn borrow(&self) -> &str {
        &self.0
    }
}
| @@ -3,7 +3,12 @@ | |||
| #![allow(clippy::missing_safety_doc)] | |||
| pub use uhlc; | |||
| pub mod common; | |||
| pub mod config; | |||
| pub mod descriptor; | |||
| pub mod id; | |||
| pub mod metadata; | |||
| pub mod coordinator_to_daemon; | |||
| @@ -2,7 +2,6 @@ use std::collections::BTreeMap; | |||
| use arrow_data::ArrayData; | |||
| use arrow_schema::DataType; | |||
| use dora_core::uhlc; | |||
| use eyre::Context; | |||
| use serde::{Deserialize, Serialize}; | |||
| @@ -1,9 +1,12 @@ | |||
| pub use crate::common::{ | |||
| DataMessage, DropToken, LogLevel, LogMessage, SharedMemoryId, Timestamped, | |||
| }; | |||
| use crate::{current_crate_version, metadata::Metadata, versions_compatible, DataflowId}; | |||
| use dora_core::config::{DataId, NodeId}; | |||
| use crate::{ | |||
| current_crate_version, | |||
| id::{DataId, NodeId}, | |||
| metadata::Metadata, | |||
| versions_compatible, DataflowId, | |||
| }; | |||
| #[derive(Debug, serde::Serialize, serde::Deserialize)] | |||
| pub enum DaemonRequest { | |||