Browse Source

Move more types from `dora-core` to `dora-message` to avoid a dependency of `dora-message` on `dora-core`

Make `dora-message` a dependency of `dora-core`, instead of the other way around. This way, we can continue to freely bump the version of `dora-core` with the other workspace crates, without introducing errors such as #708.
tags/v0.3.7rc2
Philipp Oppermann 1 year ago
parent
commit
7c44e7a2e6
Failed to extract signature
29 changed files with 709 additions and 648 deletions
  1. +6
    -2
      Cargo.lock
  2. +1
    -1
      binaries/cli/src/attach.rs
  3. +1
    -1
      binaries/cli/src/build.rs
  4. +1
    -1
      binaries/cli/src/graph/mod.rs
  5. +1
    -1
      binaries/cli/src/lib.rs
  6. +1
    -1
      binaries/coordinator/src/lib.rs
  7. +2
    -4
      binaries/coordinator/src/run/mod.rs
  8. +5
    -2
      binaries/daemon/src/lib.rs
  9. +2
    -2
      examples/multiple-daemons/run.rs
  10. +1
    -1
      libraries/core/Cargo.toml
  11. +1
    -1
      libraries/core/src/bin/generate_schema.rs
  12. +98
    -395
      libraries/core/src/descriptor/mod.rs
  13. +7
    -3
      libraries/core/src/descriptor/validate.rs
  14. +7
    -2
      libraries/core/src/descriptor/visualize.rs
  15. +1
    -2
      libraries/core/src/lib.rs
  16. +6
    -1
      libraries/message/Cargo.toml
  17. +4
    -3
      libraries/message/src/cli_to_coordinator.rs
  18. +1
    -2
      libraries/message/src/common.rs
  19. +84
    -201
      libraries/message/src/config.rs
  20. +2
    -4
      libraries/message/src/coordinator_to_cli.rs
  21. +3
    -5
      libraries/message/src/coordinator_to_daemon.rs
  22. +1
    -3
      libraries/message/src/daemon_to_coordinator.rs
  23. +5
    -2
      libraries/message/src/daemon_to_daemon.rs
  24. +5
    -4
      libraries/message/src/daemon_to_node.rs
  25. +331
    -0
      libraries/message/src/descriptor.rs
  26. +121
    -0
      libraries/message/src/id.rs
  27. +5
    -0
      libraries/message/src/lib.rs
  28. +0
    -1
      libraries/message/src/metadata.rs
  29. +6
    -3
      libraries/message/src/node_to_daemon.rs

+ 6
- 2
Cargo.lock View File

@@ -2403,6 +2403,7 @@ dependencies = [
name = "dora-core"
version = "0.3.6"
dependencies = [
"dora-message",
"eyre",
"log",
"once_cell",
@@ -2413,7 +2414,6 @@ dependencies = [
"serde_yaml 0.9.34+deprecated",
"tokio",
"tracing",
"uhlc",
"uuid",
"which",
]
@@ -2486,12 +2486,16 @@ dependencies = [
"aligned-vec",
"arrow-data",
"arrow-schema",
"dora-core",
"eyre",
"log",
"once_cell",
"schemars",
"semver",
"serde",
"serde-with-expand-env",
"serde_yaml 0.9.34+deprecated",
"tokio",
"uhlc",
"uuid",
]



+ 1
- 1
binaries/cli/src/attach.rs View File

@@ -1,6 +1,6 @@
use colored::Colorize;
use communication_layer_request_reply::{TcpConnection, TcpRequestReplyConnection};
use dora_core::descriptor::{resolve_path, CoreNodeKind, Descriptor};
use dora_core::descriptor::{resolve_path, CoreNodeKind, Descriptor, DescriptorExt};
use dora_message::cli_to_coordinator::ControlRequest;
use dora_message::common::LogMessage;
use dora_message::coordinator_to_cli::ControlRequestReply;


+ 1
- 1
binaries/cli/src/build.rs View File

@@ -1,6 +1,6 @@
use dora_core::{
config::OperatorId,
descriptor::{Descriptor, SINGLE_OPERATOR_DEFAULT_ID},
descriptor::{Descriptor, DescriptorExt, NodeExt, SINGLE_OPERATOR_DEFAULT_ID},
};
use eyre::{eyre, Context};
use std::{path::Path, process::Command};


+ 1
- 1
binaries/cli/src/graph/mod.rs View File

@@ -1,6 +1,6 @@
use std::{fs::File, io::Write, path::Path};

use dora_core::descriptor::Descriptor;
use dora_core::descriptor::{Descriptor, DescriptorExt};
use eyre::Context;

const MERMAID_TEMPLATE: &str = include_str!("mermaid-template.html");


+ 1
- 1
binaries/cli/src/lib.rs View File

@@ -3,7 +3,7 @@ use colored::Colorize;
use communication_layer_request_reply::{RequestReplyLayer, TcpLayer, TcpRequestReplyConnection};
use dora_coordinator::Event;
use dora_core::{
descriptor::{source_is_url, Descriptor},
descriptor::{source_is_url, Descriptor, DescriptorExt},
topics::{
DORA_COORDINATOR_PORT_CONTROL_DEFAULT, DORA_COORDINATOR_PORT_DEFAULT,
DORA_DAEMON_LOCAL_LISTEN_PORT_DEFAULT,


+ 1
- 1
binaries/coordinator/src/lib.rs View File

@@ -5,7 +5,6 @@ use crate::{
pub use control::ControlEvent;
use dora_core::{
config::{NodeId, OperatorId},
descriptor::{Descriptor, ResolvedNode},
uhlc::{self, HLC},
};
use dora_message::{
@@ -16,6 +15,7 @@ use dora_message::{
},
coordinator_to_daemon::{DaemonCoordinatorEvent, RegisterResult, Timestamped},
daemon_to_coordinator::{DaemonCoordinatorReply, DataflowDaemonResult},
descriptor::{Descriptor, ResolvedNode},
};
use eyre::{bail, eyre, ContextCompat, Result, WrapErr};
use futures::{future::join_all, stream::FuturesUnordered, Future, Stream, StreamExt};


+ 2
- 4
binaries/coordinator/src/run/mod.rs View File

@@ -3,13 +3,11 @@ use crate::{
DaemonConnection,
};

use dora_core::{
descriptor::{Descriptor, ResolvedNode},
uhlc::HLC,
};
use dora_core::{descriptor::DescriptorExt, uhlc::HLC};
use dora_message::{
coordinator_to_daemon::{DaemonCoordinatorEvent, SpawnDataflowNodes, Timestamped},
daemon_to_coordinator::DaemonCoordinatorReply,
descriptor::{Descriptor, ResolvedNode},
};
use eyre::{bail, eyre, ContextCompat, WrapErr};
use std::{


+ 5
- 2
binaries/daemon/src/lib.rs View File

@@ -3,7 +3,10 @@ use coordinator::CoordinatorEvent;
use crossbeam::queue::ArrayQueue;
use dora_core::{
config::{DataId, Input, InputMapping, NodeId, OperatorId},
descriptor::{runtime_node_inputs, CoreNodeKind, Descriptor, ResolvedNode},
descriptor::{
read_as_descriptor, runtime_node_inputs, CoreNodeKind, Descriptor, DescriptorExt,
ResolvedNode,
},
topics::LOCALHOST,
uhlc::{self, HLC},
};
@@ -162,7 +165,7 @@ impl Daemon {
.ok_or_else(|| eyre::eyre!("canonicalized dataflow path has no parent"))?
.to_owned();

let descriptor = Descriptor::read(dataflow_path).await?;
let descriptor = read_as_descriptor(dataflow_path).await?;
descriptor.check(&working_dir)?;
let nodes = descriptor.resolve_aliases_and_set_defaults()?;



+ 2
- 2
examples/multiple-daemons/run.rs View File

@@ -1,6 +1,6 @@
use dora_coordinator::{ControlEvent, Event};
use dora_core::{
descriptor::Descriptor,
descriptor::{read_as_descriptor, DescriptorExt},
topics::{DORA_COORDINATOR_PORT_CONTROL_DEFAULT, DORA_COORDINATOR_PORT_DEFAULT},
};
use dora_message::{
@@ -115,7 +115,7 @@ async fn start_dataflow(
dataflow: &Path,
coordinator_events_tx: &Sender<Event>,
) -> eyre::Result<Uuid> {
let dataflow_descriptor = Descriptor::read(dataflow)
let dataflow_descriptor = read_as_descriptor(dataflow)
.await
.wrap_err("failed to read yaml dataflow")?;
let working_dir = dataflow


+ 1
- 1
libraries/core/Cargo.toml View File

@@ -10,6 +10,7 @@ repository.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
dora-message = { workspace = true }
eyre = "0.6.8"
serde = { version = "1.0.136", features = ["derive"] }
serde_yaml = "0.9.11"
@@ -22,4 +23,3 @@ tokio = { version = "1.24.1", features = ["fs", "process", "sync"] }
schemars = "0.8.19"
serde_json = "1.0.117"
log = { version = "0.4.21", features = ["serde"] }
uhlc = "0.5.1"

+ 1
- 1
libraries/core/src/bin/generate_schema.rs View File

@@ -1,6 +1,6 @@
use std::{env, path::Path};

use dora_core::descriptor::Descriptor;
use dora_message::descriptor::Descriptor;
use schemars::schema_for;

fn main() {


+ 98
- 395
libraries/core/src/descriptor/mod.rs View File

@@ -1,41 +1,43 @@
use crate::config::{
CommunicationConfig, DataId, Input, InputMapping, NodeId, NodeRunConfig, OperatorId,
use dora_message::{
config::{Input, InputMapping, NodeRunConfig},
id::{DataId, OperatorId},
};
use eyre::{bail, eyre, Context, OptionExt, Result};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde_with_expand_env::with_expand_envs;
use eyre::{bail, Context, OptionExt, Result};
use std::{
collections::{BTreeMap, BTreeSet, HashMap},
collections::{BTreeMap, HashMap},
env::consts::EXE_EXTENSION,
fmt,
path::{Path, PathBuf},
};
use tracing::warn;

// reexport for compatibility
pub use dora_message::descriptor::{
runtime_node_inputs, CoreNodeKind, CustomNode, Descriptor, Node, OperatorConfig,
OperatorDefinition, OperatorSource, PythonSource, ResolvedDeploy, ResolvedNode, RuntimeNode,
SingleOperatorDefinition, DYNAMIC_SOURCE, SHELL_SOURCE,
};
pub use visualize::collect_dora_timers;

mod validate;
mod visualize;
pub const SHELL_SOURCE: &str = "shell";
pub const DYNAMIC_SOURCE: &str = "dynamic";

/// Dataflow description
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
#[schemars(title = "dora-rs specification")]
pub struct Descriptor {
#[schemars(skip)]
#[serde(default)]
pub communication: CommunicationConfig,
#[schemars(skip)]
#[serde(default, rename = "_unstable_deploy")]
pub deploy: Deploy,
pub nodes: Vec<Node>,
pub trait DescriptorExt {
fn resolve_aliases_and_set_defaults(&self) -> eyre::Result<Vec<ResolvedNode>>;
fn visualize_as_mermaid(&self) -> eyre::Result<String>;
fn blocking_read(path: &Path) -> eyre::Result<Descriptor>;
fn parse(buf: Vec<u8>) -> eyre::Result<Descriptor>;
fn check(&self, working_dir: &Path) -> eyre::Result<()>;
fn check_in_daemon(
&self,
working_dir: &Path,
remote_machine_id: &[&str],
coordinator_is_remote: bool,
) -> eyre::Result<()>;
}

pub const SINGLE_OPERATOR_DEFAULT_ID: &str = "op";

impl Descriptor {
pub fn resolve_aliases_and_set_defaults(&self) -> eyre::Result<Vec<ResolvedNode>> {
impl DescriptorExt for Descriptor {
fn resolve_aliases_and_set_defaults(&self) -> eyre::Result<Vec<ResolvedNode>> {
let default_op_id = OperatorId::from(SINGLE_OPERATOR_DEFAULT_ID.to_string());

let single_operator_nodes: HashMap<_, _> = self
@@ -51,7 +53,7 @@ impl Descriptor {
let mut resolved = vec![];
for mut node in self.nodes.clone() {
// adjust input mappings
let mut node_kind = node.kind_mut()?;
let mut node_kind = node_kind_mut(&mut node)?;
let input_mappings: Vec<_> = match &mut node_kind {
NodeKindMut::Standard { path: _, inputs } => inputs.values_mut().collect(),
NodeKindMut::Runtime(node) => node
@@ -102,7 +104,14 @@ impl Descriptor {
name: node.name,
description: node.description,
env: node.env,
deploy: ResolvedDeploy::new(node.deploy, self),
deploy: {
let default_machine = self.deploy.machine.as_deref().unwrap_or_default();
let machine = match node.deploy.machine {
Some(m) => m,
None => default_machine.to_owned(),
};
ResolvedDeploy { machine }
},
kind,
});
}
@@ -110,35 +119,28 @@ impl Descriptor {
Ok(resolved)
}

pub fn visualize_as_mermaid(&self) -> eyre::Result<String> {
fn visualize_as_mermaid(&self) -> eyre::Result<String> {
let resolved = self.resolve_aliases_and_set_defaults()?;
let flowchart = visualize::visualize_nodes(&resolved);

Ok(flowchart)
}

pub async fn read(path: &Path) -> eyre::Result<Descriptor> {
let buf = tokio::fs::read(path)
.await
.context("failed to open given file")?;
Descriptor::parse(buf)
}

pub fn blocking_read(path: &Path) -> eyre::Result<Descriptor> {
fn blocking_read(path: &Path) -> eyre::Result<Descriptor> {
let buf = std::fs::read(path).context("failed to open given file")?;
Descriptor::parse(buf)
}

pub fn parse(buf: Vec<u8>) -> eyre::Result<Descriptor> {
fn parse(buf: Vec<u8>) -> eyre::Result<Descriptor> {
serde_yaml::from_slice(&buf).context("failed to parse given descriptor")
}

pub fn check(&self, working_dir: &Path) -> eyre::Result<()> {
fn check(&self, working_dir: &Path) -> eyre::Result<()> {
validate::check_dataflow(self, working_dir, None, false)
.wrap_err("Dataflow could not be validated.")
}

pub fn check_in_daemon(
fn check_in_daemon(
&self,
working_dir: &Path,
remote_machine_id: &[&str],
@@ -154,53 +156,70 @@ impl Descriptor {
}
}

#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Deploy {
pub machine: Option<String>,
pub async fn read_as_descriptor(path: &Path) -> eyre::Result<Descriptor> {
let buf = tokio::fs::read(path)
.await
.context("failed to open given file")?;
Descriptor::parse(buf)
}

fn node_kind_mut(node: &mut Node) -> eyre::Result<NodeKindMut> {
match node.kind()? {
NodeKind::Standard(_) => node
.path
.as_ref()
.map(|path| NodeKindMut::Standard {
path,
inputs: &mut node.inputs,
})
.ok_or_eyre("no path"),
NodeKind::Runtime(_) => node
.operators
.as_mut()
.map(NodeKindMut::Runtime)
.ok_or_eyre("no operators"),
NodeKind::Custom(_) => node
.custom
.as_mut()
.map(NodeKindMut::Custom)
.ok_or_eyre("no custom"),
NodeKind::Operator(_) => node
.operator
.as_mut()
.map(NodeKindMut::Operator)
.ok_or_eyre("no operator"),
}
}

/// Dora Node
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Node {
/// Node identifier
pub id: NodeId,
/// Node name
pub name: Option<String>,
/// Description of the node
pub description: Option<String>,
/// Environment variables
pub env: Option<BTreeMap<String, EnvValue>>,
pub fn source_is_url(source: &str) -> bool {
source.contains("://")
}

/// Unstable machine deployment configuration
#[schemars(skip)]
#[serde(default, rename = "_unstable_deploy")]
pub deploy: Deploy,
pub fn resolve_path(source: &str, working_dir: &Path) -> Result<PathBuf> {
let path = Path::new(&source);
let path = if path.extension().is_none() {
path.with_extension(EXE_EXTENSION)
} else {
path.to_owned()
};

#[serde(default, skip_serializing_if = "Option::is_none")]
operators: Option<RuntimeNode>,
#[serde(default, skip_serializing_if = "Option::is_none")]
custom: Option<CustomNode>,
#[serde(default, skip_serializing_if = "Option::is_none")]
operator: Option<SingleOperatorDefinition>,
// Search path within current working directory
if let Ok(abs_path) = working_dir.join(&path).canonicalize() {
Ok(abs_path)
// Search path within $PATH
} else if let Ok(abs_path) = which::which(&path) {
Ok(abs_path)
} else {
bail!("Could not find source path {}", path.display())
}
}

#[serde(default, skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub args: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub build: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub send_stdout_as: Option<String>,
#[serde(default)]
pub inputs: BTreeMap<DataId, Input>,
#[serde(default)]
pub outputs: BTreeSet<DataId>,
pub trait NodeExt {
fn kind(&self) -> eyre::Result<NodeKind>;
}

impl Node {
pub fn kind(&self) -> eyre::Result<NodeKind> {
impl NodeExt for Node {
fn kind(&self) -> eyre::Result<NodeKind> {
match (&self.path, &self.operators, &self.custom, &self.operator) {
(None, None, None, None) => {
eyre::bail!(
@@ -220,34 +239,6 @@ impl Node {
}
}
}

fn kind_mut(&mut self) -> eyre::Result<NodeKindMut> {
match self.kind()? {
NodeKind::Standard(_) => self
.path
.as_ref()
.map(|path| NodeKindMut::Standard {
path,
inputs: &mut self.inputs,
})
.ok_or_eyre("no path"),
NodeKind::Runtime(_) => self
.operators
.as_mut()
.map(NodeKindMut::Runtime)
.ok_or_eyre("no operators"),
NodeKind::Custom(_) => self
.custom
.as_mut()
.map(NodeKindMut::Custom)
.ok_or_eyre("no custom"),
NodeKind::Operator(_) => self
.operator
.as_mut()
.map(NodeKindMut::Operator)
.ok_or_eyre("no operator"),
}
}
}

#[derive(Debug)]
@@ -270,291 +261,3 @@ enum NodeKindMut<'a> {
Custom(&'a mut CustomNode),
Operator(&'a mut SingleOperatorDefinition),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResolvedNode {
pub id: NodeId,
pub name: Option<String>,
pub description: Option<String>,
pub env: Option<BTreeMap<String, EnvValue>>,

#[serde(default)]
pub deploy: ResolvedDeploy,

#[serde(flatten)]
pub kind: CoreNodeKind,
}

impl ResolvedNode {
pub fn send_stdout_as(&self) -> Result<Option<String>> {
match &self.kind {
// TODO: Split stdout between operators
CoreNodeKind::Runtime(n) => {
let count = n
.operators
.iter()
.filter(|op| op.config.send_stdout_as.is_some())
.count();
if count == 1 && n.operators.len() > 1 {
warn!("All stdout from all operators of a runtime are going to be sent in the selected `send_stdout_as` operator.")
} else if count > 1 {
return Err(eyre!("More than one `send_stdout_as` entries for a runtime node. Please only use one `send_stdout_as` per runtime."));
}
Ok(n.operators.iter().find_map(|op| {
op.config
.send_stdout_as
.clone()
.map(|stdout| format!("{}/{}", op.id, stdout))
}))
}
CoreNodeKind::Custom(n) => Ok(n.send_stdout_as.clone()),
}
}
}

#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ResolvedDeploy {
pub machine: String,
}
impl ResolvedDeploy {
fn new(deploy: Deploy, descriptor: &Descriptor) -> Self {
let default_machine = descriptor.deploy.machine.as_deref().unwrap_or_default();
let machine = match deploy.machine {
Some(m) => m,
None => default_machine.to_owned(),
};
Self { machine }
}
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CoreNodeKind {
/// Dora runtime node
#[serde(rename = "operators")]
Runtime(RuntimeNode),
Custom(CustomNode),
}

pub fn runtime_node_inputs(n: &RuntimeNode) -> BTreeMap<DataId, Input> {
n.operators
.iter()
.flat_map(|operator| {
operator.config.inputs.iter().map(|(input_id, mapping)| {
(
DataId::from(format!("{}/{input_id}", operator.id)),
mapping.clone(),
)
})
})
.collect()
}

fn runtime_node_outputs(n: &RuntimeNode) -> BTreeSet<DataId> {
n.operators
.iter()
.flat_map(|operator| {
operator
.config
.outputs
.iter()
.map(|output_id| DataId::from(format!("{}/{output_id}", operator.id)))
})
.collect()
}

impl CoreNodeKind {
pub fn run_config(&self) -> NodeRunConfig {
match self {
CoreNodeKind::Runtime(n) => NodeRunConfig {
inputs: runtime_node_inputs(n),
outputs: runtime_node_outputs(n),
},
CoreNodeKind::Custom(n) => n.run_config.clone(),
}
}

pub fn dynamic(&self) -> bool {
match self {
CoreNodeKind::Runtime(_n) => false,
CoreNodeKind::Custom(n) => n.source == DYNAMIC_SOURCE,
}
}
}

#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(transparent)]
pub struct RuntimeNode {
pub operators: Vec<OperatorDefinition>,
}

#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct OperatorDefinition {
pub id: OperatorId,
#[serde(flatten)]
pub config: OperatorConfig,
}

#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct SingleOperatorDefinition {
/// ID is optional if there is only a single operator.
pub id: Option<OperatorId>,
#[serde(flatten)]
pub config: OperatorConfig,
}

#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct OperatorConfig {
pub name: Option<String>,
pub description: Option<String>,

#[serde(default)]
pub inputs: BTreeMap<DataId, Input>,
#[serde(default)]
pub outputs: BTreeSet<DataId>,

#[serde(flatten)]
pub source: OperatorSource,

#[serde(default, skip_serializing_if = "Option::is_none")]
pub build: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub send_stdout_as: Option<String>,
}

#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
#[serde(rename_all = "kebab-case")]
pub enum OperatorSource {
SharedLibrary(String),
Python(PythonSource),
#[schemars(skip)]
Wasm(String),
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(
deny_unknown_fields,
from = "PythonSourceDef",
into = "PythonSourceDef"
)]
pub struct PythonSource {
pub source: String,
pub conda_env: Option<String>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum PythonSourceDef {
SourceOnly(String),
WithOptions {
source: String,
conda_env: Option<String>,
},
}

impl From<PythonSource> for PythonSourceDef {
fn from(input: PythonSource) -> Self {
match input {
PythonSource {
source,
conda_env: None,
} => Self::SourceOnly(source),
PythonSource { source, conda_env } => Self::WithOptions { source, conda_env },
}
}
}

impl From<PythonSourceDef> for PythonSource {
fn from(value: PythonSourceDef) -> Self {
match value {
PythonSourceDef::SourceOnly(source) => Self {
source,
conda_env: None,
},
PythonSourceDef::WithOptions { source, conda_env } => Self { source, conda_env },
}
}
}

pub fn source_is_url(source: &str) -> bool {
source.contains("://")
}

pub fn resolve_path(source: &str, working_dir: &Path) -> Result<PathBuf> {
let path = Path::new(&source);
let path = if path.extension().is_none() {
path.with_extension(EXE_EXTENSION)
} else {
path.to_owned()
};

// Search path within current working directory
if let Ok(abs_path) = working_dir.join(&path).canonicalize() {
Ok(abs_path)
// Search path within $PATH
} else if let Ok(abs_path) = which::which(&path) {
Ok(abs_path)
} else {
bail!("Could not find source path {}", path.display())
}
}

#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct PythonOperatorConfig {
pub path: PathBuf,
#[serde(default)]
pub inputs: BTreeMap<DataId, InputMapping>,
#[serde(default)]
pub outputs: BTreeSet<DataId>,
}

#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct CustomNode {
/// Path of the source code
///
/// If you want to use a specific `conda` environment.
/// Provide the python path within the source.
///
/// source: /home/peter/miniconda3/bin/python
///
/// args: some_node.py
///
/// Source can match any executable in PATH.
pub source: String,
/// Args for the executable.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub args: Option<String>,
/// Environment variables for the custom nodes
///
/// Deprecated, use outer-level `env` field instead.
pub envs: Option<BTreeMap<String, EnvValue>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub build: Option<String>,
/// Send stdout and stderr to another node
#[serde(skip_serializing_if = "Option::is_none")]
pub send_stdout_as: Option<String>,

#[serde(flatten)]
pub run_config: NodeRunConfig,
}

#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum EnvValue {
#[serde(deserialize_with = "with_expand_envs")]
Bool(bool),
#[serde(deserialize_with = "with_expand_envs")]
Integer(u64),
#[serde(deserialize_with = "with_expand_envs")]
String(String),
}

impl fmt::Display for EnvValue {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
match self {
EnvValue::Bool(bool) => fmt.write_str(&bool.to_string()),
EnvValue::Integer(u64) => fmt.write_str(&u64.to_string()),
EnvValue::String(str) => fmt.write_str(str),
}
}
}

+ 7
- 3
libraries/core/src/descriptor/validate.rs View File

@@ -1,15 +1,19 @@
use crate::{
adjust_shared_library_path,
config::{DataId, Input, InputMapping, OperatorId, UserInputMapping},
descriptor::{self, source_is_url, CoreNodeKind, OperatorSource},
descriptor::{self, source_is_url},
get_python_path,
};

use dora_message::{
config::{Input, InputMapping, UserInputMapping},
descriptor::{CoreNodeKind, OperatorSource, DYNAMIC_SOURCE, SHELL_SOURCE},
id::{DataId, OperatorId},
};
use eyre::{bail, eyre, Context};
use std::{path::Path, process::Command};
use tracing::info;

use super::{resolve_path, Descriptor, DYNAMIC_SOURCE, SHELL_SOURCE};
use super::{resolve_path, Descriptor, DescriptorExt};
const VERSION: &str = env!("CARGO_PKG_VERSION");

pub fn check_dataflow(


+ 7
- 2
libraries/core/src/descriptor/visualize.rs View File

@@ -1,5 +1,10 @@
use super::{CoreNodeKind, CustomNode, OperatorDefinition, ResolvedNode, RuntimeNode};
use crate::config::{format_duration, DataId, Input, InputMapping, NodeId, UserInputMapping};
use dora_message::{
config::{format_duration, Input, InputMapping, UserInputMapping},
descriptor::{CoreNodeKind, OperatorDefinition},
id::{DataId, NodeId},
};

use super::{CustomNode, ResolvedNode, RuntimeNode};
use std::{
collections::{BTreeMap, BTreeSet, HashMap},
fmt::Write as _,


+ 1
- 2
libraries/core/src/lib.rs View File

@@ -5,9 +5,8 @@ use std::{
path::Path,
};

pub use uhlc;
pub use dora_message::{config, uhlc};

pub mod config;
pub mod descriptor;
pub mod topics;



+ 6
- 1
libraries/message/Cargo.toml View File

@@ -17,8 +17,13 @@ serde = { version = "1.0.136", features = ["derive"] }
eyre = "0.6.8"
arrow-schema = { workspace = true, features = ["serde"] }
tokio = "1.39.2"
dora-core = { workspace = true }
# dora-core = { workspace = true }
uuid = { version = "1.7", features = ["serde", "v7"] }
log = { version = "0.4.21", features = ["serde"] }
aligned-vec = { version = "0.5.0", features = ["serde"] }
semver = { version = "1.0.23", features = ["serde"] }
schemars = "0.8.19"
uhlc = "0.5.1"
serde_yaml = "0.9.11"
once_cell = "1.13.0"
serde-with-expand-env = "1.1.0"

+ 4
- 3
libraries/message/src/cli_to_coordinator.rs View File

@@ -1,10 +1,11 @@
use std::{path::PathBuf, time::Duration};

use dora_core::{
config::{NodeId, OperatorId},
use uuid::Uuid;

use crate::{
descriptor::Descriptor,
id::{NodeId, OperatorId},
};
use uuid::Uuid;

#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub enum ControlRequest {


+ 1
- 2
libraries/message/src/common.rs View File

@@ -2,10 +2,9 @@ use core::fmt;
use std::borrow::Cow;

use aligned_vec::{AVec, ConstAlign};
use dora_core::{config::NodeId, uhlc};
use uuid::Uuid;

use crate::DataflowId;
use crate::{id::NodeId, DataflowId};

pub use log::Level as LogLevel;



libraries/core/src/config.rs → libraries/message/src/config.rs View File

@@ -1,129 +1,87 @@
use once_cell::sync::OnceCell;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use core::fmt;
use std::{
borrow::Borrow,
collections::{BTreeMap, BTreeSet},
convert::Infallible,
fmt,
str::FromStr,
time::Duration,
};

#[derive(
Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct NodeId(String);

impl FromStr for NodeId {
type Err = Infallible;

fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self(s.to_owned()))
}
}

impl From<String> for NodeId {
fn from(id: String) -> Self {
Self(id)
}
}

impl std::fmt::Display for NodeId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}

impl AsRef<str> for NodeId {
fn as_ref(&self) -> &str {
&self.0
}
}

#[derive(
Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct OperatorId(String);

impl FromStr for OperatorId {
type Err = Infallible;

fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self(s.to_owned()))
}
}

impl From<String> for OperatorId {
fn from(id: String) -> Self {
Self(id)
}
}

impl std::fmt::Display for OperatorId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}

impl AsRef<str> for OperatorId {
fn as_ref(&self) -> &str {
&self.0
}
}

#[derive(
Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct DataId(String);

impl From<DataId> for String {
fn from(id: DataId) -> Self {
id.0
}
}

impl From<String> for DataId {
fn from(id: String) -> Self {
Self(id)
}
}

impl std::fmt::Display for DataId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}
use once_cell::sync::OnceCell;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

impl std::ops::Deref for DataId {
type Target = String;
pub use crate::id::{DataId, NodeId, OperatorId};

fn deref(&self) -> &Self::Target {
&self.0
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct NodeRunConfig {
/// Inputs for the nodes as a map from input ID to `node_id/output_id`.
///
/// e.g.
///
/// inputs:
///
/// example_input: example_node/example_output1
///
#[serde(default)]
pub inputs: BTreeMap<DataId, Input>,
/// List of output IDs.
///
/// e.g.
///
/// outputs:
///
/// - output_1
///
/// - output_2
#[serde(default)]
pub outputs: BTreeSet<DataId>,
}

impl AsRef<String> for DataId {
fn as_ref(&self) -> &String {
&self.0
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields, from = "InputDef", into = "InputDef")]
pub struct Input {
pub mapping: InputMapping,
pub queue_size: Option<usize>,
}

impl AsRef<str> for DataId {
fn as_ref(&self) -> &str {
&self.0
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum InputDef {
MappingOnly(InputMapping),
WithOptions {
source: InputMapping,
queue_size: Option<usize>,
},
}

impl Borrow<String> for DataId {
fn borrow(&self) -> &String {
&self.0
impl From<Input> for InputDef {
fn from(input: Input) -> Self {
match input {
Input {
mapping,
queue_size: None,
} => Self::MappingOnly(mapping),
Input {
mapping,
queue_size,
} => Self::WithOptions {
source: mapping,
queue_size,
},
}
}
}

impl Borrow<str> for DataId {
fn borrow(&self) -> &str {
&self.0
impl From<InputDef> for Input {
fn from(value: InputDef) -> Self {
match value {
InputDef::MappingOnly(mapping) => Self {
mapping,
queue_size: None,
},
InputDef::WithOptions { source, queue_size } => Self {
mapping: source,
queue_size,
},
}
}
}

@@ -158,6 +116,22 @@ impl fmt::Display for InputMapping {
}
}

pub struct FormattedDuration(pub Duration);

impl fmt::Display for FormattedDuration {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.0.subsec_millis() == 0 {
write!(f, "secs/{}", self.0.as_secs())
} else {
write!(f, "millis/{}", self.0.as_millis())
}
}
}

pub fn format_duration(interval: Duration) -> FormattedDuration {
FormattedDuration(interval)
}

impl Serialize for InputMapping {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
@@ -233,97 +207,6 @@ pub struct UserInputMapping {
pub output: DataId,
}

pub struct FormattedDuration(pub Duration);

impl fmt::Display for FormattedDuration {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.0.subsec_millis() == 0 {
write!(f, "secs/{}", self.0.as_secs())
} else {
write!(f, "millis/{}", self.0.as_millis())
}
}
}

pub fn format_duration(interval: Duration) -> FormattedDuration {
FormattedDuration(interval)
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct NodeRunConfig {
/// Inputs for the nodes as a map from input ID to `node_id/output_id`.
///
/// e.g.
///
/// inputs:
///
/// example_input: example_node/example_output1
///
#[serde(default)]
pub inputs: BTreeMap<DataId, Input>,
/// List of output IDs.
///
/// e.g.
///
/// outputs:
///
/// - output_1
///
/// - output_2
#[serde(default)]
pub outputs: BTreeSet<DataId>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields, from = "InputDef", into = "InputDef")]
pub struct Input {
pub mapping: InputMapping,
pub queue_size: Option<usize>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(untagged)]
pub enum InputDef {
MappingOnly(InputMapping),
WithOptions {
source: InputMapping,
queue_size: Option<usize>,
},
}

impl From<Input> for InputDef {
fn from(input: Input) -> Self {
match input {
Input {
mapping,
queue_size: None,
} => Self::MappingOnly(mapping),
Input {
mapping,
queue_size,
} => Self::WithOptions {
source: mapping,
queue_size,
},
}
}
}

impl From<InputDef> for Input {
fn from(value: InputDef) -> Self {
match value {
InputDef::MappingOnly(mapping) => Self {
mapping,
queue_size: None,
},
InputDef::WithOptions { source, queue_size } => Self {
mapping: source,
queue_size,
},
}
}
}

#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Clone)]
#[serde(deny_unknown_fields, rename_all = "lowercase")]
pub struct CommunicationConfig {

+ 2
- 4
libraries/message/src/coordinator_to_cli.rs View File

@@ -1,11 +1,9 @@
use std::collections::{BTreeMap, BTreeSet};

use dora_core::config::NodeId;
use dora_core::uhlc;
use uuid::Uuid;

pub use crate::common::LogMessage;
pub use crate::common::{NodeError, NodeErrorCause, NodeExitStatus};
pub use crate::common::{LogMessage, NodeError, NodeErrorCause, NodeExitStatus};
use crate::id::NodeId;

#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub enum ControlRequestReply {


+ 3
- 5
libraries/message/src/coordinator_to_daemon.rs View File

@@ -1,13 +1,11 @@
use std::{collections::BTreeMap, net::SocketAddr, path::PathBuf, time::Duration};

use dora_core::{
config::{NodeId, OperatorId},
// TODO: how should we version these?
use crate::{
descriptor::{Descriptor, ResolvedNode},
id::{NodeId, OperatorId},
DataflowId,
};

use crate::DataflowId;

pub use crate::common::Timestamped;

#[derive(Debug, serde::Serialize, serde::Deserialize)]


+ 1
- 3
libraries/message/src/daemon_to_coordinator.rs View File

@@ -1,11 +1,9 @@
use std::collections::BTreeMap;

use dora_core::{config::NodeId, uhlc};

pub use crate::common::{
DataMessage, LogLevel, LogMessage, NodeError, NodeErrorCause, NodeExitStatus, Timestamped,
};
use crate::{current_crate_version, versions_compatible, DataflowId};
use crate::{current_crate_version, id::NodeId, versions_compatible, DataflowId};

#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub enum CoordinatorRequest {


+ 5
- 2
libraries/message/src/daemon_to_daemon.rs View File

@@ -1,9 +1,12 @@
use std::collections::BTreeSet;

use aligned_vec::{AVec, ConstAlign};
use dora_core::config::{DataId, NodeId};

use crate::{metadata::Metadata, DataflowId};
use crate::{
id::{DataId, NodeId},
metadata::Metadata,
DataflowId,
};

#[derive(Debug, serde::Deserialize, serde::Serialize)]
pub enum InterDaemonEvent {


+ 5
- 4
libraries/message/src/daemon_to_node.rs View File

@@ -1,12 +1,13 @@
use std::{net::SocketAddr, path::PathBuf};

use dora_core::{
config::{DataId, NodeId, NodeRunConfig, OperatorId},
use crate::{
config::NodeRunConfig,
descriptor::{Descriptor, OperatorDefinition},
id::{DataId, NodeId, OperatorId},
metadata::Metadata,
DataflowId,
};

use crate::{metadata::Metadata, DataflowId};

pub use crate::common::{DataMessage, DropToken, SharedMemoryId, Timestamped};

// Passed via env variable


+ 331
- 0
libraries/message/src/descriptor.rs View File

@@ -0,0 +1,331 @@
use crate::{
config::{CommunicationConfig, Input, InputMapping, NodeRunConfig},
id::{DataId, NodeId, OperatorId},
};
use eyre::{eyre, Result};
use log::warn;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde_with_expand_env::with_expand_envs;
use std::{
collections::{BTreeMap, BTreeSet},
fmt,
path::PathBuf,
};

/// Sentinel `source` value — presumably runs the command through a shell;
/// its usage is not visible in this file (TODO confirm at call sites).
pub const SHELL_SOURCE: &str = "shell";
/// Sentinel `source` value marking a node that connects dynamically
/// (see [`CoreNodeKind::dynamic`]).
pub const DYNAMIC_SOURCE: &str = "dynamic";

/// Dataflow description
///
/// Top-level structure of a dataflow YAML file: global communication and
/// deployment settings plus the list of nodes.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
#[schemars(title = "dora-rs specification")]
pub struct Descriptor {
    /// Communication configuration (hidden from the generated JSON schema).
    #[schemars(skip)]
    #[serde(default)]
    pub communication: CommunicationConfig,
    /// Unstable deployment options, read from the `_unstable_deploy` key.
    #[schemars(skip)]
    #[serde(default, rename = "_unstable_deploy")]
    pub deploy: Deploy,
    /// The nodes making up this dataflow.
    pub nodes: Vec<Node>,
}

/// Unstable deployment configuration (the `_unstable_deploy` YAML key).
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Deploy {
    /// Machine this dataflow/node should be deployed on, if any.
    pub machine: Option<String>,
}

/// Dora Node
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Node {
    /// Node identifier
    pub id: NodeId,
    /// Node name
    pub name: Option<String>,
    /// Description of the node
    pub description: Option<String>,
    /// Environment variables
    pub env: Option<BTreeMap<String, EnvValue>>,

    /// Unstable machine deployment configuration
    #[schemars(skip)]
    #[serde(default, rename = "_unstable_deploy")]
    pub deploy: Deploy,

    /// Operators hosted by a dora runtime — presumably mutually exclusive
    /// with `custom`/`operator`; validation is not visible here (TODO confirm).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub operators: Option<RuntimeNode>,
    /// Custom (executable) node definition.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub custom: Option<CustomNode>,
    /// Shorthand for a runtime node hosting a single operator.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub operator: Option<SingleOperatorDefinition>,

    /// Path of the node executable or script (shorthand node format).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub path: Option<String>,
    /// Arguments passed to the executable (shorthand node format).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub args: Option<String>,
    /// Build command, if the node must be built before running.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build: Option<String>,
    /// Send stdout and stderr to another node (cf. `CustomNode::send_stdout_as`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub send_stdout_as: Option<String>,
    /// Inputs of this node, keyed by input ID.
    #[serde(default)]
    pub inputs: BTreeMap<DataId, Input>,
    /// Output IDs provided by this node.
    #[serde(default)]
    pub outputs: BTreeSet<DataId>,
}

/// A node whose deployment target has been resolved.
///
/// Unlike [`Node`], the node kind is collapsed into a single [`CoreNodeKind`]
/// and the deploy section carries a concrete machine name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResolvedNode {
    /// Node identifier.
    pub id: NodeId,
    /// Optional human-readable name.
    pub name: Option<String>,
    /// Optional description.
    pub description: Option<String>,
    /// Environment variables for the node.
    pub env: Option<BTreeMap<String, EnvValue>>,

    /// Resolved deployment target.
    #[serde(default)]
    pub deploy: ResolvedDeploy,

    /// Runtime or custom node definition, flattened into this struct.
    #[serde(flatten)]
    pub kind: CoreNodeKind,
}

impl ResolvedNode {
    /// Returns the `send_stdout_as` redirection target for this node, if any.
    ///
    /// For runtime nodes at most one operator may set `send_stdout_as`; the
    /// result is prefixed with that operator's ID (`<op>/<output>`). Custom
    /// nodes simply forward their own setting.
    pub fn send_stdout_as(&self) -> Result<Option<String>> {
        match &self.kind {
            // TODO: Split stdout between operators
            CoreNodeKind::Runtime(n) => {
                let configured: Vec<_> = n
                    .operators
                    .iter()
                    .filter(|op| op.config.send_stdout_as.is_some())
                    .collect();
                if configured.len() > 1 {
                    return Err(eyre!("More than one `send_stdout_as` entries for a runtime node. Please only use one `send_stdout_as` per runtime."));
                }
                if configured.len() == 1 && n.operators.len() > 1 {
                    warn!("All stdout from all operators of a runtime are going to be sent in the selected `send_stdout_as` operator.")
                }
                Ok(configured.first().and_then(|op| {
                    op.config
                        .send_stdout_as
                        .as_ref()
                        .map(|stdout| format!("{}/{}", op.id, stdout))
                }))
            }
            CoreNodeKind::Custom(n) => Ok(n.send_stdout_as.clone()),
        }
    }
}

/// Deployment information after resolution: the machine name is always
/// present (via `Default` an empty string presumably denotes the default
/// machine — TODO confirm against the resolver).
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ResolvedDeploy {
    /// Machine the node is assigned to.
    pub machine: String,
}

/// The two kinds of nodes after resolution.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CoreNodeKind {
    /// Dora runtime node
    #[serde(rename = "operators")]
    Runtime(RuntimeNode),
    /// Node running an arbitrary executable.
    Custom(CustomNode),
}

/// Collects the inputs of every operator of a runtime node, namespacing each
/// input ID as `<operator id>/<input id>`.
pub fn runtime_node_inputs(n: &RuntimeNode) -> BTreeMap<DataId, Input> {
    let mut inputs = BTreeMap::new();
    for operator in &n.operators {
        for (input_id, mapping) in &operator.config.inputs {
            inputs.insert(
                DataId::from(format!("{}/{input_id}", operator.id)),
                mapping.clone(),
            );
        }
    }
    inputs
}

/// Collects the outputs of every operator of a runtime node, namespacing each
/// output ID as `<operator id>/<output id>`.
fn runtime_node_outputs(n: &RuntimeNode) -> BTreeSet<DataId> {
    let mut outputs = BTreeSet::new();
    for operator in &n.operators {
        for output_id in &operator.config.outputs {
            outputs.insert(DataId::from(format!("{}/{output_id}", operator.id)));
        }
    }
    outputs
}

impl CoreNodeKind {
    /// Derives the input/output run configuration for this node.
    ///
    /// Runtime nodes aggregate the namespaced inputs/outputs of all their
    /// operators; custom nodes carry their configuration directly.
    pub fn run_config(&self) -> NodeRunConfig {
        match self {
            Self::Runtime(n) => NodeRunConfig {
                inputs: runtime_node_inputs(n),
                outputs: runtime_node_outputs(n),
            },
            Self::Custom(n) => n.run_config.clone(),
        }
    }

    /// Whether this node connects dynamically, i.e. it is a custom node whose
    /// source is the special [`DYNAMIC_SOURCE`] value.
    pub fn dynamic(&self) -> bool {
        matches!(self, Self::Custom(n) if n.source == DYNAMIC_SOURCE)
    }
}

/// A set of operators hosted by a single dora runtime process.
///
/// `transparent`: serializes as the bare operator list.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(transparent)]
pub struct RuntimeNode {
    /// The operators run by this runtime.
    pub operators: Vec<OperatorDefinition>,
}

/// An operator inside a runtime node: its ID plus its configuration.
#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct OperatorDefinition {
    /// Operator identifier, unique within the runtime node.
    pub id: OperatorId,
    /// Configuration fields are flattened next to `id` in the YAML.
    #[serde(flatten)]
    pub config: OperatorConfig,
}

/// Operator definition used with the single-operator `operator:` shorthand.
#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct SingleOperatorDefinition {
    /// ID is optional if there is only a single operator.
    pub id: Option<OperatorId>,
    /// Configuration fields are flattened into the surrounding map.
    #[serde(flatten)]
    pub config: OperatorConfig,
}

/// Configuration shared by all operator definitions.
#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
pub struct OperatorConfig {
    /// Optional human-readable name.
    pub name: Option<String>,
    /// Optional description.
    pub description: Option<String>,

    /// Operator inputs, keyed by input ID.
    #[serde(default)]
    pub inputs: BTreeMap<DataId, Input>,
    /// Output IDs provided by this operator.
    #[serde(default)]
    pub outputs: BTreeSet<DataId>,

    /// Where the operator implementation lives (shared library, Python, WASM).
    #[serde(flatten)]
    pub source: OperatorSource,

    /// Build command, if the operator must be built before running.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build: Option<String>,
    /// Send stdout/stderr to the named output (cf. `ResolvedNode::send_stdout_as`).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub send_stdout_as: Option<String>,
}

/// Implementation source of an operator (kebab-case keys in YAML).
#[derive(Debug, Serialize, Deserialize, JsonSchema, Clone)]
#[serde(rename_all = "kebab-case")]
pub enum OperatorSource {
    /// Path to a shared library (`shared-library:` in YAML).
    SharedLibrary(String),
    /// Python source, optionally with a conda environment.
    Python(PythonSource),
    /// WASM module (hidden from the generated JSON schema).
    #[schemars(skip)]
    Wasm(String),
}
/// Python operator source: the script path plus an optional conda environment.
///
/// Serialized through [`PythonSourceDef`] so YAML may use either a bare
/// string or a map with `source`/`conda_env` keys.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(
    deny_unknown_fields,
    from = "PythonSourceDef",
    into = "PythonSourceDef"
)]
pub struct PythonSource {
    /// Path to the Python source.
    pub source: String,
    /// Conda environment to run the source in, if any.
    pub conda_env: Option<String>,
}

/// Serde helper: the untagged on-disk representation of [`PythonSource`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum PythonSourceDef {
    /// Shorthand form: just the source path.
    SourceOnly(String),
    /// Long form with an explicit `source` and optional `conda_env`.
    WithOptions {
        source: String,
        conda_env: Option<String>,
    },
}

impl From<PythonSource> for PythonSourceDef {
    /// Converts a [`PythonSource`] into its serialization form, preferring
    /// the shorthand variant when no conda environment is set.
    fn from(input: PythonSource) -> Self {
        let PythonSource { source, conda_env } = input;
        if conda_env.is_none() {
            Self::SourceOnly(source)
        } else {
            Self::WithOptions { source, conda_env }
        }
    }
}

impl From<PythonSourceDef> for PythonSource {
    /// Converts the on-disk representation back into a [`PythonSource`].
    fn from(value: PythonSourceDef) -> Self {
        let (source, conda_env) = match value {
            PythonSourceDef::SourceOnly(source) => (source, None),
            PythonSourceDef::WithOptions { source, conda_env } => (source, conda_env),
        };
        Self { source, conda_env }
    }
}

/// Configuration for Python operators.
///
/// NOTE(review): unlike its siblings this struct maps inputs directly to
/// `InputMapping` (no queue size) and derives no `JsonSchema` — presumably
/// intentional; confirm before unifying.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct PythonOperatorConfig {
    /// Path to the Python operator source.
    pub path: PathBuf,
    /// Operator inputs, keyed by input ID.
    #[serde(default)]
    pub inputs: BTreeMap<DataId, InputMapping>,
    /// Output IDs provided by this operator.
    #[serde(default)]
    pub outputs: BTreeSet<DataId>,
}

/// A node that runs an arbitrary executable (as opposed to a dora runtime).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct CustomNode {
    /// Path of the source code
    ///
    /// If you want to use a specific `conda` environment.
    /// Provide the python path within the source.
    ///
    /// source: /home/peter/miniconda3/bin/python
    ///
    /// args: some_node.py
    ///
    /// Source can match any executable in PATH.
    pub source: String,
    /// Args for the executable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub args: Option<String>,
    /// Environment variables for the custom nodes
    ///
    /// Deprecated, use outer-level `env` field instead.
    pub envs: Option<BTreeMap<String, EnvValue>>,
    /// Build command, if the node must be built before running.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build: Option<String>,
    /// Send stdout and stderr to another node
    #[serde(skip_serializing_if = "Option::is_none")]
    pub send_stdout_as: Option<String>,

    /// Input/output configuration, flattened into the node definition.
    #[serde(flatten)]
    pub run_config: NodeRunConfig,
}

/// An environment variable value; `$VAR` placeholders in the YAML are
/// expanded during deserialization via `with_expand_envs`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum EnvValue {
    /// Boolean value.
    #[serde(deserialize_with = "with_expand_envs")]
    Bool(bool),
    /// Unsigned integer value.
    #[serde(deserialize_with = "with_expand_envs")]
    Integer(u64),
    /// Plain string value.
    #[serde(deserialize_with = "with_expand_envs")]
    String(String),
}

impl fmt::Display for EnvValue {
    /// Formats the value as it would appear as an environment variable value.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        // `write!` formats directly into the formatter, avoiding the
        // intermediate `String` allocated by `to_string()`. Also renames the
        // bindings: the originals (`bool`, `u64`, `str`) shadowed primitive
        // type names, which is confusing to read.
        match self {
            EnvValue::Bool(value) => write!(fmt, "{value}"),
            EnvValue::Integer(value) => write!(fmt, "{value}"),
            EnvValue::String(value) => fmt.write_str(value),
        }
    }
}

+ 121
- 0
libraries/message/src/id.rs View File

@@ -0,0 +1,121 @@
use std::{borrow::Borrow, convert::Infallible, str::FromStr};

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

/// Unique identifier of a node within a dataflow.
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct NodeId(pub(crate) String);

impl FromStr for NodeId {
type Err = Infallible;

fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self(s.to_owned()))
}
}

impl From<String> for NodeId {
fn from(id: String) -> Self {
Self(id)
}
}

impl std::fmt::Display for NodeId {
    /// Delegates to the inner string's `Display` (via `{}`) so width,
    /// fill, and precision flags keep working.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl AsRef<str> for NodeId {
    /// Borrows the ID as a string slice.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}

/// Unique identifier of an operator within a runtime node.
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct OperatorId(String);

impl FromStr for OperatorId {
type Err = Infallible;

fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self(s.to_owned()))
}
}

impl From<String> for OperatorId {
fn from(id: String) -> Self {
Self(id)
}
}

impl std::fmt::Display for OperatorId {
    /// Delegates to the inner string's `Display` (via `{}`) so width,
    /// fill, and precision flags keep working.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl AsRef<str> for OperatorId {
    /// Borrows the ID as a string slice.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}

/// Identifier of a node or operator output (a data stream).
#[derive(
    Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
)]
pub struct DataId(String);

impl From<DataId> for String {
fn from(id: DataId) -> Self {
id.0
}
}

impl From<String> for DataId {
fn from(id: String) -> Self {
Self(id)
}
}

impl std::fmt::Display for DataId {
    /// Delegates to the inner string's `Display` (via `{}`) so width,
    /// fill, and precision flags keep working.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl std::ops::Deref for DataId {
type Target = String;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl AsRef<String> for DataId {
fn as_ref(&self) -> &String {
&self.0
}
}

impl AsRef<str> for DataId {
    /// Borrows the ID as a string slice.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}

impl Borrow<String> for DataId {
fn borrow(&self) -> &String {
&self.0
}
}

impl Borrow<str> for DataId {
    /// Lets keyed collections of `DataId` be queried with a `&str`.
    fn borrow(&self) -> &str {
        self.0.as_str()
    }
}

+ 5
- 0
libraries/message/src/lib.rs View File

@@ -3,7 +3,12 @@

#![allow(clippy::missing_safety_doc)]

pub use uhlc;

pub mod common;
pub mod config;
pub mod descriptor;
pub mod id;
pub mod metadata;

pub mod coordinator_to_daemon;


+ 0
- 1
libraries/message/src/metadata.rs View File

@@ -2,7 +2,6 @@ use std::collections::BTreeMap;

use arrow_data::ArrayData;
use arrow_schema::DataType;
use dora_core::uhlc;
use eyre::Context;
use serde::{Deserialize, Serialize};



+ 6
- 3
libraries/message/src/node_to_daemon.rs View File

@@ -1,9 +1,12 @@
pub use crate::common::{
DataMessage, DropToken, LogLevel, LogMessage, SharedMemoryId, Timestamped,
};
use crate::{current_crate_version, metadata::Metadata, versions_compatible, DataflowId};

use dora_core::config::{DataId, NodeId};
use crate::{
current_crate_version,
id::{DataId, NodeId},
metadata::Metadata,
versions_compatible, DataflowId,
};

#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub enum DaemonRequest {


Loading…
Cancel
Save