From e3bdf186e6bcd985c5447c4827469715e495acde Mon Sep 17 00:00:00 2001 From: Philipp Oppermann Date: Tue, 1 Mar 2022 01:07:38 +0100 Subject: [PATCH] Add basic yaml parsing and visualization via mermaid Creates a basic prototype for parsing dataflows declared in YAML files using `serde`/`serde_yaml`. The dataflow file format is just an example; we can adjust this however we like. To visualize the parsed dataflow, the main executable outputs a flowchart in mermaid syntax. GitHub supports this format natively in markdown files; alternatively, it can be converted to an image on https://mermaid.live/. --- Cargo.lock | 318 ++++++++++++++++++++++++++++++++++ Cargo.toml | 5 + examples/dataflow-example.yml | 30 ++++ src/descriptor.rs | 92 ++++++++++ src/lib.rs | 1 + src/main.rs | 29 +++- 6 files changed, 473 insertions(+), 2 deletions(-) create mode 100644 examples/dataflow-example.yml create mode 100644 src/descriptor.rs create mode 100644 src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 54d00349..c08c3699 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,324 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "dora-rs" version = "0.1.0" +dependencies = [ + "eyre", + "serde", + "serde_yaml", + "structopt", +] + +[[package]] +name = "eyre" +version = "0.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9289ed2c0440a6536e65119725cf91fc2c6b5e513bfd2e36e1134d7cca6ca12f" +dependencies = [ + "indenter", + "once_cell", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + 
"unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + +[[package]] +name = "indexmap" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.119" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" + +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + +[[package]] +name = "once_cell" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" + +[[package]] +name = "serde" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_yaml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a521f2940385c165a24ee286aa8599633d162077a54bdcae2a6fd5a7bfa7a0" +dependencies = [ + "indexmap", + "ryu", + "serde", + "yaml-rust", +] + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" +dependencies = [ + "clap", + 
"lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "unicode-segmentation" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] diff --git a/Cargo.toml b/Cargo.toml index 65450b6d..7397579b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,8 @@ edition = "2021" [workspace] +[dependencies] +eyre = "0.6.7" +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.8.23" +structopt = "0.3.26" diff --git a/examples/dataflow-example.yml b/examples/dataflow-example.yml new file mode 100644 index 00000000..0fef5e03 --- /dev/null +++ b/examples/dataflow-example.yml @@ -0,0 +1,30 @@ +sinks: + - id: sink-1 + input: A + - id: sink-2 + input: B +sources: + - id: source-1 + output: C + - id: source-2 + output: G +operators: + - id: op-1 + inputs: + - C + - E + - B + outputs: + - A + - id: op-2 + inputs: + - C + - F + outputs: + - E + - id: op-3 + inputs: + - C + - G + outputs: + - B diff --git a/src/descriptor.rs b/src/descriptor.rs new file mode 100644 index 00000000..55e3f397 --- /dev/null +++ b/src/descriptor.rs @@ -0,0 +1,92 @@ +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeSet, HashMap, HashSet}; + +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct Descriptor { + #[serde(default)] + sources: HashSet, + #[serde(default)] + sinks: 
HashSet<Sink>, + #[serde(default)] + operators: HashSet<Operator>, +} + +impl Descriptor { + pub fn visualize_as_mermaid(&self) -> eyre::Result<String> { + let mut flowchart = "flowchart TB\n".to_owned(); + for source in &self.sources { + let id = &source.id; + flowchart.push_str(&format!(" {id}[\\{id}/]\n")); + } + for operator in &self.operators { + let id = &operator.id; + flowchart.push_str(&format!(" {id}\n")); + } + for sink in &self.sinks { + let id = &sink.id; + flowchart.push_str(&format!(" {id}[/{id}\\]\n")); + } + + let mut expected_inputs: HashMap<_, BTreeSet<_>> = HashMap::new(); + for operator in &self.operators { + for input in &operator.inputs { + expected_inputs + .entry(input.to_owned()) + .or_default() + .insert(&operator.id); + } + } + for sink in &self.sinks { + expected_inputs + .entry(sink.input.to_owned()) + .or_default() + .insert(&sink.id); + } + + for source in &self.sources { + let targets = expected_inputs.remove(&source.output).unwrap_or_default(); + let id = &source.id; + let output = &source.output; + for target in targets { + flowchart.push_str(&format!(" {id} -- {output} --> {target}\n")); + } + } + + for operator in &self.operators { + let id = &operator.id; + for output in &operator.outputs { + let targets = expected_inputs.remove(output).unwrap_or_default(); + for target in targets { + flowchart.push_str(&format!(" {id} -- {output} --> {target}\n")); + } + } + } + + for (output, targets) in expected_inputs.drain() { + for target in targets { + flowchart.push_str(&format!(" missing>missing] -- {output} --> {target}\n")); + } + } + + Ok(flowchart) + } +} + +#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct Source { + id: String, + output: String, +} + +#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct Sink { + id: String, + input: String, +} + +#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct Operator { + id: String, + inputs: BTreeSet<String>, + outputs: BTreeSet<String>, +} diff --git 
a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..f0d46f32 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +pub mod descriptor; diff --git a/src/main.rs b/src/main.rs index 47ad8c63..61b7d79f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,28 @@ -fn main() { - println!("Hello World!"); +use dora_rs::descriptor::Descriptor; +use eyre::Context; +use std::{fs::File, path::PathBuf}; +use structopt::StructOpt; + +#[derive(Debug, Clone, StructOpt)] +struct Args { + file: PathBuf, +} + +fn main() -> eyre::Result<()> { + let args = Args::from_args(); + let descriptor_file = File::open(&args.file).context("failed to open given file")?; + + let descriptor: Descriptor = + serde_yaml::from_reader(descriptor_file).context("failed to parse given descriptor")?; + + let visualized = descriptor + .visualize_as_mermaid() + .context("failed to visualize descriptor")?; + println!("{visualized}"); + println!( + "Paste the above output on https://mermaid.live/ or in a \ + ```mermaid code block on GitHub to display it." + ); + + Ok(()) }