Browse Source

Add basic yaml parsing and visualization via mermaid

Creates a basic prototype for parsing dataflows delared in YAML files using `serde`/`serde_yaml`. The dataflow file format is just an example, we can adjust this however we like.

To visualize the parsed dataflow, the main executable outputs a flowchart in mermaid syntax. GitHub supports this format natively in markdown files, alternatively it can be converted to an image on <https://mermaid.live>.
tags/v0.0.0-test.4
Philipp Oppermann 3 years ago
parent
commit
e3bdf186e6
6 changed files with 473 additions and 2 deletions
  1. +318
    -0
      Cargo.lock
  2. +5
    -0
      Cargo.toml
  3. +30
    -0
      examples/dataflow-example.yml
  4. +92
    -0
      src/descriptor.rs
  5. +1
    -0
      src/lib.rs
  6. +27
    -2
      src/main.rs

+ 318
- 0
Cargo.lock View File

@@ -2,6 +2,324 @@
# It is not intended for manual editing.
version = 3

[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]

[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]

[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"

[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

[[package]]
name = "clap"
version = "2.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]

[[package]]
name = "dora-rs"
version = "0.1.0"
dependencies = [
"eyre",
"serde",
"serde_yaml",
"structopt",
]

[[package]]
name = "eyre"
version = "0.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9289ed2c0440a6536e65119725cf91fc2c6b5e513bfd2e36e1134d7cca6ca12f"
dependencies = [
"indenter",
"once_cell",
]

[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"

[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]

[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]

[[package]]
name = "indenter"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"

[[package]]
name = "indexmap"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
dependencies = [
"autocfg",
"hashbrown",
]

[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "libc"
version = "0.2.119"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4"

[[package]]
name = "linked-hash-map"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3"

[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"

[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"version_check",
]

[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]

[[package]]
name = "proc-macro2"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
dependencies = [
"unicode-xid",
]

[[package]]
name = "quote"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145"
dependencies = [
"proc-macro2",
]

[[package]]
name = "ryu"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"

[[package]]
name = "serde"
version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
dependencies = [
"serde_derive",
]

[[package]]
name = "serde_derive"
version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

[[package]]
name = "serde_yaml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a521f2940385c165a24ee286aa8599633d162077a54bdcae2a6fd5a7bfa7a0"
dependencies = [
"indexmap",
"ryu",
"serde",
"yaml-rust",
]

[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"

[[package]]
name = "structopt"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10"
dependencies = [
"clap",
"lazy_static",
"structopt-derive",
]

[[package]]
name = "structopt-derive"
version = "0.4.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]

[[package]]
name = "syn"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]

[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]

[[package]]
name = "unicode-segmentation"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"

[[package]]
name = "unicode-width"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"

[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"

[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"

[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]

[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"

[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85"
dependencies = [
"linked-hash-map",
]

+ 5
- 0
Cargo.toml View File

@@ -7,3 +7,8 @@ edition = "2021"

[workspace]

[dependencies]
eyre = "0.6.7"
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.8.23"
structopt = "0.3.26"

+ 30
- 0
examples/dataflow-example.yml View File

@@ -0,0 +1,30 @@
sinks:
- id: sink-1
input: A
- id: sink-2
input: B
sources:
- id: source-1
output: C
- id: source-2
output: G
operators:
- id: op-1
inputs:
- C
- E
- B
outputs:
- A
- id: op-2
inputs:
- C
- F
outputs:
- E
- id: op-3
inputs:
- C
- G
outputs:
- B

+ 92
- 0
src/descriptor.rs View File

@@ -0,0 +1,92 @@
use serde::{Deserialize, Serialize};
use std::collections::{BTreeSet, HashMap, HashSet};

#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Descriptor {
#[serde(default)]
sources: HashSet<Source>,
#[serde(default)]
sinks: HashSet<Sink>,
#[serde(default)]
operators: HashSet<Operator>,
}

impl Descriptor {
pub fn visualize_as_mermaid(&self) -> eyre::Result<String> {
let mut flowchart = "flowchart TB\n".to_owned();
for source in &self.sources {
let id = &source.id;
flowchart.push_str(&format!(" {id}[\\{id}/]\n"));
}
for operator in &self.operators {
let id = &operator.id;
flowchart.push_str(&format!(" {id}\n"));
}
for sink in &self.sinks {
let id = &sink.id;
flowchart.push_str(&format!(" {id}[/{id}\\]\n"));
}

let mut expected_inputs: HashMap<_, BTreeSet<_>> = HashMap::new();
for operator in &self.operators {
for input in &operator.inputs {
expected_inputs
.entry(input.to_owned())
.or_default()
.insert(&operator.id);
}
}
for sink in &self.sinks {
expected_inputs
.entry(sink.input.to_owned())
.or_default()
.insert(&sink.id);
}

for source in &self.sources {
let targets = expected_inputs.remove(&source.output).unwrap_or_default();
let id = &source.id;
let output = &source.output;
for target in targets {
flowchart.push_str(&format!(" {id} -- {output} --> {target}\n"));
}
}

for operator in &self.operators {
let id = &operator.id;
for output in &operator.outputs {
let targets = expected_inputs.remove(output).unwrap_or_default();
for target in targets {
flowchart.push_str(&format!(" {id} -- {output} --> {target}\n"));
}
}
}

for (output, targets) in expected_inputs.drain() {
for target in targets {
flowchart.push_str(&format!(" missing>missing] -- {output} --> {target}\n"));
}
}

Ok(flowchart)
}
}

#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Source {
id: String,
output: String,
}

#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Sink {
id: String,
input: String,
}

#[derive(Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Operator {
id: String,
inputs: BTreeSet<String>,
outputs: BTreeSet<String>,
}

+ 1
- 0
src/lib.rs View File

@@ -0,0 +1 @@
pub mod descriptor;

+ 27
- 2
src/main.rs View File

@@ -1,3 +1,28 @@
fn main() {
println!("Hello World!");
use dora_rs::descriptor::Descriptor;
use eyre::Context;
use std::{fs::File, path::PathBuf};
use structopt::StructOpt;

#[derive(Debug, Clone, StructOpt)]
struct Args {
file: PathBuf,
}

fn main() -> eyre::Result<()> {
let args = Args::from_args();
let descriptor_file = File::open(&args.file).context("failed to open given file")?;

let descriptor: Descriptor =
serde_yaml::from_reader(descriptor_file).context("failed to parse given descriptor")?;

let visualized = descriptor
.visualize_as_mermaid()
.context("failed to visualize descriptor")?;
println!("{visualized}");
println!(
"Paste the above output on https://mermaid.live/ or in a \
```mermaid code block on GitHub to display it."
);

Ok(())
}

Loading…
Cancel
Save