Fix test assertion in benchmark script

10 months ago · 98cbff4108
--- a/benches/llms/llama_cpp_python.yaml
+++ b/benches/llms/llama_cpp_python.yaml
@@ -6,14 +6,15 @@ nodes:
    outputs:
      - text
    env:
      DATA: "Please only generate the following output: This is a test"
      TEXT: "Please only generate the following output: This is a test"
      TEXT_TRUTH: "This is a test"
  - id: llm
    build: pip install -e ../../node-hub/dora-llama-cpp-python
    path: dora-llama-cpp-python
    inputs:
      text:
        source: benchmark_script/data
        source: benchmark_script/text
        queue-size: 10
    outputs:
      - text
--- a/benches/llms/phi4.yaml
+++ b/benches/llms/phi4.yaml
@@ -6,7 +6,8 @@ nodes:
    outputs:
      - text
    env:
      DATA: "Please only generate the following output: This is a test"
      TEXT: "Please only generate the following output: This is a test"
      TEXT_TRUTH: "This is a test"
  - id: llm
    build: |
@@ -14,6 +15,6 @@ nodes:
      pip install -e ../../node-hub/dora-phi4
    path: dora-phi4
    inputs:
      text: benchmark_script/data
      text: benchmark_script/text
    outputs:
      - text
--- a/benches/llms/qwen2.5.yaml
+++ b/benches/llms/qwen2.5.yaml
@@ -6,13 +6,14 @@ nodes:
    outputs:
      - text
    env:
      DATA: "Please only generate the following output: This is a test"
      TEXT: "Please only generate the following output: This is a test"
      TEXT_TRUTH: "This is a test"
  - id: llm
    build: |
      pip install -e ../../node-hub/dora-qwen
    path: dora-qwen
    inputs:
      text: benchmark_script/data
      text: benchmark_script/text
    outputs:
      - text
--- a/benches/llms/transformers.yaml
+++ b/benches/llms/transformers.yaml
@@ -6,13 +6,14 @@ nodes:
    outputs:
      - text
    env:
      DATA: "Please only generate the following output: This is a test"
      TEXT: "Please only generate the following output: This is a test"
      TEXT_TRUTH: "This is a test"
  - id: llm
    build: pip install -e ../../node-hub/dora-transformers
    path: dora-transformers
    inputs:
      text: benchmark_script/data
      text: benchmark_script/text
    outputs:
      - text
    env:
--- a/benches/mllm/benchmark_script.py
+++ b/benches/mllm/benchmark_script.py
@@ -110,7 +110,7 @@ def main():
        default="pyarrow-sender",
    )
    parser.add_argument(
        "--data",
        "--text",
        type=str,
        required=False,
        help="Arrow Data as string.",
@@ -119,23 +119,24 @@ def main():
    args = parser.parse_args()
    data = os.getenv("DATA", args.data)
    text = os.getenv("TEXT", args.text)
    text_truth = os.getenv("TEXT_TRUTH", args.text)
    cat = get_cat_image()
    audio, sample_rate = get_c3po_audio()
    if data is None:
    if text is None:
        raise ValueError(
            "No data provided. Please specify `DATA` environment argument or as `--data` argument",
            "No data provided. Please specify `TEXT` environment argument or as `--text` argument",
        )
    try:
        data = ast.literal_eval(data)
        text = ast.literal_eval(text)
    except Exception:  # noqa
        print("Passing input as string")
    if isinstance(data, (str, int, float)):
        data = pa.array([data])
    if isinstance(text, (str, int, float)):
        text = pa.array([text])
    else:
        data = pa.array(data)  # initialize pyarrow array
        text = pa.array(text)  # initialize pyarrow array
    node = Node(
        args.name,
    )  # provide the name to connect to the dataflow if dynamic node
@@ -156,15 +157,15 @@ def main():
        )
        time.sleep(0.1)
        start_time = time.time()
        node.send_output("text", data)
        node.send_output("text", text)
        event = node.next()
        duration = time.time() - start_time
        if event is not None and event["type"] == "INPUT":
            text = event["value"][0].as_py()
            received_text = event["value"][0].as_py()
            tokens = event["metadata"].get("tokens", 6)
            assert (
                "this is a cat" in text.lower()
            ), f"Expected 'This is a cat', got {text}"
                text_truth in received_text
            ), f"Expected '{text_truth}', got {received_text}"
            durations.append(duration)
            speed.append(tokens / duration)
            time.sleep(0.1)
--- a/benches/mllm/phi4.yaml
+++ b/benches/mllm/phi4.yaml
@@ -8,7 +8,8 @@ nodes:
      - image
      - audio
    env:
      DATA: "Please only generate the following output: This is a cat"
      TEXT: "Please only generate the following output: This is a cat"
      TEXT_TRUTH: "This is a cat"
  - id: llm
    build: |
--- a/benches/vlm/magma.yaml
+++ b/benches/vlm/magma.yaml
@@ -7,7 +7,8 @@ nodes:
      - text
      - image
    env:
      DATA: "Please only generate the following output: This is a test"
      TEXT: "Please only generate the following output: This is a cat"
      TEXT_TRUTH: "This is a cat"
  - id: llm
    build: pip install -e ../../node-hub/dora-magma
--- a/benches/vlm/phi4.yaml
+++ b/benches/vlm/phi4.yaml
@@ -7,7 +7,8 @@ nodes:
      - text
      - image
    env:
      DATA: "Please only generate the following output: This is a cat"
      TEXT: "Please only generate the following output: This is a cat"
      TEXT_TRUTH: "This is a cat"
  - id: llm
    build: |
--- a/benches/vlm/qwen2.5vl.yaml
+++ b/benches/vlm/qwen2.5vl.yaml
@@ -7,7 +7,8 @@ nodes:
      - text
      - image
    env:
      DATA: "Please only generate the following output: This is a cat"
      TEXT: "Please only generate the following output: This is a cat"
      TEXT_TRUTH: "This is a cat"
  - id: vlm
    # Comment flash_attn if not on cuda hardware