From effe15b5adec353ac6317d37c5f28d8665fd810e Mon Sep 17 00:00:00 2001
From: haixuantao
Date: Tue, 18 Mar 2025 23:54:51 +0100
Subject: [PATCH] Remove unused dataflow from benchmark

Delete the benchmark dataflows that are no longer used, drop the
flash-attn build step from the remaining configs, and reduce the
benchmark loop from 50 to 10 iterations. Also let dora-magma fall back
to the only buffered frame when no image_id matches, and load dora-phi4
without the inferred device map.
---
 benchs/llms/benchmark_script.py        | 10 +++++-----
 benchs/llms/magma.yaml                 | 17 ----------------
 benchs/llms/qwen2.5.yaml               |  1 -
 benchs/llms/transformers.yaml          |  5 -----
 benchs/mllm/llama_cpp_python.yaml      | 27 --------------------------
 benchs/mllm/magma.yaml                 | 19 ------------------
 benchs/mllm/phi4.yaml                  |  1 -
 benchs/mllm/qwen2.5vl.yaml             | 22 ---------------------
 benchs/mllm/transformers.yaml          | 24 -----------------------
 benchs/vlm/llama_cpp_python.yaml       | 27 --------------------------
 benchs/vlm/qwen2.5vl.yaml              |  1 -
 benchs/vlm/transformers.yaml           | 24 -----------------------
 examples/translation/phi4-dev.yml      |  1 -
 node-hub/dora-magma/dora_magma/main.py | 21 +++++++++++++-------
 node-hub/dora-phi4/dora_phi4/main.py   |  8 +++-----
 15 files changed, 22 insertions(+), 186 deletions(-)
 delete mode 100644 benchs/llms/magma.yaml
 delete mode 100644 benchs/mllm/llama_cpp_python.yaml
 delete mode 100644 benchs/mllm/magma.yaml
 delete mode 100644 benchs/mllm/qwen2.5vl.yaml
 delete mode 100644 benchs/mllm/transformers.yaml
 delete mode 100644 benchs/vlm/llama_cpp_python.yaml
 delete mode 100644 benchs/vlm/transformers.yaml

diff --git a/benchs/llms/benchmark_script.py b/benchs/llms/benchmark_script.py
index 1ba87662..98390627 100644
--- a/benchs/llms/benchmark_script.py
+++ b/benchs/llms/benchmark_script.py
@@ -84,7 +84,7 @@ def main():
     durations = []
     speed = []
 
-    for _ in range(50):
+    for _ in range(10):
         start_time = time.time()
         node.send_output("data", data)
         event = node.next()
@@ -92,9 +92,9 @@ def main():
         if event is not None and event["type"] == "INPUT":
             text = event["value"][0].as_py()
             tokens = event["metadata"].get("tokens", 6)
-            assert (
-                "this is a test" in text.lower()
-            ), f"Expected 'This is a test', got {text}"
+            assert "this is a test" in text.lower(), (
+                f"Expected 'This is a test', got {text}"
+            )
             durations.append(duration)
             speed.append(tokens / duration)
             time.sleep(0.1)
@@ -105,7 +105,7 @@ def main():
         + f"\nMax duration: {max(durations)}"
         + f"\nMin duration: {min(durations)}"
         + f"\nMedian duration: {np.median(durations)}"
-        + f"\nMedian frequency: {1/np.median(durations)}"
+        + f"\nMedian frequency: {1 / np.median(durations)}"
         + f"\nAverage speed: {sum(speed) / len(speed)}"
         + f"\nMax speed: {max(speed)}"
         + f"\nMin speed: {min(speed)}"
diff --git a/benchs/llms/magma.yaml b/benchs/llms/magma.yaml
deleted file mode 100644
index dcbc3196..00000000
--- a/benchs/llms/magma.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-magma
-    path: dora-magma
-    inputs:
-      text: benchmark_script/data
-    outputs:
-      - text
diff --git a/benchs/llms/qwen2.5.yaml b/benchs/llms/qwen2.5.yaml
index 8a10d78d..d47a4745 100644
--- a/benchs/llms/qwen2.5.yaml
+++ b/benchs/llms/qwen2.5.yaml
@@ -10,7 +10,6 @@ nodes:
 
   - id: llm
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-qwen
     path: dora-qwen
     inputs:
diff --git a/benchs/llms/transformers.yaml b/benchs/llms/transformers.yaml
index 590a2493..1f115240 100644
--- a/benchs/llms/transformers.yaml
+++ b/benchs/llms/transformers.yaml
@@ -17,8 +17,3 @@ nodes:
       - text
     env:
       MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face
SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." - MAX_TOKENS: "128" # Reduced for concise responses - DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon - ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations - TORCH_DTYPE: "float16" # Use half precision for better memory efficiency diff --git a/benchs/mllm/llama_cpp_python.yaml b/benchs/mllm/llama_cpp_python.yaml deleted file mode 100644 index 08b5eace..00000000 --- a/benchs/mllm/llama_cpp_python.yaml +++ /dev/null @@ -1,27 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - data - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-llama-cpp-python - path: dora-llama-cpp-python - inputs: - text: - source: benchmark_script/data - queue-size: 10 - outputs: - - text - env: - MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF" - MODEL_FILE_PATTERN: "*fp16.gguf" - SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." - MAX_TOKENS: "512" - N_GPU_LAYERS: "35" # Enable GPU acceleration - N_THREADS: "16" # CPU threads - CONTEXT_SIZE: "4096" # Maximum context window diff --git a/benchs/mllm/magma.yaml b/benchs/mllm/magma.yaml deleted file mode 100644 index f1fd5792..00000000 --- a/benchs/mllm/magma.yaml +++ /dev/null @@ -1,19 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - text - - image - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-magma - path: dora-magma - inputs: - text: benchmark_script/text - image: benchmark_script/image - outputs: - - text diff --git a/benchs/mllm/phi4.yaml b/benchs/mllm/phi4.yaml index 2aba0c0b..3d7c42ce 100644 --- a/benchs/mllm/phi4.yaml +++ b/benchs/mllm/phi4.yaml @@ -12,7 +12,6 @@ nodes: - id: llm build: | - pip install flash-attn --no-build-isolation pip install -e ../../node-hub/dora-phi4 path: dora-phi4 inputs: diff --git a/benchs/mllm/qwen2.5vl.yaml b/benchs/mllm/qwen2.5vl.yaml deleted file mode 100644 index bfc62bcd..00000000 --- a/benchs/mllm/qwen2.5vl.yaml +++ /dev/null @@ -1,22 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: vlm/text - outputs: - - text - - image - env: - DATA: "Please only generate the following output: This is a cat" - - - id: vlm - # Comment flash_attn if not on cuda hardware - build: | - pip install flash-attn --no-build-isolation - pip install -e ../../node-hub/dora-qwen2-5-vl - path: dora-qwen2-5-vl - inputs: - image: benchmark_script/image - text: benchmark_script/text - outputs: - - text diff --git a/benchs/mllm/transformers.yaml b/benchs/mllm/transformers.yaml deleted file mode 100644 index 590a2493..00000000 --- a/benchs/mllm/transformers.yaml +++ /dev/null @@ -1,24 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - data - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-transformers - path: dora-transformers - inputs: - text: benchmark_script/data - outputs: - - text - env: - MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face - SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." 
- MAX_TOKENS: "128" # Reduced for concise responses - DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon - ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations - TORCH_DTYPE: "float16" # Use half precision for better memory efficiency diff --git a/benchs/vlm/llama_cpp_python.yaml b/benchs/vlm/llama_cpp_python.yaml deleted file mode 100644 index 08b5eace..00000000 --- a/benchs/vlm/llama_cpp_python.yaml +++ /dev/null @@ -1,27 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - data - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-llama-cpp-python - path: dora-llama-cpp-python - inputs: - text: - source: benchmark_script/data - queue-size: 10 - outputs: - - text - env: - MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF" - MODEL_FILE_PATTERN: "*fp16.gguf" - SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." - MAX_TOKENS: "512" - N_GPU_LAYERS: "35" # Enable GPU acceleration - N_THREADS: "16" # CPU threads - CONTEXT_SIZE: "4096" # Maximum context window diff --git a/benchs/vlm/qwen2.5vl.yaml b/benchs/vlm/qwen2.5vl.yaml index bfc62bcd..8a553532 100644 --- a/benchs/vlm/qwen2.5vl.yaml +++ b/benchs/vlm/qwen2.5vl.yaml @@ -12,7 +12,6 @@ nodes: - id: vlm # Comment flash_attn if not on cuda hardware build: | - pip install flash-attn --no-build-isolation pip install -e ../../node-hub/dora-qwen2-5-vl path: dora-qwen2-5-vl inputs: diff --git a/benchs/vlm/transformers.yaml b/benchs/vlm/transformers.yaml deleted file mode 100644 index 590a2493..00000000 --- a/benchs/vlm/transformers.yaml +++ /dev/null @@ -1,24 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - data - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-transformers - path: dora-transformers - inputs: - text: benchmark_script/data - outputs: - - text - env: - MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face - SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." 
- MAX_TOKENS: "128" # Reduced for concise responses - DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon - ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations - TORCH_DTYPE: "float16" # Use half precision for better memory efficiency diff --git a/examples/translation/phi4-dev.yml b/examples/translation/phi4-dev.yml index 749591e6..280adb8a 100644 --- a/examples/translation/phi4-dev.yml +++ b/examples/translation/phi4-dev.yml @@ -17,7 +17,6 @@ nodes: - id: dora-phi4 build: | - pip install flash-attn --no-build-isolation pip install -e ../../node-hub/dora-phi4 path: dora-phi4 inputs: diff --git a/node-hub/dora-magma/dora_magma/main.py b/node-hub/dora-magma/dora_magma/main.py index 5f794c6a..097ee7cd 100644 --- a/node-hub/dora-magma/dora_magma/main.py +++ b/node-hub/dora-magma/dora_magma/main.py @@ -42,7 +42,8 @@ def load_magma_models(): device_map="auto", ) processor = AutoProcessor.from_pretrained( - model_name_or_path, trust_remote_code=True, + model_name_or_path, + trust_remote_code=True, ) except Exception as e: logger.error(f"Failed to load model: {e}") @@ -72,7 +73,9 @@ def generate( ] prompt = processor.tokenizer.apply_chat_template( - convs, tokenize=False, add_generation_prompt=True, + convs, + tokenize=False, + add_generation_prompt=True, ) try: @@ -174,14 +177,18 @@ def main(): task_description = event["value"][0].as_py() image_id = event["metadata"].get("image_id", None) - if image_id is None or image_id not in frames: - logger.error(f"Image ID {image_id} not found in frames") + if image_id in frames: + image = frames[image_id] + elif len(frames) == 1: + image = next(iter(frames.values())) + else: + logger.error(f"Image not found for {image_id}") continue - - image = frames[image_id] response, trajectories = generate(image, task_description) node.send_output( - "text", pa.array([response]), {"image_id": image_id}, + "text", + pa.array([response]), + {"image_id": image_id}, ) # Send trajectory data if available diff --git a/node-hub/dora-phi4/dora_phi4/main.py b/node-hub/dora-phi4/dora_phi4/main.py index 457d1b53..00f8120c 100644 --- a/node-hub/dora-phi4/dora_phi4/main.py +++ b/node-hub/dora-phi4/dora_phi4/main.py @@ -55,9 +55,7 @@ device_map = infer_auto_device_map( ) # Load the model directly with the inferred device map -model = AutoModelForCausalLM.from_pretrained( - MODEL_PATH, **MODEL_CONFIG, device_map=device_map -).to(device) +model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **MODEL_CONFIG).to(device) generation_config = GenerationConfig.from_pretrained(MODEL_PATH) @@ -82,6 +80,7 @@ BAD_SENTENCES = [ "The sound of the wind is so loud.", "The first time I saw the sea.", "the first time saw the sea i was so happy" + "The first time I saw the sea, I was very happy.", "The first time I saw the sea was in the movie.", "The first time I saw the movie was in the theater.", "The first time I saw the movie.", @@ -129,8 +128,7 @@ def remove_text_noise(text: str, text_noise="") -> str: # Replace hyphens with spaces to treat "Notre-Dame" and "notre dame" as equivalent s = re.sub(r"-", " ", s) # Remove other punctuation and convert to lowercase - s = re.sub(r"[^\w\s]", "", s).lower() - return s + return re.sub(r"[^\w\s]", "", s).lower() # Normalize both text and text_noise normalized_text = normalize(text)