
Remove unused dataflow from benchmark

tags/v0.3.11-rc1
haixuantao committed 10 months ago · commit effe15b5ad
15 changed files with 22 additions and 186 deletions
1. benchs/llms/benchmark_script.py (+5 -5)
2. benchs/llms/magma.yaml (+0 -17)
3. benchs/llms/qwen2.5.yaml (+0 -1)
4. benchs/llms/transformers.yaml (+0 -5)
5. benchs/mllm/llama_cpp_python.yaml (+0 -27)
6. benchs/mllm/magma.yaml (+0 -19)
7. benchs/mllm/phi4.yaml (+0 -1)
8. benchs/mllm/qwen2.5vl.yaml (+0 -22)
9. benchs/mllm/transformers.yaml (+0 -24)
10. benchs/vlm/llama_cpp_python.yaml (+0 -27)
11. benchs/vlm/qwen2.5vl.yaml (+0 -1)
12. benchs/vlm/transformers.yaml (+0 -24)
13. examples/translation/phi4-dev.yml (+0 -1)
14. node-hub/dora-magma/dora_magma/main.py (+14 -7)
15. node-hub/dora-phi4/dora_phi4/main.py (+3 -5)

benchs/llms/benchmark_script.py (+5 -5)

@@ -84,7 +84,7 @@ def main():
 
     durations = []
     speed = []
-    for _ in range(50):
+    for _ in range(10):
         start_time = time.time()
         node.send_output("data", data)
         event = node.next()
@@ -92,9 +92,9 @@ def main():
         if event is not None and event["type"] == "INPUT":
             text = event["value"][0].as_py()
             tokens = event["metadata"].get("tokens", 6)
-            assert (
-                "this is a test" in text.lower()
-            ), f"Expected 'This is a test', got {text}"
+            assert "this is a test" in text.lower(), (
+                f"Expected 'This is a test', got {text}"
+            )
             durations.append(duration)
             speed.append(tokens / duration)
             time.sleep(0.1)
@@ -105,7 +105,7 @@ def main():
         + f"\nMax duration: {max(durations)}"
         + f"\nMin duration: {min(durations)}"
         + f"\nMedian duration: {np.median(durations)}"
-        + f"\nMedian frequency: {1/np.median(durations)}"
+        + f"\nMedian frequency: {1 / np.median(durations)}"
         + f"\nAverage speed: {sum(speed) / len(speed)}"
         + f"\nMax speed: {max(speed)}"
         + f"\nMin speed: {min(speed)}"

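The script these hunks touch follows a measure-assert-aggregate loop. As a minimal standalone sketch of that pattern (round_trip is a hypothetical stand-in for the node.send_output / node.next pair above, returning the generated text and its token count):

import time

import numpy as np

def run_benchmark(round_trip, data, iterations=10):
    # round_trip(data) stands in for one send/receive cycle of the dora node
    # and is assumed to return (text, tokens) for a single request.
    durations, speed = [], []
    for _ in range(iterations):
        start_time = time.time()
        text, tokens = round_trip(data)
        duration = time.time() - start_time
        assert "this is a test" in text.lower(), (
            f"Expected 'This is a test', got {text}"
        )
        durations.append(duration)
        speed.append(tokens / duration)
        time.sleep(0.1)
    print(f"Median duration: {np.median(durations)}")
    print(f"Median frequency: {1 / np.median(durations)}")
    print(f"Average speed: {sum(speed) / len(speed)} tokens/s")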

benchs/llms/magma.yaml (+0 -17)

@@ -1,17 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-magma
-    path: dora-magma
-    inputs:
-      text: benchmark_script/data
-    outputs:
-      - text

benchs/llms/qwen2.5.yaml (+0 -1)

@@ -10,7 +10,6 @@ nodes:
 
   - id: llm
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-qwen
     path: dora-qwen
     inputs:


benchs/llms/transformers.yaml (+0 -5)

@@ -17,8 +17,3 @@ nodes:
       - text
     env:
       MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "128" # Reduced for concise responses
-      DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon
-      ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations
-      TORCH_DTYPE: "float16" # Use half precision for better memory efficiency

benchs/mllm/llama_cpp_python.yaml (+0 -27)

@@ -1,27 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-llama-cpp-python
-    path: dora-llama-cpp-python
-    inputs:
-      text:
-        source: benchmark_script/data
-        queue-size: 10
-    outputs:
-      - text
-    env:
-      MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
-      MODEL_FILE_PATTERN: "*fp16.gguf"
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "512"
-      N_GPU_LAYERS: "35" # Enable GPU acceleration
-      N_THREADS: "16" # CPU threads
-      CONTEXT_SIZE: "4096" # Maximum context window

benchs/mllm/magma.yaml (+0 -19)

@@ -1,19 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - text
-      - image
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-magma
-    path: dora-magma
-    inputs:
-      text: benchmark_script/text
-      image: benchmark_script/image
-    outputs:
-      - text

benchs/mllm/phi4.yaml (+0 -1)

@@ -12,7 +12,6 @@ nodes:
 
   - id: llm
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-phi4
     path: dora-phi4
     inputs:


benchs/mllm/qwen2.5vl.yaml (+0 -22)

@@ -1,22 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: vlm/text
-    outputs:
-      - text
-      - image
-    env:
-      DATA: "Please only generate the following output: This is a cat"
-
-  - id: vlm
-    # Comment flash_attn if not on cuda hardware
-    build: |
-      pip install flash-attn --no-build-isolation
-      pip install -e ../../node-hub/dora-qwen2-5-vl
-    path: dora-qwen2-5-vl
-    inputs:
-      image: benchmark_script/image
-      text: benchmark_script/text
-    outputs:
-      - text

benchs/mllm/transformers.yaml (+0 -24)

@@ -1,24 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-transformers
-    path: dora-transformers
-    inputs:
-      text: benchmark_script/data
-    outputs:
-      - text
-    env:
-      MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "128" # Reduced for concise responses
-      DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon
-      ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations
-      TORCH_DTYPE: "float16" # Use half precision for better memory efficiency

benchs/vlm/llama_cpp_python.yaml (+0 -27)

@@ -1,27 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-llama-cpp-python
-    path: dora-llama-cpp-python
-    inputs:
-      text:
-        source: benchmark_script/data
-        queue-size: 10
-    outputs:
-      - text
-    env:
-      MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
-      MODEL_FILE_PATTERN: "*fp16.gguf"
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "512"
-      N_GPU_LAYERS: "35" # Enable GPU acceleration
-      N_THREADS: "16" # CPU threads
-      CONTEXT_SIZE: "4096" # Maximum context window

benchs/vlm/qwen2.5vl.yaml (+0 -1)

@@ -12,7 +12,6 @@ nodes:
   - id: vlm
     # Comment flash_attn if not on cuda hardware
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-qwen2-5-vl
     path: dora-qwen2-5-vl
     inputs:


benchs/vlm/transformers.yaml (+0 -24)

@@ -1,24 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-transformers
-    path: dora-transformers
-    inputs:
-      text: benchmark_script/data
-    outputs:
-      - text
-    env:
-      MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "128" # Reduced for concise responses
-      DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon
-      ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations
-      TORCH_DTYPE: "float16" # Use half precision for better memory efficiency

examples/translation/phi4-dev.yml (+0 -1)

@@ -17,7 +17,6 @@ nodes:
 
   - id: dora-phi4
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-phi4
     path: dora-phi4
     inputs:


node-hub/dora-magma/dora_magma/main.py (+14 -7)

@@ -42,7 +42,8 @@ def load_magma_models():
             device_map="auto",
         )
         processor = AutoProcessor.from_pretrained(
-            model_name_or_path, trust_remote_code=True,
+            model_name_or_path,
+            trust_remote_code=True,
        )
     except Exception as e:
         logger.error(f"Failed to load model: {e}")
@@ -72,7 +73,9 @@ def generate(
     ]
 
     prompt = processor.tokenizer.apply_chat_template(
-        convs, tokenize=False, add_generation_prompt=True,
+        convs,
+        tokenize=False,
+        add_generation_prompt=True,
     )
 
     try:
@@ -174,14 +177,18 @@ def main():
             task_description = event["value"][0].as_py()
             image_id = event["metadata"].get("image_id", None)
 
-            if image_id is None or image_id not in frames:
-                logger.error(f"Image ID {image_id} not found in frames")
+            if image_id in frames:
+                image = frames[image_id]
+            elif len(frames) == 1:
+                image = next(iter(frames.values()))
+            else:
+                logger.error(f"Image not found for {image_id}")
                 continue
 
-            image = frames[image_id]
             response, trajectories = generate(image, task_description)
             node.send_output(
-                "text", pa.array([response]), {"image_id": image_id},
+                "text",
+                pa.array([response]),
+                {"image_id": image_id},
             )
 
             # Send trajectory data if available

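The behavioral change in the last hunk is a frame-lookup fallback: when the requested image_id is absent but exactly one frame is cached, that frame is used instead of skipping the event. A minimal standalone sketch of that logic (the resolve_frame wrapper is hypothetical; frames and image_id are named as in the diff):

def resolve_frame(frames: dict, image_id):
    # Prefer the frame stored under the requested id.
    if image_id in frames:
        return frames[image_id]
    # Fall back to the only cached frame when there is exactly one.
    if len(frames) == 1:
        return next(iter(frames.values()))
    # Otherwise signal "not found"; the caller logs an error and skips the event.
    return None

assert resolve_frame({"camera": "img"}, None) == "img"
assert resolve_frame({"a": 1, "b": 2}, "a") == 1
assert resolve_frame({"a": 1, "b": 2}, "c") is None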

node-hub/dora-phi4/dora_phi4/main.py (+3 -5)

@@ -55,9 +55,7 @@ device_map = infer_auto_device_map(
 )
 
 # Load the model directly with the inferred device map
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_PATH, **MODEL_CONFIG, device_map=device_map
-).to(device)
+model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **MODEL_CONFIG).to(device)
 
 generation_config = GenerationConfig.from_pretrained(MODEL_PATH)

@@ -82,6 +80,7 @@ BAD_SENTENCES = [
     "The sound of the wind is so loud.",
     "The first time I saw the sea.",
     "the first time saw the sea i was so happy"
+    "The first time I saw the sea, I was very happy.",
     "The first time I saw the sea was in the movie.",
     "The first time I saw the movie was in the theater.",
     "The first time I saw the movie.",
@@ -129,8 +128,7 @@ def remove_text_noise(text: str, text_noise="") -> str:
         # Replace hyphens with spaces to treat "Notre-Dame" and "notre dame" as equivalent
         s = re.sub(r"-", " ", s)
         # Remove other punctuation and convert to lowercase
-        s = re.sub(r"[^\w\s]", "", s).lower()
-        return s
+        return re.sub(r"[^\w\s]", "", s).lower()
 
     # Normalize both text and text_noise
     normalized_text = normalize(text)

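For reference, the helper refactored in the last hunk can be exercised on its own; this sketch re-declares normalize locally rather than importing it from dora_phi4:

import re

def normalize(s: str) -> str:
    # Treat hyphenated and spaced forms as equivalent, e.g. "Notre-Dame" vs "notre dame"
    s = re.sub(r"-", " ", s)
    # Strip remaining punctuation and lowercase
    return re.sub(r"[^\w\s]", "", s).lower()

assert normalize("Notre-Dame!") == normalize("notre dame")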
