From effe15b5adec353ac6317d37c5f28d8665fd810e Mon Sep 17 00:00:00 2001
From: haixuantao
Date: Tue, 18 Mar 2025 23:54:51 +0100
Subject: [PATCH] Remove unused dataflow from benchmark

Delete the benchmark dataflows that are no longer used, drop the
flash-attn build step from the remaining configs, and reduce the
benchmark loop from 50 to 10 iterations. Also let dora-magma fall back
to the only buffered frame when no image_id matches, and load dora-phi4
without the inferred device map.
---
 benchs/llms/benchmark_script.py        | 10 +++++-----
 benchs/llms/magma.yaml                 | 17 ----------------
 benchs/llms/qwen2.5.yaml               |  1 -
 benchs/llms/transformers.yaml          |  5 -----
 benchs/mllm/llama_cpp_python.yaml      | 27 --------------------------
 benchs/mllm/magma.yaml                 | 19 ------------------
 benchs/mllm/phi4.yaml                  |  1 -
 benchs/mllm/qwen2.5vl.yaml             | 22 ---------------------
 benchs/mllm/transformers.yaml          | 24 -----------------------
 benchs/vlm/llama_cpp_python.yaml       | 27 --------------------------
 benchs/vlm/qwen2.5vl.yaml              |  1 -
 benchs/vlm/transformers.yaml           | 24 -----------------------
 examples/translation/phi4-dev.yml      |  1 -
 node-hub/dora-magma/dora_magma/main.py | 21 +++++++++++++-------
 node-hub/dora-phi4/dora_phi4/main.py   |  8 +++-----
 15 files changed, 22 insertions(+), 186 deletions(-)
 delete mode 100644 benchs/llms/magma.yaml
 delete mode 100644 benchs/mllm/llama_cpp_python.yaml
 delete mode 100644 benchs/mllm/magma.yaml
 delete mode 100644 benchs/mllm/qwen2.5vl.yaml
 delete mode 100644 benchs/mllm/transformers.yaml
 delete mode 100644 benchs/vlm/llama_cpp_python.yaml
 delete mode 100644 benchs/vlm/transformers.yaml

diff --git a/benchs/llms/benchmark_script.py b/benchs/llms/benchmark_script.py
index 1ba87662..98390627 100644
--- a/benchs/llms/benchmark_script.py
+++ b/benchs/llms/benchmark_script.py
@@ -84,7 +84,7 @@ def main():
     durations = []
     speed = []
 
-    for _ in range(50):
+    for _ in range(10):
         start_time = time.time()
         node.send_output("data", data)
         event = node.next()
@@ -92,9 +92,9 @@ def main():
         if event is not None and event["type"] == "INPUT":
             text = event["value"][0].as_py()
             tokens = event["metadata"].get("tokens", 6)
-            assert (
-                "this is a test" in text.lower()
-            ), f"Expected 'This is a test', got {text}"
+            assert "this is a test" in text.lower(), (
+                f"Expected 'This is a test', got {text}"
+            )
             durations.append(duration)
             speed.append(tokens / duration)
             time.sleep(0.1)
@@ -105,7 +105,7 @@ def main():
         + f"\nMax duration: {max(durations)}"
         + f"\nMin duration: {min(durations)}"
         + f"\nMedian duration: {np.median(durations)}"
-        + f"\nMedian frequency: {1/np.median(durations)}"
+        + f"\nMedian frequency: {1 / np.median(durations)}"
         + f"\nAverage speed: {sum(speed) / len(speed)}"
         + f"\nMax speed: {max(speed)}"
         + f"\nMin speed: {min(speed)}"
diff --git a/benchs/llms/magma.yaml b/benchs/llms/magma.yaml
deleted file mode 100644
index dcbc3196..00000000
--- a/benchs/llms/magma.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-magma
-    path: dora-magma
-    inputs:
-      text: benchmark_script/data
-    outputs:
-      - text
diff --git a/benchs/llms/qwen2.5.yaml b/benchs/llms/qwen2.5.yaml
index 8a10d78d..d47a4745 100644
--- a/benchs/llms/qwen2.5.yaml
+++ b/benchs/llms/qwen2.5.yaml
@@ -10,7 +10,6 @@ nodes:
 
   - id: llm
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-qwen
     path: dora-qwen
     inputs:
diff --git a/benchs/llms/transformers.yaml b/benchs/llms/transformers.yaml
index 590a2493..1f115240 100644
--- a/benchs/llms/transformers.yaml
+++ b/benchs/llms/transformers.yaml
@@ -17,8 +17,3 @@ nodes:
       - text
     env:
       MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face
SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." - MAX_TOKENS: "128" # Reduced for concise responses - DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon - ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations - TORCH_DTYPE: "float16" # Use half precision for better memory efficiency diff --git a/benchs/mllm/llama_cpp_python.yaml b/benchs/mllm/llama_cpp_python.yaml deleted file mode 100644 index 08b5eace..00000000 --- a/benchs/mllm/llama_cpp_python.yaml +++ /dev/null @@ -1,27 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - data - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-llama-cpp-python - path: dora-llama-cpp-python - inputs: - text: - source: benchmark_script/data - queue-size: 10 - outputs: - - text - env: - MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF" - MODEL_FILE_PATTERN: "*fp16.gguf" - SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." - MAX_TOKENS: "512" - N_GPU_LAYERS: "35" # Enable GPU acceleration - N_THREADS: "16" # CPU threads - CONTEXT_SIZE: "4096" # Maximum context window diff --git a/benchs/mllm/magma.yaml b/benchs/mllm/magma.yaml deleted file mode 100644 index f1fd5792..00000000 --- a/benchs/mllm/magma.yaml +++ /dev/null @@ -1,19 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - text - - image - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-magma - path: dora-magma - inputs: - text: benchmark_script/text - image: benchmark_script/image - outputs: - - text diff --git a/benchs/mllm/phi4.yaml b/benchs/mllm/phi4.yaml index 2aba0c0b..3d7c42ce 100644 --- a/benchs/mllm/phi4.yaml +++ b/benchs/mllm/phi4.yaml @@ -12,7 +12,6 @@ nodes: - id: llm build: | - pip install flash-attn --no-build-isolation pip install -e ../../node-hub/dora-phi4 path: dora-phi4 inputs: diff --git a/benchs/mllm/qwen2.5vl.yaml b/benchs/mllm/qwen2.5vl.yaml deleted file mode 100644 index bfc62bcd..00000000 --- a/benchs/mllm/qwen2.5vl.yaml +++ /dev/null @@ -1,22 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: vlm/text - outputs: - - text - - image - env: - DATA: "Please only generate the following output: This is a cat" - - - id: vlm - # Comment flash_attn if not on cuda hardware - build: | - pip install flash-attn --no-build-isolation - pip install -e ../../node-hub/dora-qwen2-5-vl - path: dora-qwen2-5-vl - inputs: - image: benchmark_script/image - text: benchmark_script/text - outputs: - - text diff --git a/benchs/mllm/transformers.yaml b/benchs/mllm/transformers.yaml deleted file mode 100644 index 590a2493..00000000 --- a/benchs/mllm/transformers.yaml +++ /dev/null @@ -1,24 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - data - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-transformers - path: dora-transformers - inputs: - text: benchmark_script/data - outputs: - - text - env: - MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face - SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." 
- MAX_TOKENS: "128" # Reduced for concise responses - DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon - ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations - TORCH_DTYPE: "float16" # Use half precision for better memory efficiency diff --git a/benchs/vlm/llama_cpp_python.yaml b/benchs/vlm/llama_cpp_python.yaml deleted file mode 100644 index 08b5eace..00000000 --- a/benchs/vlm/llama_cpp_python.yaml +++ /dev/null @@ -1,27 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - data - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-llama-cpp-python - path: dora-llama-cpp-python - inputs: - text: - source: benchmark_script/data - queue-size: 10 - outputs: - - text - env: - MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF" - MODEL_FILE_PATTERN: "*fp16.gguf" - SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." - MAX_TOKENS: "512" - N_GPU_LAYERS: "35" # Enable GPU acceleration - N_THREADS: "16" # CPU threads - CONTEXT_SIZE: "4096" # Maximum context window diff --git a/benchs/vlm/qwen2.5vl.yaml b/benchs/vlm/qwen2.5vl.yaml index bfc62bcd..8a553532 100644 --- a/benchs/vlm/qwen2.5vl.yaml +++ b/benchs/vlm/qwen2.5vl.yaml @@ -12,7 +12,6 @@ nodes: - id: vlm # Comment flash_attn if not on cuda hardware build: | - pip install flash-attn --no-build-isolation pip install -e ../../node-hub/dora-qwen2-5-vl path: dora-qwen2-5-vl inputs: diff --git a/benchs/vlm/transformers.yaml b/benchs/vlm/transformers.yaml deleted file mode 100644 index 590a2493..00000000 --- a/benchs/vlm/transformers.yaml +++ /dev/null @@ -1,24 +0,0 @@ -nodes: - - id: benchmark_script - path: benchmark_script.py - inputs: - text: llm/text - outputs: - - data - env: - DATA: "Please only generate the following output: This is a test" - - - id: llm - build: pip install -e ../../node-hub/dora-transformers - path: dora-transformers - inputs: - text: benchmark_script/data - outputs: - - text - env: - MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face - SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers." 
- MAX_TOKENS: "128" # Reduced for concise responses - DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon - ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations - TORCH_DTYPE: "float16" # Use half precision for better memory efficiency diff --git a/examples/translation/phi4-dev.yml b/examples/translation/phi4-dev.yml index 749591e6..280adb8a 100644 --- a/examples/translation/phi4-dev.yml +++ b/examples/translation/phi4-dev.yml @@ -17,7 +17,6 @@ nodes: - id: dora-phi4 build: | - pip install flash-attn --no-build-isolation pip install -e ../../node-hub/dora-phi4 path: dora-phi4 inputs: diff --git a/node-hub/dora-magma/dora_magma/main.py b/node-hub/dora-magma/dora_magma/main.py index 5f794c6a..097ee7cd 100644 --- a/node-hub/dora-magma/dora_magma/main.py +++ b/node-hub/dora-magma/dora_magma/main.py @@ -42,7 +42,8 @@ def load_magma_models(): device_map="auto", ) processor = AutoProcessor.from_pretrained( - model_name_or_path, trust_remote_code=True, + model_name_or_path, + trust_remote_code=True, ) except Exception as e: logger.error(f"Failed to load model: {e}") @@ -72,7 +73,9 @@ def generate( ] prompt = processor.tokenizer.apply_chat_template( - convs, tokenize=False, add_generation_prompt=True, + convs, + tokenize=False, + add_generation_prompt=True, ) try: @@ -174,14 +177,18 @@ def main(): task_description = event["value"][0].as_py() image_id = event["metadata"].get("image_id", None) - if image_id is None or image_id not in frames: - logger.error(f"Image ID {image_id} not found in frames") + if image_id in frames: + image = frames[image_id] + elif len(frames) == 1: + image = next(iter(frames.values())) + else: + logger.error(f"Image not found for {image_id}") continue - - image = frames[image_id] response, trajectories = generate(image, task_description) node.send_output( - "text", pa.array([response]), {"image_id": image_id}, + "text", + pa.array([response]), + {"image_id": image_id}, ) # Send trajectory data if available diff --git a/node-hub/dora-phi4/dora_phi4/main.py b/node-hub/dora-phi4/dora_phi4/main.py index 457d1b53..00f8120c 100644 --- a/node-hub/dora-phi4/dora_phi4/main.py +++ b/node-hub/dora-phi4/dora_phi4/main.py @@ -55,9 +55,7 @@ device_map = infer_auto_device_map( ) # Load the model directly with the inferred device map -model = AutoModelForCausalLM.from_pretrained( - MODEL_PATH, **MODEL_CONFIG, device_map=device_map -).to(device) +model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **MODEL_CONFIG).to(device) generation_config = GenerationConfig.from_pretrained(MODEL_PATH) @@ -82,6 +80,7 @@ BAD_SENTENCES = [ "The sound of the wind is so loud.", "The first time I saw the sea.", "the first time saw the sea i was so happy" + "The first time I saw the sea, I was very happy.", "The first time I saw the sea was in the movie.", "The first time I saw the movie was in the theater.", "The first time I saw the movie.", @@ -129,8 +128,7 @@ def remove_text_noise(text: str, text_noise="") -> str: # Replace hyphens with spaces to treat "Notre-Dame" and "notre dame" as equivalent s = re.sub(r"-", " ", s) # Remove other punctuation and convert to lowercase - s = re.sub(r"[^\w\s]", "", s).lower() - return s + return re.sub(r"[^\w\s]", "", s).lower() # Normalize both text and text_noise normalized_text = normalize(text)