
Remove unused dataflow from benchmark

tags/v0.3.11-rc1
haixuantao committed 10 months ago · commit effe15b5ad
15 changed files with 22 additions and 186 deletions
1. benchs/llms/benchmark_script.py (+5 -5)
2. benchs/llms/magma.yaml (+0 -17)
3. benchs/llms/qwen2.5.yaml (+0 -1)
4. benchs/llms/transformers.yaml (+0 -5)
5. benchs/mllm/llama_cpp_python.yaml (+0 -27)
6. benchs/mllm/magma.yaml (+0 -19)
7. benchs/mllm/phi4.yaml (+0 -1)
8. benchs/mllm/qwen2.5vl.yaml (+0 -22)
9. benchs/mllm/transformers.yaml (+0 -24)
10. benchs/vlm/llama_cpp_python.yaml (+0 -27)
11. benchs/vlm/qwen2.5vl.yaml (+0 -1)
12. benchs/vlm/transformers.yaml (+0 -24)
13. examples/translation/phi4-dev.yml (+0 -1)
14. node-hub/dora-magma/dora_magma/main.py (+14 -7)
15. node-hub/dora-phi4/dora_phi4/main.py (+3 -5)

benchs/llms/benchmark_script.py (+5 -5)

@@ -84,7 +84,7 @@ def main():
 
     durations = []
     speed = []
-    for _ in range(50):
+    for _ in range(10):
         start_time = time.time()
         node.send_output("data", data)
         event = node.next()
@@ -92,9 +92,9 @@ def main():
         if event is not None and event["type"] == "INPUT":
             text = event["value"][0].as_py()
             tokens = event["metadata"].get("tokens", 6)
-            assert (
-                "this is a test" in text.lower()
-            ), f"Expected 'This is a test', got {text}"
+            assert "this is a test" in text.lower(), (
+                f"Expected 'This is a test', got {text}"
+            )
             durations.append(duration)
             speed.append(tokens / duration)
             time.sleep(0.1)
@@ -105,7 +105,7 @@ def main():
         + f"\nMax duration: {max(durations)}"
         + f"\nMin duration: {min(durations)}"
         + f"\nMedian duration: {np.median(durations)}"
-        + f"\nMedian frequency: {1/np.median(durations)}"
+        + f"\nMedian frequency: {1 / np.median(durations)}"
         + f"\nAverage speed: {sum(speed) / len(speed)}"
         + f"\nMax speed: {max(speed)}"
         + f"\nMin speed: {min(speed)}"

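The script these hunks touch follows a measure-assert-aggregate loop. As a minimal standalone sketch of that pattern (round_trip is a hypothetical stand-in for the node.send_output / node.next pair above, returning the generated text and its token count):

import time

import numpy as np

def run_benchmark(round_trip, data, iterations=10):
    # round_trip(data) stands in for one send/receive cycle of the dora node
    # and is assumed to return (text, tokens) for a single request.
    durations, speed = [], []
    for _ in range(iterations):
        start_time = time.time()
        text, tokens = round_trip(data)
        duration = time.time() - start_time
        assert "this is a test" in text.lower(), (
            f"Expected 'This is a test', got {text}"
        )
        durations.append(duration)
        speed.append(tokens / duration)
        time.sleep(0.1)
    print(f"Median duration: {np.median(durations)}")
    print(f"Median frequency: {1 / np.median(durations)}")
    print(f"Average speed: {sum(speed) / len(speed)} tokens/s")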

benchs/llms/magma.yaml (+0 -17)

@@ -1,17 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-magma
-    path: dora-magma
-    inputs:
-      text: benchmark_script/data
-    outputs:
-      - text

benchs/llms/qwen2.5.yaml (+0 -1)

@@ -10,7 +10,6 @@ nodes:
 
   - id: llm
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-qwen
     path: dora-qwen
     inputs:


benchs/llms/transformers.yaml (+0 -5)

@@ -17,8 +17,3 @@ nodes:
       - text
     env:
       MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "128" # Reduced for concise responses
-      DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon
-      ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations
-      TORCH_DTYPE: "float16" # Use half precision for better memory efficiency

benchs/mllm/llama_cpp_python.yaml (+0 -27)

@@ -1,27 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-llama-cpp-python
-    path: dora-llama-cpp-python
-    inputs:
-      text:
-        source: benchmark_script/data
-        queue-size: 10
-    outputs:
-      - text
-    env:
-      MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
-      MODEL_FILE_PATTERN: "*fp16.gguf"
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "512"
-      N_GPU_LAYERS: "35" # Enable GPU acceleration
-      N_THREADS: "16" # CPU threads
-      CONTEXT_SIZE: "4096" # Maximum context window

benchs/mllm/magma.yaml (+0 -19)

@@ -1,19 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - text
-      - image
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-magma
-    path: dora-magma
-    inputs:
-      text: benchmark_script/text
-      image: benchmark_script/image
-    outputs:
-      - text

benchs/mllm/phi4.yaml (+0 -1)

@@ -12,7 +12,6 @@ nodes:
 
   - id: llm
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-phi4
     path: dora-phi4
     inputs:


benchs/mllm/qwen2.5vl.yaml (+0 -22)

@@ -1,22 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: vlm/text
-    outputs:
-      - text
-      - image
-    env:
-      DATA: "Please only generate the following output: This is a cat"
-
-  - id: vlm
-    # Comment flash_attn if not on cuda hardware
-    build: |
-      pip install flash-attn --no-build-isolation
-      pip install -e ../../node-hub/dora-qwen2-5-vl
-    path: dora-qwen2-5-vl
-    inputs:
-      image: benchmark_script/image
-      text: benchmark_script/text
-    outputs:
-      - text

benchs/mllm/transformers.yaml (+0 -24)

@@ -1,24 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-transformers
-    path: dora-transformers
-    inputs:
-      text: benchmark_script/data
-    outputs:
-      - text
-    env:
-      MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "128" # Reduced for concise responses
-      DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon
-      ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations
-      TORCH_DTYPE: "float16" # Use half precision for better memory efficiency

benchs/vlm/llama_cpp_python.yaml (+0 -27)

@@ -1,27 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-llama-cpp-python
-    path: dora-llama-cpp-python
-    inputs:
-      text:
-        source: benchmark_script/data
-        queue-size: 10
-    outputs:
-      - text
-    env:
-      MODEL_NAME_OR_PATH: "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
-      MODEL_FILE_PATTERN: "*fp16.gguf"
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "512"
-      N_GPU_LAYERS: "35" # Enable GPU acceleration
-      N_THREADS: "16" # CPU threads
-      CONTEXT_SIZE: "4096" # Maximum context window

benchs/vlm/qwen2.5vl.yaml (+0 -1)

@@ -12,7 +12,6 @@ nodes:
   - id: vlm
     # Comment flash_attn if not on cuda hardware
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-qwen2-5-vl
     path: dora-qwen2-5-vl
     inputs:


benchs/vlm/transformers.yaml (+0 -24)

@@ -1,24 +0,0 @@
-nodes:
-  - id: benchmark_script
-    path: benchmark_script.py
-    inputs:
-      text: llm/text
-    outputs:
-      - data
-    env:
-      DATA: "Please only generate the following output: This is a test"
-
-  - id: llm
-    build: pip install -e ../../node-hub/dora-transformers
-    path: dora-transformers
-    inputs:
-      text: benchmark_script/data
-    outputs:
-      - text
-    env:
-      MODEL_NAME: "Qwen/Qwen2.5-0.5B-Instruct" # Model from Hugging Face
-      SYSTEM_PROMPT: "You're a very succinct AI assistant with short answers."
-      MAX_TOKENS: "128" # Reduced for concise responses
-      DEVICE: "cuda" # Use "cpu" for CPU, "cuda" for NVIDIA GPU, "mps" for Apple Silicon
-      ENABLE_MEMORY_EFFICIENT: "true" # Enable 8-bit quantization and memory optimizations
-      TORCH_DTYPE: "float16" # Use half precision for better memory efficiency

examples/translation/phi4-dev.yml (+0 -1)

@@ -17,7 +17,6 @@ nodes:
 
   - id: dora-phi4
     build: |
-      pip install flash-attn --no-build-isolation
       pip install -e ../../node-hub/dora-phi4
     path: dora-phi4
     inputs:


node-hub/dora-magma/dora_magma/main.py (+14 -7)

@@ -42,7 +42,8 @@ def load_magma_models():
             device_map="auto",
         )
         processor = AutoProcessor.from_pretrained(
-            model_name_or_path, trust_remote_code=True,
+            model_name_or_path,
+            trust_remote_code=True,
        )
     except Exception as e:
         logger.error(f"Failed to load model: {e}")
@@ -72,7 +73,9 @@ def generate(
     ]
 
     prompt = processor.tokenizer.apply_chat_template(
-        convs, tokenize=False, add_generation_prompt=True,
+        convs,
+        tokenize=False,
+        add_generation_prompt=True,
     )
 
     try:
@@ -174,14 +177,18 @@ def main():
             task_description = event["value"][0].as_py()
             image_id = event["metadata"].get("image_id", None)
 
-            if image_id is None or image_id not in frames:
-                logger.error(f"Image ID {image_id} not found in frames")
+            if image_id in frames:
+                image = frames[image_id]
+            elif len(frames) == 1:
+                image = next(iter(frames.values()))
+            else:
+                logger.error(f"Image not found for {image_id}")
                 continue
 
-            image = frames[image_id]
             response, trajectories = generate(image, task_description)
             node.send_output(
-                "text", pa.array([response]), {"image_id": image_id},
+                "text",
+                pa.array([response]),
+                {"image_id": image_id},
             )
 
             # Send trajectory data if available

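The behavioral change in the last hunk is a frame-lookup fallback: when the requested image_id is absent but exactly one frame is cached, that frame is used instead of skipping the event. A minimal standalone sketch of that logic (the resolve_frame wrapper is hypothetical; frames and image_id are named as in the diff):

def resolve_frame(frames: dict, image_id):
    # Prefer the frame stored under the requested id.
    if image_id in frames:
        return frames[image_id]
    # Fall back to the only cached frame when there is exactly one.
    if len(frames) == 1:
        return next(iter(frames.values()))
    # Otherwise signal "not found"; the caller logs an error and skips the event.
    return None

assert resolve_frame({"camera": "img"}, None) == "img"
assert resolve_frame({"a": 1, "b": 2}, "a") == 1
assert resolve_frame({"a": 1, "b": 2}, "c") is None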

node-hub/dora-phi4/dora_phi4/main.py (+3 -5)

@@ -55,9 +55,7 @@ device_map = infer_auto_device_map(
 )
 
 # Load the model directly with the inferred device map
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_PATH, **MODEL_CONFIG, device_map=device_map
-).to(device)
+model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **MODEL_CONFIG).to(device)
 
 generation_config = GenerationConfig.from_pretrained(MODEL_PATH)

@@ -82,6 +80,7 @@ BAD_SENTENCES = [
     "The sound of the wind is so loud.",
     "The first time I saw the sea.",
     "the first time saw the sea i was so happy"
+    "The first time I saw the sea, I was very happy.",
     "The first time I saw the sea was in the movie.",
     "The first time I saw the movie was in the theater.",
     "The first time I saw the movie.",
@@ -129,8 +128,7 @@ def remove_text_noise(text: str, text_noise="") -> str:
         # Replace hyphens with spaces to treat "Notre-Dame" and "notre dame" as equivalent
         s = re.sub(r"-", " ", s)
         # Remove other punctuation and convert to lowercase
-        s = re.sub(r"[^\w\s]", "", s).lower()
-        return s
+        return re.sub(r"[^\w\s]", "", s).lower()
 
     # Normalize both text and text_noise
     normalized_text = normalize(text)

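For reference, the helper refactored in the last hunk can be exercised on its own; this sketch re-declares normalize locally rather than importing it from dora_phi4:

import re

def normalize(s: str) -> str:
    # Treat hyphenated and spaced forms as equivalent, e.g. "Notre-Dame" vs "notre dame"
    s = re.sub(r"-", " ", s)
    # Strip remaining punctuation and lowercase
    return re.sub(r"[^\w\s]", "", s).lower()

assert normalize("Notre-Dame!") == normalize("notre dame")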
