Rename benchs -> benches to be conformant with cargo

haixuanTao committed 10 months ago
commit 5ebd723c52 (tags/v0.3.11-rc1)
17 changed files with 11 additions and 344 deletions
  1. +0 -0    benches/llms/.gitignore
  2. +0 -0    benches/llms/README.md
  3. +2 -2    benches/llms/llama_cpp_python.yaml
  4. +2 -2    benches/llms/phi4.yaml
  5. +2 -2    benches/llms/qwen2.5.yaml
  6. +2 -2    benches/llms/transformers.yaml
  7. +0 -0    benches/mllm/.gitignore
  8. +0 -0    benches/mllm/README.md
  9. +0 -0    benches/mllm/benchmark_script.py
  10. +0 -0   benches/mllm/phi4.yaml
  11. +0 -0   benches/vlm/.gitignore
  12. +0 -0   benches/vlm/README.md
  13. +1 -1   benches/vlm/magma.yaml
  14. +1 -1   benches/vlm/phi4.yaml
  15. +1 -1   benches/vlm/qwen2.5vl.yaml
  16. +0 -149 benchs/llms/benchmark_script.py
  17. +0 -184 benchs/vlm/benchmark_script.py
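
As the commit message notes, the rename from benchs to benches matches Cargo's convention: Cargo auto-discovers files under a benches/ directory at the crate root as benchmark targets for cargo bench. Purely as an illustration of that convention (the file and benchmark below are hypothetical and unrelated to the Python/YAML dataflow benchmarks moved in this commit), a minimal Rust bench target would look roughly like:

// benches/example_bench.rs - hypothetical file, only illustrating Cargo's layout;
// Cargo picks up benches/*.rs as bench targets built and run by `cargo bench`.
#![feature(test)] // the built-in bench harness requires a nightly toolchain
extern crate test;

use test::Bencher;

#[bench]
fn bench_noop(b: &mut Bencher) {
    // Time a trivial computation; a real benchmark would exercise real code.
    b.iter(|| (0..100u64).sum::<u64>());
}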

benchs/llms/.gitignore → benches/llms/.gitignore


benchs/llms/README.md → benches/llms/README.md


benchs/llms/llama_cpp_python.yaml → benches/llms/llama_cpp_python.yaml

@@ -1,10 +1,10 @@
 nodes:
   - id: benchmark_script
-    path: benchmark_script.py
+    path: ../mllm/benchmark_script.py
     inputs:
       text: llm/text
     outputs:
-      - data
+      - text
     env:
       DATA: "Please only generate the following output: This is a test"


benchs/llms/phi4.yaml → benches/llms/phi4.yaml

@@ -1,10 +1,10 @@
 nodes:
   - id: benchmark_script
-    path: benchmark_script.py
+    path: ../mllm/benchmark_script.py
     inputs:
       text: llm/text
     outputs:
-      - data
+      - text
     env:
       DATA: "Please only generate the following output: This is a test"


benchs/llms/qwen2.5.yaml → benches/llms/qwen2.5.yaml

@@ -1,10 +1,10 @@
 nodes:
   - id: benchmark_script
-    path: benchmark_script.py
+    path: ../mllm/benchmark_script.py
     inputs:
       text: llm/text
     outputs:
-      - data
+      - text
     env:
       DATA: "Please only generate the following output: This is a test"


benchs/llms/transformers.yaml → benches/llms/transformers.yaml

@@ -1,10 +1,10 @@
 nodes:
   - id: benchmark_script
-    path: benchmark_script.py
+    path: ../mllm/benchmark_script.py
     inputs:
       text: llm/text
     outputs:
-      - data
+      - text
     env:
       DATA: "Please only generate the following output: This is a test"


benchs/mllm/.gitignore → benches/mllm/.gitignore


benchs/mllm/README.md → benches/mllm/README.md


benchs/mllm/benchmark_script.py → benches/mllm/benchmark_script.py


benchs/mllm/phi4.yaml → benches/mllm/phi4.yaml


benchs/vlm/.gitignore → benches/vlm/.gitignore


benchs/vlm/README.md → benches/vlm/README.md


benchs/vlm/magma.yaml → benches/vlm/magma.yaml

@@ -1,6 +1,6 @@
 nodes:
   - id: benchmark_script
-    path: benchmark_script.py
+    path: ../mllm/benchmark_script.py
     inputs:
       text: llm/text
     outputs:

benchs/vlm/phi4.yaml → benches/vlm/phi4.yaml

@@ -1,6 +1,6 @@
 nodes:
   - id: benchmark_script
-    path: benchmark_script.py
+    path: ../mllm/benchmark_script.py
     inputs:
       text: llm/text
     outputs:

benchs/vlm/qwen2.5vl.yaml → benches/vlm/qwen2.5vl.yaml

@@ -1,6 +1,6 @@
 nodes:
   - id: benchmark_script
-    path: benchmark_script.py
+    path: ../mllm/benchmark_script.py
     inputs:
       text: vlm/text
     outputs:

benchs/llms/benchmark_script.py  (+0 -149)

@@ -1,149 +0,0 @@
"""TODO: Add docstring."""

import argparse
import ast

# Create an empty csv file with header in the current directory if file does not exist
import csv
import os
import time

import numpy as np
import pyarrow as pa
from dora import Node


def write_to_csv(filename, header, row):
    """
    Create a CSV file with a header if it does not exist, and write a row to it.
    If the file exists, append the row to the file.

    :param filename: Name of the CSV file.
    :param header: List of column names to use as the header.
    :param row: List of data to write as a row in the CSV file.
    """
    file_exists = os.path.exists(filename)

    with open(
        filename, mode="a" if file_exists else "w", newline="", encoding="utf8"
    ) as file:
        writer = csv.writer(file)

        # Write the header if the file is being created
        if not file_exists:
            writer.writerow(header)
            print(f"File '{filename}' created with header: {header}")

        # Write the row
        writer.writerow(row)
        print(f"Row written to '{filename}': {row}")


def main():
    # Handle dynamic nodes, ask for the name of the node in the dataflow, and the same values as the ENV variables.
    """TODO: Add docstring."""
    parser = argparse.ArgumentParser(description="Simple arrow sender")

    parser.add_argument(
        "--name",
        type=str,
        required=False,
        help="The name of the node in the dataflow.",
        default="pyarrow-sender",
    )
    parser.add_argument(
        "--data",
        type=str,
        required=False,
        help="Arrow Data as string.",
        default=None,
    )

    args = parser.parse_args()

    data = os.getenv("DATA", args.data)

    node = Node(
        args.name,
    )  # provide the name to connect to the dataflow if dynamic node
    name = node.dataflow_descriptor()["nodes"][1]["path"]

    if data is None:
        raise ValueError(
            "No data provided. Please specify `DATA` environment argument or as `--data` argument",
        )
    try:
        data = ast.literal_eval(data)
    except Exception:  # noqa
        print("Passing input as string")

    if isinstance(data, (str, int, float)):
        data = pa.array([data])
    else:
        data = pa.array(data)  # initialize pyarrow array

    durations = []
    speed = []
    for _ in range(10):
        start_time = time.time()
        node.send_output("data", data)
        event = node.next()
        duration = time.time() - start_time
        if event is not None and event["type"] == "INPUT":
            text = event["value"][0].as_py()
            tokens = event["metadata"].get("tokens", 6)
            assert "this is a test" in text.lower(), (
                f"Expected 'This is a test', got {text}"
            )
            durations.append(duration)
            speed.append(tokens / duration)
        time.sleep(0.1)
    durations = np.array(durations)
    speed = np.array(speed)
    print(
        f"\nAverage duration: {sum(durations) / len(durations)}"
        + f"\nMax duration: {max(durations)}"
        + f"\nMin duration: {min(durations)}"
        + f"\nMedian duration: {np.median(durations)}"
        + f"\nMedian frequency: {1 / np.median(durations)}"
        + f"\nAverage speed: {sum(speed) / len(speed)}"
        + f"\nMax speed: {max(speed)}"
        + f"\nMin speed: {min(speed)}"
        + f"\nMedian speed: {np.median(speed)}"
        + f"\nTotal tokens: {tokens}"
    )
    write_to_csv(
        "benchmark.csv",
        [
            "path",
            "date",
            "average_duration(s)",
            "max_duration(s)",
            "min_duration(s)",
            "median_duration(s)",
            "median_frequency(Hz)",
            "average_speed(tok/s)",
            "max_speed(tok/s)",
            "min_speed(tok/s)",
            "median_speed(tok/s)",
            "total_tokens",
        ],
        [
            name,
            time.strftime("%Y-%m-%d %H:%M:%S"),
            sum(durations) / len(durations),
            max(durations),
            min(durations),
            np.median(durations),
            1 / np.median(durations),
            sum(speed) / len(speed),
            max(speed),
            min(speed),
            np.median(speed),
            tokens,
        ],
    )


if __name__ == "__main__":
    main()

benchs/vlm/benchmark_script.py  (+0 -184)

@@ -1,184 +0,0 @@
"""TODO: Add docstring."""

import argparse
import ast

# Create an empty csv file with header in the current directory if file does not exist
import csv
import os
import time
from io import BytesIO

import cv2
import numpy as np
import pyarrow as pa
import requests
from dora import Node
from PIL import Image

CAT_URL = "https://i.ytimg.com/vi/fzzjgBAaWZw/hqdefault.jpg"


def get_cat_image():
    """
    Get a cat image as a numpy array.

    :return: Cat image as a numpy array.
    """
    # Fetch the image from the URL
    response = requests.get(CAT_URL)
    response.raise_for_status()

    # Open the image using PIL

    image = Image.open(BytesIO(response.content))
    # Convert the image to a numpy array

    image_array = np.array(image)
    cv2.resize(image_array, (640, 480))
    # Convert RGB to BGR for

    return image_array


def write_to_csv(filename, header, row):
    """
    Create a CSV file with a header if it does not exist, and write a row to it.
    If the file exists, append the row to the file.

    :param filename: Name of the CSV file.
    :param header: List of column names to use as the header.
    :param row: List of data to write as a row in the CSV file.
    """
    file_exists = os.path.exists(filename)

    with open(
        filename, mode="a" if file_exists else "w", newline="", encoding="utf8"
    ) as file:
        writer = csv.writer(file)

        # Write the header if the file is being created
        if not file_exists:
            writer.writerow(header)
            print(f"File '{filename}' created with header: {header}")

        # Write the row
        writer.writerow(row)
        print(f"Row written to '{filename}': {row}")


def main():
    # Handle dynamic nodes, ask for the name of the node in the dataflow, and the same values as the ENV variables.
    """TODO: Add docstring."""
    parser = argparse.ArgumentParser(description="Simple arrow sender")

    parser.add_argument(
        "--name",
        type=str,
        required=False,
        help="The name of the node in the dataflow.",
        default="pyarrow-sender",
    )
    parser.add_argument(
        "--data",
        type=str,
        required=False,
        help="Arrow Data as string.",
        default=None,
    )

    args = parser.parse_args()

    data = os.getenv("DATA", args.data)

    node = Node(
        args.name,
    )  # provide the name to connect to the dataflow if dynamic node
    name = node.dataflow_descriptor()["nodes"][1]["path"]

    if data is None:
        raise ValueError(
            "No data provided. Please specify `DATA` environment argument or as `--data` argument",
        )
    try:
        data = ast.literal_eval(data)
    except Exception:  # noqa
        print("Passing input as string")

    if isinstance(data, (str, int, float)):
        data = pa.array([data])
    else:
        data = pa.array(data)  # initialize pyarrow array

    cat = get_cat_image()
    durations = []
    speed = []
    for _ in range(10):
        node.send_output(
            "image",
            pa.array(cat.ravel()),
            {"encoding": "rgb8", "width": cat.shape[1], "height": cat.shape[0]},
        )
        time.sleep(0.1)
        start_time = time.time()
        node.send_output("text", data)
        event = node.next()
        duration = time.time() - start_time
        if event is not None and event["type"] == "INPUT":
            text = event["value"][0].as_py()
            tokens = event["metadata"].get("tokens", 6)
            assert (
                "this is a cat" in text.lower()
            ), f"Expected 'This is a cat', got {text}"
            durations.append(duration)
            speed.append(tokens / duration)
        time.sleep(0.1)
    durations = np.array(durations)
    speed = np.array(speed)
    print(
        f"\nAverage duration: {sum(durations) / len(durations)}"
        + f"\nMax duration: {max(durations)}"
        + f"\nMin duration: {min(durations)}"
        + f"\nMedian duration: {np.median(durations)}"
        + f"\nMedian frequency: {1 / np.median(durations)}"
        + f"\nAverage speed: {sum(speed) / len(speed)}"
        + f"\nMax speed: {max(speed)}"
        + f"\nMin speed: {min(speed)}"
        + f"\nMedian speed: {np.median(speed)}"
        + f"\nTotal tokens: {tokens}"
    )
    write_to_csv(
        "benchmark.csv",
        [
            "path",
            "date",
            "average_duration(s)",
            "max_duration(s)",
            "min_duration(s)",
            "median_duration(s)",
            "median_frequency(Hz)",
            "average_speed(tok/s)",
            "max_speed(tok/s)",
            "min_speed(tok/s)",
            "median_speed(tok/s)",
            "total_tokens",
        ],
        [
            name,
            time.strftime("%Y-%m-%d %H:%M:%S"),
            sum(durations) / len(durations),
            max(durations),
            min(durations),
            np.median(durations),
            1 / np.median(durations),
            sum(speed) / len(speed),
            max(speed),
            min(speed),
            np.median(speed),
            tokens,
        ],
    )


if __name__ == "__main__":
    main()
