diff --git a/benchs/llms/.gitignore b/benches/llms/.gitignore similarity index 100% rename from benchs/llms/.gitignore rename to benches/llms/.gitignore diff --git a/benchs/llms/README.md b/benches/llms/README.md similarity index 100% rename from benchs/llms/README.md rename to benches/llms/README.md diff --git a/benchs/llms/llama_cpp_python.yaml b/benches/llms/llama_cpp_python.yaml similarity index 93% rename from benchs/llms/llama_cpp_python.yaml rename to benches/llms/llama_cpp_python.yaml index 08b5eace..888d62d2 100644 --- a/benchs/llms/llama_cpp_python.yaml +++ b/benches/llms/llama_cpp_python.yaml @@ -1,10 +1,10 @@ nodes: - id: benchmark_script - path: benchmark_script.py + path: ../mllm/benchmark_script.py inputs: text: llm/text outputs: - - data + - text env: DATA: "Please only generate the following output: This is a test" diff --git a/benchs/llms/phi4.yaml b/benches/llms/phi4.yaml similarity index 87% rename from benchs/llms/phi4.yaml rename to benches/llms/phi4.yaml index 600b9d93..0551f2c8 100644 --- a/benchs/llms/phi4.yaml +++ b/benches/llms/phi4.yaml @@ -1,10 +1,10 @@ nodes: - id: benchmark_script - path: benchmark_script.py + path: ../mllm/benchmark_script.py inputs: text: llm/text outputs: - - data + - text env: DATA: "Please only generate the following output: This is a test" diff --git a/benchs/llms/qwen2.5.yaml b/benches/llms/qwen2.5.yaml similarity index 86% rename from benchs/llms/qwen2.5.yaml rename to benches/llms/qwen2.5.yaml index d47a4745..8a86540b 100644 --- a/benchs/llms/qwen2.5.yaml +++ b/benches/llms/qwen2.5.yaml @@ -1,10 +1,10 @@ nodes: - id: benchmark_script - path: benchmark_script.py + path: ../mllm/benchmark_script.py inputs: text: llm/text outputs: - - data + - text env: DATA: "Please only generate the following output: This is a test" diff --git a/benchs/llms/transformers.yaml b/benches/llms/transformers.yaml similarity index 89% rename from benchs/llms/transformers.yaml rename to benches/llms/transformers.yaml index 1f115240..3614659b 100644 --- a/benchs/llms/transformers.yaml +++ b/benches/llms/transformers.yaml @@ -1,10 +1,10 @@ nodes: - id: benchmark_script - path: benchmark_script.py + path: ../mllm/benchmark_script.py inputs: text: llm/text outputs: - - data + - text env: DATA: "Please only generate the following output: This is a test" diff --git a/benchs/mllm/.gitignore b/benches/mllm/.gitignore similarity index 100% rename from benchs/mllm/.gitignore rename to benches/mllm/.gitignore diff --git a/benchs/mllm/README.md b/benches/mllm/README.md similarity index 100% rename from benchs/mllm/README.md rename to benches/mllm/README.md diff --git a/benchs/mllm/benchmark_script.py b/benches/mllm/benchmark_script.py similarity index 100% rename from benchs/mllm/benchmark_script.py rename to benches/mllm/benchmark_script.py diff --git a/benchs/mllm/phi4.yaml b/benches/mllm/phi4.yaml similarity index 100% rename from benchs/mllm/phi4.yaml rename to benches/mllm/phi4.yaml diff --git a/benchs/vlm/.gitignore b/benches/vlm/.gitignore similarity index 100% rename from benchs/vlm/.gitignore rename to benches/vlm/.gitignore diff --git a/benchs/vlm/README.md b/benches/vlm/README.md similarity index 100% rename from benchs/vlm/README.md rename to benches/vlm/README.md diff --git a/benchs/vlm/magma.yaml b/benches/vlm/magma.yaml similarity index 90% rename from benchs/vlm/magma.yaml rename to benches/vlm/magma.yaml index f1fd5792..41d8079e 100644 --- a/benchs/vlm/magma.yaml +++ b/benches/vlm/magma.yaml @@ -1,6 +1,6 @@ nodes: - id: benchmark_script - path: benchmark_script.py + path: ../mllm/benchmark_script.py inputs: text: llm/text outputs: diff --git a/benchs/vlm/phi4.yaml b/benches/vlm/phi4.yaml similarity index 91% rename from benchs/vlm/phi4.yaml rename to benches/vlm/phi4.yaml index 77cda7e3..a8005a59 100644 --- a/benchs/vlm/phi4.yaml +++ b/benches/vlm/phi4.yaml @@ -1,6 +1,6 @@ nodes: - id: benchmark_script - path: benchmark_script.py + path: ../mllm/benchmark_script.py inputs: text: llm/text outputs: diff --git a/benchs/vlm/qwen2.5vl.yaml b/benches/vlm/qwen2.5vl.yaml similarity index 92% rename from benchs/vlm/qwen2.5vl.yaml rename to benches/vlm/qwen2.5vl.yaml index 8a553532..f3bf0e0d 100644 --- a/benchs/vlm/qwen2.5vl.yaml +++ b/benches/vlm/qwen2.5vl.yaml @@ -1,6 +1,6 @@ nodes: - id: benchmark_script - path: benchmark_script.py + path: ../mllm/benchmark_script.py inputs: text: vlm/text outputs: diff --git a/benchs/llms/benchmark_script.py b/benchs/llms/benchmark_script.py deleted file mode 100644 index 98390627..00000000 --- a/benchs/llms/benchmark_script.py +++ /dev/null @@ -1,149 +0,0 @@ -"""TODO: Add docstring.""" - -import argparse -import ast - -# Create an empty csv file with header in the current directory if file does not exist -import csv -import os -import time - -import numpy as np -import pyarrow as pa -from dora import Node - - -def write_to_csv(filename, header, row): - """ - Create a CSV file with a header if it does not exist, and write a row to it. - If the file exists, append the row to the file. - - :param filename: Name of the CSV file. - :param header: List of column names to use as the header. - :param row: List of data to write as a row in the CSV file. - """ - file_exists = os.path.exists(filename) - - with open( - filename, mode="a" if file_exists else "w", newline="", encoding="utf8" - ) as file: - writer = csv.writer(file) - - # Write the header if the file is being created - if not file_exists: - writer.writerow(header) - print(f"File '{filename}' created with header: {header}") - - # Write the row - writer.writerow(row) - print(f"Row written to '{filename}': {row}") - - -def main(): - # Handle dynamic nodes, ask for the name of the node in the dataflow, and the same values as the ENV variables. - """TODO: Add docstring.""" - parser = argparse.ArgumentParser(description="Simple arrow sender") - - parser.add_argument( - "--name", - type=str, - required=False, - help="The name of the node in the dataflow.", - default="pyarrow-sender", - ) - parser.add_argument( - "--data", - type=str, - required=False, - help="Arrow Data as string.", - default=None, - ) - - args = parser.parse_args() - - data = os.getenv("DATA", args.data) - - node = Node( - args.name, - ) # provide the name to connect to the dataflow if dynamic node - name = node.dataflow_descriptor()["nodes"][1]["path"] - - if data is None: - raise ValueError( - "No data provided. Please specify `DATA` environment argument or as `--data` argument", - ) - try: - data = ast.literal_eval(data) - except Exception: # noqa - print("Passing input as string") - - if isinstance(data, (str, int, float)): - data = pa.array([data]) - else: - data = pa.array(data) # initialize pyarrow array - - durations = [] - speed = [] - for _ in range(10): - start_time = time.time() - node.send_output("data", data) - event = node.next() - duration = time.time() - start_time - if event is not None and event["type"] == "INPUT": - text = event["value"][0].as_py() - tokens = event["metadata"].get("tokens", 6) - assert "this is a test" in text.lower(), ( - f"Expected 'This is a test', got {text}" - ) - durations.append(duration) - speed.append(tokens / duration) - time.sleep(0.1) - durations = np.array(durations) - speed = np.array(speed) - print( - f"\nAverage duration: {sum(durations) / len(durations)}" - + f"\nMax duration: {max(durations)}" - + f"\nMin duration: {min(durations)}" - + f"\nMedian duration: {np.median(durations)}" - + f"\nMedian frequency: {1 / np.median(durations)}" - + f"\nAverage speed: {sum(speed) / len(speed)}" - + f"\nMax speed: {max(speed)}" - + f"\nMin speed: {min(speed)}" - + f"\nMedian speed: {np.median(speed)}" - + f"\nTotal tokens: {tokens}" - ) - write_to_csv( - "benchmark.csv", - [ - "path", - "date", - "average_duration(s)", - "max_duration(s)", - "min_duration(s)", - "median_duration(s)", - "median_frequency(Hz)", - "average_speed(tok/s)", - "max_speed(tok/s)", - "min_speed(tok/s)", - "median_speed(tok/s)", - "total_tokens", - ], - [ - name, - time.strftime("%Y-%m-%d %H:%M:%S"), - sum(durations) / len(durations), - max(durations), - min(durations), - np.median(durations), - 1 / np.median(durations), - sum(speed) / len(speed), - max(speed), - min(speed), - np.median(speed), - tokens, - ], - ) - - -if __name__ == "__main__": - main() diff --git a/benchs/vlm/benchmark_script.py b/benchs/vlm/benchmark_script.py deleted file mode 100644 index fa8e7e76..00000000 --- a/benchs/vlm/benchmark_script.py +++ /dev/null @@ -1,184 +0,0 @@ -"""TODO: Add docstring.""" - -import argparse -import ast - -# Create an empty csv file with header in the current directory if file does not exist -import csv -import os -import time -from io import BytesIO - -import cv2 -import numpy as np -import pyarrow as pa -import requests -from dora import Node -from PIL import Image - -CAT_URL = "https://i.ytimg.com/vi/fzzjgBAaWZw/hqdefault.jpg" - - -def get_cat_image(): - """ - Get a cat image as a numpy array. - - :return: Cat image as a numpy array. - """ - # Fetch the image from the URL - response = requests.get(CAT_URL) - response.raise_for_status() - - # Open the image using PIL - - image = Image.open(BytesIO(response.content)) - # Convert the image to a numpy array - - image_array = np.array(image) - cv2.resize(image_array, (640, 480)) - # Convert RGB to BGR for - - return image_array - - -def write_to_csv(filename, header, row): - """ - Create a CSV file with a header if it does not exist, and write a row to it. - If the file exists, append the row to the file. - - :param filename: Name of the CSV file. - :param header: List of column names to use as the header. - :param row: List of data to write as a row in the CSV file. - """ - file_exists = os.path.exists(filename) - - with open( - filename, mode="a" if file_exists else "w", newline="", encoding="utf8" - ) as file: - writer = csv.writer(file) - - # Write the header if the file is being created - if not file_exists: - writer.writerow(header) - print(f"File '{filename}' created with header: {header}") - - # Write the row - writer.writerow(row) - print(f"Row written to '{filename}': {row}") - - -def main(): - # Handle dynamic nodes, ask for the name of the node in the dataflow, and the same values as the ENV variables. - """TODO: Add docstring.""" - parser = argparse.ArgumentParser(description="Simple arrow sender") - - parser.add_argument( - "--name", - type=str, - required=False, - help="The name of the node in the dataflow.", - default="pyarrow-sender", - ) - parser.add_argument( - "--data", - type=str, - required=False, - help="Arrow Data as string.", - default=None, - ) - - args = parser.parse_args() - - data = os.getenv("DATA", args.data) - - node = Node( - args.name, - ) # provide the name to connect to the dataflow if dynamic node - name = node.dataflow_descriptor()["nodes"][1]["path"] - - if data is None: - raise ValueError( - "No data provided. Please specify `DATA` environment argument or as `--data` argument", - ) - try: - data = ast.literal_eval(data) - except Exception: # noqa - print("Passing input as string") - - if isinstance(data, (str, int, float)): - data = pa.array([data]) - else: - data = pa.array(data) # initialize pyarrow array - - cat = get_cat_image() - durations = [] - speed = [] - for _ in range(10): - node.send_output( - "image", - pa.array(cat.ravel()), - {"encoding": "rgb8", "width": cat.shape[1], "height": cat.shape[0]}, - ) - time.sleep(0.1) - start_time = time.time() - node.send_output("text", data) - event = node.next() - duration = time.time() - start_time - if event is not None and event["type"] == "INPUT": - text = event["value"][0].as_py() - tokens = event["metadata"].get("tokens", 6) - assert ( - "this is a cat" in text.lower() - ), f"Expected 'This is a cat', got {text}" - durations.append(duration) - speed.append(tokens / duration) - time.sleep(0.1) - durations = np.array(durations) - speed = np.array(speed) - print( - f"\nAverage duration: {sum(durations) / len(durations)}" - + f"\nMax duration: {max(durations)}" - + f"\nMin duration: {min(durations)}" - + f"\nMedian duration: {np.median(durations)}" - + f"\nMedian frequency: {1/np.median(durations)}" - + f"\nAverage speed: {sum(speed) / len(speed)}" - + f"\nMax speed: {max(speed)}" - + f"\nMin speed: {min(speed)}" - + f"\nMedian speed: {np.median(speed)}" - + f"\nTotal tokens: {tokens}" - ) - write_to_csv( - "benchmark.csv", - [ - "path", - "date", - "average_duration(s)", - "max_duration(s)", - "min_duration(s)", - "median_duration(s)", - "median_frequency(Hz)", - "average_speed(tok/s)", - "max_speed(tok/s)", - "min_speed(tok/s)", - "median_speed(tok/s)", - "total_tokens", - ], - [ - name, - time.strftime("%Y-%m-%d %H:%M:%S"), - sum(durations) / len(durations), - max(durations), - min(durations), - np.median(durations), - 1 / np.median(durations), - sum(speed) / len(speed), - max(speed), - min(speed), - np.median(speed), - tokens, - ], - ) - - -if __name__ == "__main__": - main()