| @@ -1,4 +1,4 @@ | |||||
| # Dora QwenVL2 node | |||||
| # Dora QwenVL2.5 node | |||||
| Experimental node for using a VLM within dora. | Experimental node for using a VLM within dora. | ||||
| @@ -8,8 +8,8 @@ This node is supposed to be used as follows: | |||||
| ```yaml | ```yaml | ||||
| - id: dora-qwenvl | - id: dora-qwenvl | ||||
| build: pip install dora-qwenvl | |||||
| path: dora-qwenvl | |||||
| build: pip install dora-qwen2-5-vl | |||||
| path: dora-qwen2-5-vl | |||||
| inputs: | inputs: | ||||
| image: | image: | ||||
| source: camera/image | source: camera/image | ||||
| @@ -152,6 +152,7 @@ def main(): | |||||
| ], | ], | ||||
| }, | }, | ||||
| ] | ] | ||||
| cached_text = DEFAULT_QUESTION | |||||
| for event in node: | for event in node: | ||||
| event_type = event["type"] | event_type = event["type"] | ||||
| @@ -198,17 +199,19 @@ def main(): | |||||
| image = Image.fromarray(frame) | image = Image.fromarray(frame) | ||||
| frames[event_id] = image.resize((IMAGE_HEIGHT, IMAGE_WIDTH)) | frames[event_id] = image.resize((IMAGE_HEIGHT, IMAGE_WIDTH)) | ||||
| elif event_id == "text": | |||||
| elif "text" in event_id: | |||||
| if len(event["value"]) > 0: | if len(event["value"]) > 0: | ||||
| text = event["value"][0].as_py() | text = event["value"][0].as_py() | ||||
| else: | else: | ||||
| text = "" | |||||
| text = cached_text | |||||
| words = text.split() | words = text.split() | ||||
| if len(ACTIVATION_WORDS) > 0 and all( | if len(ACTIVATION_WORDS) > 0 and all( | ||||
| word not in ACTIVATION_WORDS for word in words | word not in ACTIVATION_WORDS for word in words | ||||
| ): | ): | ||||
| continue | continue | ||||
| cached_text = text | |||||
| if len(frames.keys()) == 0: | if len(frames.keys()) == 0: | ||||
| continue | continue | ||||
| # set the max number of tiles in `max_num` | # set the max number of tiles in `max_num` | ||||
| @@ -43,8 +43,8 @@ dev = ["pytest >=8.1.1", "ruff >=0.9.1"] | |||||
| transformers = { git = "https://github.com/huggingface/transformers" } | transformers = { git = "https://github.com/huggingface/transformers" } | ||||
| [project.scripts] | [project.scripts] | ||||
| dora-qwenvl = "dora_qwenvl.main:main" | |||||
| dora-qwen2-5-vl = "dora_qwen2_5_vl.main:main" | |||||
| [build-system] | [build-system] | ||||
| requires = ["setuptools", "setuptools-scm", "torch"] | |||||
| requires = ["setuptools", "setuptools-scm"] | |||||
| build-backend = "setuptools.build_meta" | build-backend = "setuptools.build_meta" | ||||
| @@ -40,7 +40,7 @@ Example: | |||||
| ```yaml,diff | ```yaml,diff | ||||
| - model_name_or_path: Qwen/Qwen2-VL-7B-Instruct | - model_name_or_path: Qwen/Qwen2-VL-7B-Instruct | ||||
| + model_name_or_path: Qwen/Qwen2-VL-2B-Instruct | |||||
| + model_name_or_path: Qwen/Qwen2.5-VL-3B-Instruct | |||||
| ``` | ``` | ||||
| - Then | - Then | ||||