
main.py 3.3 kB

  1. """TODO: Add docstring."""
  2. import os
  3. import pyarrow as pa
  4. from dora import Node
  5. from transformers import AutoModelForCausalLM, AutoTokenizer
SYSTEM_PROMPT = os.getenv(
    "SYSTEM_PROMPT",
    "You're a very succinct AI assistant with short answers.",
)
MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "Qwen/Qwen2.5-0.5B-Instruct-GGUF")
MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*fp16.gguf")
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
N_THREADS = int(os.getenv("N_THREADS", "4"))
CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "4096"))
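
# Example configuration (assumed values, for illustration only; export before
# launching the dataflow):
#   export MODEL_NAME_OR_PATH="Qwen/Qwen2.5-0.5B-Instruct-GGUF"
#   export MODEL_FILE_PATTERN="*q4_k_m.gguf"
#   export MAX_TOKENS=256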
def get_model_gguf():
    """Load a GGUF model with llama.cpp, fetching it from the Hugging Face Hub if needed."""
    from llama_cpp import Llama

    return Llama.from_pretrained(
        repo_id=MODEL_NAME_OR_PATH,
        filename=MODEL_FILE_PATTERN,
        n_gpu_layers=N_GPU_LAYERS,
        n_ctx=CONTEXT_SIZE,
        n_threads=N_THREADS,
        verbose=False,
    )
def get_model_darwin():
    """Load an MLX-quantized model, suited to Apple Silicon (macOS)."""
    from mlx_lm import load

    model, tokenizer = load("mlx-community/Qwen2.5-0.5B-Instruct-8bit")
    return model, tokenizer
def get_model_huggingface():
    """Load the model and tokenizer through the transformers library."""
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "Qwen/Qwen2.5-0.5B-Instruct"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
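
# Space-separated trigger words; when the list is empty, every input gets a reply.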
ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "").split()
def generate_hf(model, tokenizer, prompt: str, history) -> tuple[str, list]:
    """Generate a reply with a transformers model and update the chat history."""
    history += [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        history,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=MAX_TOKENS)
    # Strip the prompt tokens so only the newly generated tokens are decoded.
    generated_ids = [
        output_ids[len(input_ids) :]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    history += [{"role": "assistant", "content": response}]
    return response, history
def main():
    """Read text events from the dataflow, query the model, and emit the reply."""
    # Seed the conversation with the configured system prompt.
    history = [{"role": "system", "content": SYSTEM_PROMPT}]
    # This node uses the GGUF backend; get_model_darwin (MLX) and
    # get_model_huggingface (transformers) are alternative loaders.
    model = get_model_gguf()
    node = Node()
    for event in node:
        if event["type"] == "INPUT":
            # Note: the dataflow must route an input into this node and
            # connect its `text` output downstream.
            text = event["value"][0].as_py()
            words = text.lower().split()
            # Reply only when an activation word is present (or none are configured).
            if len(ACTIVATION_WORDS) == 0 or any(
                word in ACTIVATION_WORDS for word in words
            ):
                history += [{"role": "user", "content": text}]
                response = model.create_chat_completion(
                    messages=history,
                    max_tokens=MAX_TOKENS,
                )["choices"][0]["message"]["content"]
                history += [{"role": "assistant", "content": response}]
                node.send_output(
                    output_id="text",
                    data=pa.array([response]),
                    metadata={},
                )
if __name__ == "__main__":
    main()
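
For reference, a minimal dataflow sketch wiring this node. The node ids, the upstream source, and its script path are assumptions for illustration, not taken from this repository; only the `text` output id comes from the code above, and since the node reacts to any INPUT event, the input id is free to choose.

nodes:
  - id: source            # hypothetical upstream node producing text
    path: source.py
    outputs:
      - text
  - id: llm
    path: main.py
    inputs:
      text: source/text
    outputs:
      - text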