You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

base_model.py 1.0 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. class BaseModel(object):
  2. """base model for all models"""
  3. def __init__(self):
  4. pass
  5. def prepare_input(self, data):
  6. raise NotImplementedError
  7. def mode(self, test=False):
  8. raise NotImplementedError
  9. def data_forward(self, x):
  10. raise NotImplementedError
  11. def grad_backward(self):
  12. raise NotImplementedError
  13. def loss(self, pred, truth):
  14. raise NotImplementedError
  15. class Vocabulary(object):
  16. """
  17. A collection of lookup tables.
  18. """
  19. def __init__(self):
  20. self.word_set = None
  21. self.word2idx = None
  22. self.emb_matrix = None
  23. def lookup(self, word):
  24. if word in self.word_set:
  25. return self.emb_matrix[self.word2idx[word]]
  26. return LookupError("The key " + word + " does not exist.")
  27. class Document(object):
  28. """
  29. contains a sequence of tokens
  30. each token is a character with linguistic attributes
  31. """
  32. def __init__(self):
  33. # wrap pandas.dataframe
  34. self.dataframe = None

一款轻量级的自然语言处理(NLP)工具包,目标是减少用户项目中的工程型代码,例如数据处理循环、训练循环、多卡运行等。