You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

fast_text.h 2.1 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. /**
  2. * Copyright 2021 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_
  18. #include <memory>
  19. #include <string>
  20. #include <unordered_map>
  21. #include <utility>
  22. #include <vector>
  23. #include "minddata/dataset/core/tensor.h"
  24. #include "minddata/dataset/include/dataset/iterator.h"
  25. #include "minddata/dataset/text/vectors.h"
  26. #include "minddata/dataset/util/path.h"
  27. namespace mindspore {
  28. namespace dataset {
  29. /// \brief Pre-train word vectors.
  30. class FastText : public Vectors {
  31. public:
  32. /// Constructor.
  33. FastText() = default;
  34. /// Constructor.
  35. /// \param[in] map A map between string and vector.
  36. /// \param[in] dim Dimension of the vectors.
  37. FastText(const std::unordered_map<std::string, std::vector<float>> &map, int dim);
  38. /// Destructor.
  39. ~FastText() = default;
  40. /// \brief Build Vectors from reading a pre-train vector file.
  41. /// \param[out] fast_text FastText object which contains the pre-train vectors.
  42. /// \param[in] path Path to the pre-trained word vector file. The suffix of set must be `*.vec`.
  43. /// \param[in] max_vectors This can be used to limit the number of pre-trained vectors loaded (default=0, no limit).
  44. static Status BuildFromFile(std::shared_ptr<FastText> *fast_text, const std::string &path, int32_t max_vectors = 0);
  45. };
  46. } // namespace dataset
  47. } // namespace mindspore
  48. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_FAST_TEXT_H_