You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

text.h 2.2 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_API_TEXT_H_
  17. #define MINDSPORE_CCSRC_MINDDATA_DATASET_API_TEXT_H_
  18. #include <vector>
  19. #include <memory>
  20. #include <string>
  21. #include "minddata/dataset/core/constants.h"
  22. #include "minddata/dataset/include/transforms.h"
  23. #include "minddata/dataset/text/vocab.h"
  24. namespace mindspore {
  25. namespace dataset {
  26. namespace api {
  27. // Transform operations for text
  28. namespace text {
  29. // Text Op classes (in alphabetical order)
  30. class LookupOperation;
  31. /// \brief Lookup operator that looks up a word to an id.
  32. /// \param[in] vocab a Vocab object.
  33. /// \param[in] unknown_token word to use for lookup if the word being looked up is out of Vocabulary (oov).
  34. /// If unknown_token is oov, runtime error will be thrown
  35. /// \return Shared pointer to the current TensorOperation.
  36. std::shared_ptr<LookupOperation> Lookup(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token);
  37. /* ####################################### Derived TensorOperation classes ################################# */
  38. class LookupOperation : public TensorOperation {
  39. public:
  40. explicit LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token);
  41. ~LookupOperation() = default;
  42. std::shared_ptr<TensorOp> Build() override;
  43. bool ValidateParams() override;
  44. private:
  45. std::shared_ptr<Vocab> vocab_;
  46. std::string unknown_token_;
  47. int32_t default_id_;
  48. };
  49. } // namespace text
  50. } // namespace api
  51. } // namespace dataset
  52. } // namespace mindspore
  53. #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_API_TEXT_H_