You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

process_output.py 1.7 kB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Convert ids to tokens."""
  16. from __future__ import absolute_import
  17. from __future__ import division
  18. from __future__ import print_function
  19. import argparse
  20. import sys
  21. import tokenization
  22. # Explicitly set the encoding
  23. sys.stdin = open(sys.stdin.fileno(), mode='r', encoding='utf-8', buffering=True)
  24. sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf-8', buffering=True)
  25. def main():
  26. parser = argparse.ArgumentParser(
  27. description="recore nbest with smoothed sentence-level bleu.")
  28. parser.add_argument("--vocab_file", type=str, default="", required=True, help="vocab file path.")
  29. args = parser.parse_args()
  30. tokenizer = tokenization.WhiteSpaceTokenizer(vocab_file=args.vocab_file)
  31. for line in sys.stdin:
  32. token_ids = [int(x) for x in line.strip().split()]
  33. tokens = tokenizer.convert_ids_to_tokens(token_ids)
  34. sent = " ".join(tokens)
  35. sent = sent.split("<s>")[-1]
  36. sent = sent.split("</s>")[0]
  37. print(sent.strip())
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()