Browse Source

adjust model zoo utils

tags/v0.6.0-beta
liyong jonyguo 5 years ago
parent
commit
748e07eb9e
69 changed files with 69 additions and 69 deletions
  1. +0
    -40
      example/nlp_to_mindrecord/CLUERNER2020/run.sh
  2. +1
    -1
      model_zoo/gat/scripts/run_process_data.sh
  3. +1
    -1
      model_zoo/gcn/scripts/run_process_data.sh
  4. +0
    -0
      model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/README.md
  5. +0
    -0
      model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py
  6. +0
    -0
      model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/data/README.md
  7. +0
    -0
      model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/gen_mindrecord.py
  8. +0
    -0
      model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/output/README.md
  9. +0
    -0
      model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/run.sh
  10. +0
    -0
      model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/run_read.sh
  11. +0
    -0
      model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/README.md
  12. +0
    -0
      model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/__init__.py
  13. +0
    -0
      model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py
  14. +0
    -0
      model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/run_imagenet.sh
  15. +0
    -0
      model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/run_template.sh
  16. +0
    -0
      model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/template/__init__.py
  17. +0
    -0
      model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/template/mr_api.py
  18. +0
    -0
      model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/writer.py
  19. +0
    -0
      model_zoo/utils/cv_to_mindrecord/README.md
  20. +0
    -0
      model_zoo/utils/graph_to_mindrecord/README.md
  21. +0
    -0
      model_zoo/utils/graph_to_mindrecord/citeseer/__init__.py
  22. +0
    -0
      model_zoo/utils/graph_to_mindrecord/citeseer/mr_api.py
  23. +0
    -0
      model_zoo/utils/graph_to_mindrecord/cora/__init__.py
  24. +0
    -0
      model_zoo/utils/graph_to_mindrecord/cora/mr_api.py
  25. +0
    -0
      model_zoo/utils/graph_to_mindrecord/graph_map_schema.py
  26. +0
    -0
      model_zoo/utils/graph_to_mindrecord/read_citeseer.sh
  27. +0
    -0
      model_zoo/utils/graph_to_mindrecord/read_cora.sh
  28. +0
    -0
      model_zoo/utils/graph_to_mindrecord/reader.py
  29. +0
    -0
      model_zoo/utils/graph_to_mindrecord/sns/__init__.py
  30. +0
    -0
      model_zoo/utils/graph_to_mindrecord/sns/mr_api.py
  31. +0
    -0
      model_zoo/utils/graph_to_mindrecord/write_citeseer.sh
  32. +0
    -0
      model_zoo/utils/graph_to_mindrecord/write_cora.sh
  33. +0
    -0
      model_zoo/utils/graph_to_mindrecord/write_sns.sh
  34. +0
    -0
      model_zoo/utils/graph_to_mindrecord/writer.py
  35. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/README.md
  36. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/create_dataset.py
  37. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/data/.gitignore
  38. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/data/README.md
  39. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/output/README.md
  40. +40
    -0
      model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/run.sh
  41. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/run_read.sh
  42. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/README.md
  43. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb/README.md
  44. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb/create_dataset.py
  45. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb/data/README.md
  46. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb/gen_mindrecord.py
  47. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb/output/README.md
  48. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb/run.sh
  49. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb/run_read.sh
  50. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/README.md
  51. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py
  52. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/data/README.md
  53. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/gen_mindrecord.py
  54. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/output/README.md
  55. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/run.sh
  56. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/run_read.sh
  57. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/enwiki/README.md
  58. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/enwiki/create_dataset.py
  59. +7
    -7
      model_zoo/utils/nlp_to_mindrecord/enwiki/run.sh
  60. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/enwiki/run_read.sh
  61. +1
    -1
      model_zoo/utils/nlp_to_mindrecord/zhwiki/README.md
  62. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/zhwiki/create_dataset.py
  63. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/zhwiki/data/.gitignore
  64. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/zhwiki/data/README.md
  65. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/zhwiki/output/README.md
  66. +8
    -8
      model_zoo/utils/nlp_to_mindrecord/zhwiki/run.sh
  67. +0
    -0
      model_zoo/utils/nlp_to_mindrecord/zhwiki/run_read.sh
  68. +1
    -1
      model_zoo/utils/nlp_to_mindrecord/zhwiki/run_read_simple.sh
  69. +10
    -10
      model_zoo/utils/nlp_to_mindrecord/zhwiki/run_simple.sh

+ 0
- 40
example/nlp_to_mindrecord/CLUERNER2020/run.sh View File

@@ -1,40 +0,0 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Patch the third-party CLUERNER2020 data processor and run the patched copy.
#
# Steps:
#   1. remove output/train.mindrecord* and output/dev.mindrecord* left by an
#      earlier run;
#   2. verify that the third-party source dir and the patch file exist;
#   3. apply the patch, writing the result to data_processor_seq_patched.py;
#   4. run the patched script with the third-party vocab and label files.
#
# Every path is relative to the current working directory, so the script has
# to be started from the directory it lives in.
# Exit status: 1 if a precondition or the patch step fails, otherwise the
# status of the final python command.

third_party_dir="../../../third_party/to_mindrecord/CLUERNER2020"
patch_file="../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch"

rm -f output/train.mindrecord*
rm -f output/dev.mindrecord*

if [ ! -d "${third_party_dir}" ]; then
  echo "The patch base dir ${third_party_dir} is not exist." >&2
  exit 1
fi

if [ ! -f "${patch_file}" ]; then
  echo "The patch file ${patch_file} is not exist." >&2
  exit 1
fi

# patch for data_processor_seq.py
# -d makes patch change into the third-party dir first, so the -o output file
# is created there; the patch itself is read from stdin, which the shell opens
# relative to the current working directory.
if ! patch -p0 -d "${third_party_dir}/" -o data_processor_seq_patched.py < "${patch_file}"; then
  echo "Patch ${third_party_dir}/data_processor_seq.py failed" >&2
  exit 1
fi

# use patched script
python "${third_party_dir}/data_processor_seq_patched.py" \
  --vocab_file="${third_party_dir}/vocab.txt" \
  --label2id_file="${third_party_dir}/label2id.json"

+ 1
- 1
model_zoo/gat/scripts/run_process_data.sh View File

@@ -42,7 +42,7 @@ MINDRECORD_PATH=`pwd`/data_mr

rm -f $MINDRECORD_PATH/*

cd ../../../example/graph_to_mindrecord || exit
cd ../../utils/graph_to_mindrecord || exit

python writer.py --mindrecord_script $DATASET_NAME \
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \


+ 1
- 1
model_zoo/gcn/scripts/run_process_data.sh View File

@@ -43,7 +43,7 @@ MINDRECORD_PATH=`pwd`/data_mr
rm -f $MINDRECORD_PATH/$DATASET_NAME
rm -f $MINDRECORD_PATH/$DATASET_NAME.db

cd ../../../example/graph_to_mindrecord || exit
cd ../../utils/graph_to_mindrecord || exit

python writer.py --mindrecord_script $DATASET_NAME \
--mindrecord_file "$MINDRECORD_PATH/$DATASET_NAME" \


example/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/README.md → model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/README.md View File


example/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py → model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py View File


example/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/data/README.md → model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/data/README.md View File


example/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/gen_mindrecord.py → model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/gen_mindrecord.py View File


example/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/output/README.md → model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/output/README.md View File


example/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/run.sh → model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/run.sh View File


example/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/run_read.sh → model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/run_read.sh View File


example/cv_to_mindrecord/ImageNet_Similar_Perf/README.md → model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/README.md View File


example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/__init__.py → model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/__init__.py View File


example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py → model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py View File


example/cv_to_mindrecord/ImageNet_Similar_Perf/run_imagenet.sh → model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/run_imagenet.sh View File


example/cv_to_mindrecord/ImageNet_Similar_Perf/run_template.sh → model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/run_template.sh View File


example/cv_to_mindrecord/ImageNet_Similar_Perf/template/__init__.py → model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/template/__init__.py View File


example/cv_to_mindrecord/ImageNet_Similar_Perf/template/mr_api.py → model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/template/mr_api.py View File


example/cv_to_mindrecord/ImageNet_Similar_Perf/writer.py → model_zoo/utils/cv_to_mindrecord/ImageNet_Similar_Perf/writer.py View File


example/cv_to_mindrecord/README.md → model_zoo/utils/cv_to_mindrecord/README.md View File


example/graph_to_mindrecord/README.md → model_zoo/utils/graph_to_mindrecord/README.md View File


example/graph_to_mindrecord/citeseer/__init__.py → model_zoo/utils/graph_to_mindrecord/citeseer/__init__.py View File


example/graph_to_mindrecord/citeseer/mr_api.py → model_zoo/utils/graph_to_mindrecord/citeseer/mr_api.py View File


example/graph_to_mindrecord/cora/__init__.py → model_zoo/utils/graph_to_mindrecord/cora/__init__.py View File


example/graph_to_mindrecord/cora/mr_api.py → model_zoo/utils/graph_to_mindrecord/cora/mr_api.py View File


example/graph_to_mindrecord/graph_map_schema.py → model_zoo/utils/graph_to_mindrecord/graph_map_schema.py View File


example/graph_to_mindrecord/read_citeseer.sh → model_zoo/utils/graph_to_mindrecord/read_citeseer.sh View File


example/graph_to_mindrecord/read_cora.sh → model_zoo/utils/graph_to_mindrecord/read_cora.sh View File


example/graph_to_mindrecord/reader.py → model_zoo/utils/graph_to_mindrecord/reader.py View File


example/graph_to_mindrecord/sns/__init__.py → model_zoo/utils/graph_to_mindrecord/sns/__init__.py View File


example/graph_to_mindrecord/sns/mr_api.py → model_zoo/utils/graph_to_mindrecord/sns/mr_api.py View File


example/graph_to_mindrecord/write_citeseer.sh → model_zoo/utils/graph_to_mindrecord/write_citeseer.sh View File


example/graph_to_mindrecord/write_cora.sh → model_zoo/utils/graph_to_mindrecord/write_cora.sh View File


example/graph_to_mindrecord/write_sns.sh → model_zoo/utils/graph_to_mindrecord/write_sns.sh View File


example/graph_to_mindrecord/writer.py → model_zoo/utils/graph_to_mindrecord/writer.py View File


example/nlp_to_mindrecord/CLUERNER2020/README.md → model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/README.md View File


example/nlp_to_mindrecord/CLUERNER2020/create_dataset.py → model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/create_dataset.py View File


example/nlp_to_mindrecord/CLUERNER2020/data/.gitignore → model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/data/.gitignore View File


example/nlp_to_mindrecord/CLUERNER2020/data/README.md → model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/data/README.md View File


example/nlp_to_mindrecord/CLUERNER2020/output/README.md → model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/output/README.md View File


+ 40
- 0
model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/run.sh View File

@@ -0,0 +1,40 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Patch the third-party CLUERNER2020 data processor and run the patched copy.
#
# Steps:
#   1. remove output/train.mindrecord* and output/dev.mindrecord* left by an
#      earlier run;
#   2. verify that the third-party source dir and the patch file exist;
#   3. apply the patch, writing the result to data_processor_seq_patched.py;
#   4. run the patched script with the third-party vocab and label files.
#
# Every path is relative to the current working directory, so the script has
# to be started from the directory it lives in.
# Exit status: 1 if a precondition or the patch step fails, otherwise the
# status of the final python command.

third_party_dir="../../../../third_party/to_mindrecord/CLUERNER2020"
patch_file="../../../../third_party/patch/to_mindrecord/CLUERNER2020/data_processor_seq.patch"

rm -f output/train.mindrecord*
rm -f output/dev.mindrecord*

if [ ! -d "${third_party_dir}" ]; then
  echo "The patch base dir ${third_party_dir} is not exist." >&2
  exit 1
fi

if [ ! -f "${patch_file}" ]; then
  echo "The patch file ${patch_file} is not exist." >&2
  exit 1
fi

# patch for data_processor_seq.py
# -d makes patch change into the third-party dir first, so the -o output file
# is created there; the patch itself is read from stdin, which the shell opens
# relative to the current working directory.
if ! patch -p0 -d "${third_party_dir}/" -o data_processor_seq_patched.py < "${patch_file}"; then
  echo "Patch ${third_party_dir}/data_processor_seq.py failed" >&2
  exit 1
fi

# use patched script
python "${third_party_dir}/data_processor_seq_patched.py" \
  --vocab_file="${third_party_dir}/vocab.txt" \
  --label2id_file="${third_party_dir}/label2id.json"

example/nlp_to_mindrecord/CLUERNER2020/run_read.sh → model_zoo/utils/nlp_to_mindrecord/CLUERNER2020/run_read.sh View File


example/nlp_to_mindrecord/README.md → model_zoo/utils/nlp_to_mindrecord/README.md View File


example/nlp_to_mindrecord/aclImdb/README.md → model_zoo/utils/nlp_to_mindrecord/aclImdb/README.md View File


example/nlp_to_mindrecord/aclImdb/create_dataset.py → model_zoo/utils/nlp_to_mindrecord/aclImdb/create_dataset.py View File


example/nlp_to_mindrecord/aclImdb/data/README.md → model_zoo/utils/nlp_to_mindrecord/aclImdb/data/README.md View File


example/nlp_to_mindrecord/aclImdb/gen_mindrecord.py → model_zoo/utils/nlp_to_mindrecord/aclImdb/gen_mindrecord.py View File


example/nlp_to_mindrecord/aclImdb/output/README.md → model_zoo/utils/nlp_to_mindrecord/aclImdb/output/README.md View File


example/nlp_to_mindrecord/aclImdb/run.sh → model_zoo/utils/nlp_to_mindrecord/aclImdb/run.sh View File


example/nlp_to_mindrecord/aclImdb/run_read.sh → model_zoo/utils/nlp_to_mindrecord/aclImdb/run_read.sh View File


example/nlp_to_mindrecord/aclImdb_preprocess/README.md → model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/README.md View File


example/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py → model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py View File


example/nlp_to_mindrecord/aclImdb_preprocess/data/README.md → model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/data/README.md View File


example/nlp_to_mindrecord/aclImdb_preprocess/gen_mindrecord.py → model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/gen_mindrecord.py View File


example/nlp_to_mindrecord/aclImdb_preprocess/output/README.md → model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/output/README.md View File


example/nlp_to_mindrecord/aclImdb_preprocess/run.sh → model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/run.sh View File


example/nlp_to_mindrecord/aclImdb_preprocess/run_read.sh → model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/run_read.sh View File


example/nlp_to_mindrecord/enwiki/README.md → model_zoo/utils/nlp_to_mindrecord/enwiki/README.md View File


example/nlp_to_mindrecord/enwiki/create_dataset.py → model_zoo/utils/nlp_to_mindrecord/enwiki/create_dataset.py View File


example/nlp_to_mindrecord/enwiki/run.sh → model_zoo/utils/nlp_to_mindrecord/enwiki/run.sh View File

@@ -66,20 +66,20 @@ getdir "${data_dir}"
# echo "The input files: "${file_list[@]}
# echo "The output files: "${output_filename[@]}

if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1
fi

if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1
fi

# patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1
fi

@@ -94,7 +94,7 @@ file_list_len=`expr ${#file_list[*]} - 1`
for index in $(seq 0 $file_list_len); do
echo "Begin preprocess input file: ${file_list[$index]}"
echo "Begin output file: ${output_filename[$index]}"
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=${file_list[$index]} \
--output_file=${output_dir}/${output_filename[$index]} \
--partition_number=1 \

example/nlp_to_mindrecord/enwiki/run_read.sh → model_zoo/utils/nlp_to_mindrecord/enwiki/run_read.sh View File


example/nlp_to_mindrecord/zhwiki/README.md → model_zoo/utils/nlp_to_mindrecord/zhwiki/README.md View File

@@ -26,7 +26,7 @@ This example is based on [zhwiki](https://dumps.wikimedia.org/zhwiki) training d
Follow the step:

```bash
bash run_simple.sh # generate output/simple.mindrecord* by ../../../third_party/to_mindrecord/zhwiki/sample_text.txt
bash run_simple.sh # generate output/simple.mindrecord* by ../../../../third_party/to_mindrecord/zhwiki/sample_text.txt
bash run_read_simple.sh # use MindDataset to read output/simple.mindrecord*
```


example/nlp_to_mindrecord/zhwiki/create_dataset.py → model_zoo/utils/nlp_to_mindrecord/zhwiki/create_dataset.py View File


example/nlp_to_mindrecord/zhwiki/data/.gitignore → model_zoo/utils/nlp_to_mindrecord/zhwiki/data/.gitignore View File


example/nlp_to_mindrecord/zhwiki/data/README.md → model_zoo/utils/nlp_to_mindrecord/zhwiki/data/README.md View File


example/nlp_to_mindrecord/zhwiki/output/README.md → model_zoo/utils/nlp_to_mindrecord/zhwiki/output/README.md View File


example/nlp_to_mindrecord/zhwiki/run.sh → model_zoo/utils/nlp_to_mindrecord/zhwiki/run.sh View File

@@ -45,20 +45,20 @@ getdir "${data_dir}"
# echo "The input files: "${file_list[@]}
# echo "The output files: "${output_filename[@]}

if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1
fi

if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1
fi

# patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1
fi

@@ -73,11 +73,11 @@ file_list_len=`expr ${#file_list[*]} - 1`
for index in $(seq 0 $file_list_len); do
echo "Begin preprocess input file: ${file_list[$index]}"
echo "Begin output file: ${output_filename[$index]}"
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=${file_list[$index]} \
--output_file=output/${output_filename[$index]} \
--partition_number=1 \
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--do_lower_case=True \
--max_seq_length=128 \
--max_predictions_per_seq=20 \

example/nlp_to_mindrecord/zhwiki/run_read.sh → model_zoo/utils/nlp_to_mindrecord/zhwiki/run_read.sh View File


example/nlp_to_mindrecord/zhwiki/run_read_simple.sh → model_zoo/utils/nlp_to_mindrecord/zhwiki/run_read_simple.sh View File

@@ -15,4 +15,4 @@
# ============================================================================

# create dataset for train
python create_dataset.py --input_file=output/simple.mindrecord0
python create_dataset.py --input_file=output/simple.mindrecord

example/nlp_to_mindrecord/zhwiki/run_simple.sh → model_zoo/utils/nlp_to_mindrecord/zhwiki/run_simple.sh View File

@@ -16,29 +16,29 @@

rm -f output/simple.mindrecord*

if [ ! -d "../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../third_party/to_mindrecord/zhwiki is not exist."
if [ ! -d "../../../../third_party/to_mindrecord/zhwiki" ]; then
echo "The patch base dir ../../../../third_party/to_mindrecord/zhwiki is not exist."
exit 1
fi

if [ ! -f "../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
if [ ! -f "../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch" ]; then
echo "The patch file ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch is not exist."
exit 1
fi

# patch for create_pretraining_data.py
patch -p0 -d ../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
patch -p0 -d ../../../../third_party/to_mindrecord/zhwiki/ -o create_pretraining_data_patched.py < ../../../../third_party/patch/to_mindrecord/zhwiki/create_pretraining_data.patch
if [ $? -ne 0 ]; then
echo "Patch ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
echo "Patch ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data.py failed"
exit 1
fi

# using patched script to generate mindrecord
python ../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
python ../../../../third_party/to_mindrecord/zhwiki/create_pretraining_data_patched.py \
--input_file=../../../../third_party/to_mindrecord/zhwiki/sample_text.txt \
--output_file=output/simple.mindrecord \
--partition_number=4 \
--vocab_file=../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--partition_number=1 \
--vocab_file=../../../../third_party/to_mindrecord/zhwiki/vocab.txt \
--do_lower_case=True \
--max_seq_length=128 \
--max_predictions_per_seq=20 \

Loading…
Cancel
Save