Browse Source

add general_chars.txt

pull/15491/head
panfengfeng 4 years ago
parent
commit
fc56a0cdb8
4 changed files with 9658 additions and 10 deletions
  1. model_zoo/official/cv/crnn_seq2seq_ocr/general_chars.txt (+9640, -0)
  2. model_zoo/official/cv/crnn_seq2seq_ocr/scripts/run_eval_ascend.sh (+1, -0)
  3. model_zoo/official/cv/crnn_seq2seq_ocr/src/config.py (+1, -0)
  4. model_zoo/official/cv/crnn_seq2seq_ocr/src/create_mindrecord_files.py (+16, -10)

+ 9640
- 0
model_zoo/official/cv/crnn_seq2seq_ocr/general_chars.txt
File diff suppressed because it is too large
View File


+ 1
- 0
model_zoo/official/cv/crnn_seq2seq_ocr/scripts/run_eval_ascend.sh View File

@@ -55,6 +55,7 @@ then
 fi
 mkdir ./eval
 cp ../*.py ./eval
+cp ../*.txt ./eval
 cp *.sh ./eval
 cp -r ../src ./eval
 cd ./eval || exit


+ 1
- 0
model_zoo/official/cv/crnn_seq2seq_ocr/src/config.py View File

@@ -28,6 +28,7 @@ config = ed({
 "val_annotation_file": "",
 "data_json": "",

+"go_shift": 1,
 "characters_dictionary": {"pad_id": 0, "go_id": 1, "eos_id": 2, "unk_id": 3},
 "labels_not_use": [u'%#�?%', u'%#背景#%', u'%#不识�?%', u'#%不识�?#', u'%#模糊#%', u'%#模糊#%'],
 "vocab_path": "./general_chars.txt",


+ 16
- 10
model_zoo/official/cv/crnn_seq2seq_ocr/src/create_mindrecord_files.py View File

@@ -19,8 +19,8 @@ import numpy as np


 from mindspore.mindrecord import FileWriter

-from src.config import config
-from src.utils import initialize_vocabulary
+from config import config
+from utils import initialize_vocabulary




def serialize_annotation(img_path, lex, vocab): def serialize_annotation(img_path, lex, vocab):
@@ -82,7 +82,7 @@ def create_fsns_label(image_dir, anno_file_dirs):


 def fsns_train_data_to_mindrecord(mindrecord_dir, prefix="data_ocr.mindrecord", file_num=8):

-anno_file_dirs = [config.train_annotation_file]
+anno_file_dirs = [config.annotation_file]
 images, image_path_dict, image_anno_dict = create_fsns_label(image_dir=config.data_root,
 anno_file_dirs=anno_file_dirs)
 vocab, _ = initialize_vocabulary(config.vocab_path)
@@ -104,8 +104,8 @@ def fsns_train_data_to_mindrecord(mindrecord_dir, prefix="data_ocr.mindrecord",
 image_path = image_path_dict[img_id]
 annotation = image_anno_dict[img_id]

-label_max_len = config.max_text_len
-text_max_len = config.max_text_len - 2
+label_max_len = config.max_length
+text_max_len = config.max_length - 2

 if len(annotation) > text_max_len:
 continue
@@ -151,8 +151,8 @@ def fsns_train_data_to_mindrecord(mindrecord_dir, prefix="data_ocr.mindrecord",


 def fsns_val_data_to_mindrecord(mindrecord_dir, prefix="data_ocr.mindrecord", file_num=8):

-anno_file_dirs = [config.train_annotation_file]
-images, image_path_dict, image_anno_dict = create_fsns_label(image_dir=config.data_root,
+anno_file_dirs = [config.val_annotation_file]
+images, image_path_dict, image_anno_dict = create_fsns_label(image_dir=config.val_data_root,
 anno_file_dirs=anno_file_dirs)
 vocab, _ = initialize_vocabulary(config.vocab_path)


@@ -171,8 +171,8 @@ def fsns_val_data_to_mindrecord(mindrecord_dir, prefix="data_ocr.mindrecord", fi
 image_path = image_path_dict[img_id]
 annotation = image_anno_dict[img_id]

-label_max_len = config.max_text_len
-text_max_len = config.max_text_len - 2
+label_max_len = config.max_length
+text_max_len = config.max_length - 2

 if len(annotation) > text_max_len:
 continue
@@ -226,7 +226,7 @@ def create_mindrecord(dataset="fsns", prefix="fsns.mindrecord", is_training=True
 print("{} dataset is not defined!".format(dataset))

 if not is_training:
-mindrecord_dir = os.path.join(config.mindrecord_dir, "val")
+mindrecord_dir = os.path.join(config.mindrecord_dir, "test")
 mindrecord_files = [os.path.join(mindrecord_dir, prefix + "0")]

 if not os.path.exists(mindrecord_files[0]):
@@ -243,3 +243,9 @@ def create_mindrecord(dataset="fsns", prefix="fsns.mindrecord", is_training=True
 print("{} dataset is not defined!".format(dataset))

 return mindrecord_files
+
+
+if __name__ == '__main__':
+create_mindrecord(is_training=True)
+create_mindrecord(is_training=False)
+print("END")

Loading…
Cancel
Save