|
- # Copyright 2021 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- import os
- import pytest
-
- from tests.st.model_zoo_tests import utils
-
-
@pytest.mark.level1
@pytest.mark.platform_x86_ascend_training
@pytest.mark.platform_arm_ascend_training
@pytest.mark.env_single
def test_center_net():
    """Launch a short distributed CenterNet training run and check speed and loss.

    The test:
      1. copies the ``centernet`` model next to this test file,
      2. patches ``train.py`` and the distributed launch-command generator
         through ``utils.exec_sed_command``,
      3. starts ``scripts/run_distributed_train_ascend.sh`` from inside the
         copied model directory and checks the training processes with
         ``utils.process_check``,
      4. asserts, for each of the 8 per-device logs, that the value returned
         by ``utils.get_perf_data`` is below 435, and that the mean of the
         last logged loss of every device is below 58.8.
    """
    # Local import keeps the module's import block untouched.
    import shlex

    # Number of per-device log directories (LOG0 .. LOG7) that are checked.
    device_num = 8

    cur_path = os.path.dirname(os.path.abspath(__file__))
    model_path = "{}/../../../../tests/models/research/cv".format(cur_path)
    model_name = "centernet"
    utils.copy_files(model_path, cur_path, model_name)
    cur_model_path = os.path.join(cur_path, model_name)

    # Substitutions applied to the copied train.py.
    old_list = ['new_repeat_count, dataset', 'args_opt.data_sink_steps']
    new_list = ['5, dataset', '20']
    utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py"))

    # Substitutions applied to the launch-command generator script.
    # NOTE(review): the whitespace following each `\\n` in new_list presumably
    # becomes the indentation of the inserted `'''` line in the patched file;
    # confirm it matches the indentation used by that script.
    old_list = ["device_ips = {}", "device_ip.strip()",
                "rank_size = 0", "this_server = server",
                "this_server\\[\\\"device\\\"\\]",
                "instance\\[\\\"device_id\\\"\\]"]
    new_list = ["device_ips = {}\\n '''", "device_ip.strip()\\n '''",
                "rank_size = 8\\n this_server = hccl_config[\\\"group_list\\\"][0]\\n '''",
                "this_server = server\\n '''",
                "this_server[\\\"instance_list\\\"]",
                "instance[\\\"devices\\\"][0][\\\"device_id\\\"]"]
    generator_cmd_file = "scripts/ascend_distributed_launcher/get_distribute_train_cmd.py"
    utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, generator_cmd_file))

    dataset_path = os.path.join(utils.data_root, "coco/coco2017/mindrecord_train/centernet_mindrecord")
    # Change into the copied model directory by absolute path so the launch
    # does not depend on the directory pytest was started from, and use `&&`
    # so the script is not run from the wrong place if `cd` fails.
    exec_network_shell = "cd {0} && bash scripts/run_distributed_train_ascend.sh {1} {2}"\
        .format(shlex.quote(cur_model_path), dataset_path, utils.rank_table_path)
    os.system(exec_network_shell)

    cmd = "ps -ef |grep train.py | grep coco | grep -v grep"
    ret = utils.process_check(120, cmd)
    assert ret, "utils.process_check reported failure for the training processes"

    log_file = os.path.join(cur_model_path, "LOG{}/training_log.txt")
    device_logs = [log_file.format(i) for i in range(device_num)]

    # Performance gate: every device must stay under the threshold.
    for i, device_log in enumerate(device_logs):
        per_step_time = utils.get_perf_data(device_log)
        assert per_step_time < 435, \
            "device {}: per step time {} is not below 435".format(i, per_step_time)

    # Loss gate: average the last logged loss of each device.
    loss_list = []
    for device_log in device_logs:
        loss_cmd = "grep -nr \"outputs are\" {} | awk '{{print $14}}' | awk -F\")\" '{{print $1}}'"\
            .format(device_log)
        loss = utils.get_loss_data_list(device_log, cmd=loss_cmd)
        # Fail with a readable message instead of an IndexError on loss[-1].
        assert len(loss) > 0, "no loss values found in {}".format(device_log)
        loss_list.append(loss[-1])
    mean_loss = sum(loss_list) / len(loss_list)
    assert mean_loss < 58.8, "mean final loss {} is not below 58.8".format(mean_loss)
|