| @@ -0,0 +1,143 @@ | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| import sys | |||||
| import re | |||||
| import json | |||||
| import os | |||||
| import time | |||||
| import openpyxl as opx | |||||
def parse_arguments():
    """Parse command-line arguments for the log-processing script.

    Expects: argv[1]=me_report_path (xlsx), argv[2]=log_path (dir),
    argv[3]=n_iter (positive int), argv[4]=out (json output path).

    Returns:
        tuple: (me_report_path, log_path, int(n_iter), out)

    Raises:
        ValueError: if too few arguments are given or n_iter is not an integer.
    """
    print(sys.argv)
    # An assert here would be stripped under "python -O"; validate explicitly.
    if len(sys.argv) < 5:
        raise ValueError(
            "usage: <me_report_path> <log_path> <n_iter> <out>")
    me_report_path = sys.argv[1]
    log_path = sys.argv[2]
    n_iter = sys.argv[3]
    out = sys.argv[4]
    if not n_iter.isdigit():
        raise ValueError("n_iter must be a non-negative integer, got %r" % n_iter)
    return me_report_path, log_path, int(n_iter), out
def extract_by_keyword(doc, keyword, pattern):
    """Collect every regex match of `pattern` from lines that contain `keyword`.

    `doc` is any iterable of strings (typically an open file). Returns the
    matches in document order as a flat list of strings.
    """
    matches = []
    for lineno, line in enumerate(doc):
        if keyword not in line:
            continue
        found = re.findall(pattern, line)
        print("L%d: extracted %s from '%s'" % (lineno, found, line.strip()))
        matches.extend(found)
    return matches
def process_log(fname, log_path, n_iter, keyword, pattern):
    """Extract `pattern` matches from `n_iter` iteration log files.

    `fname` is a %-style template (e.g. "bert%d.log"); iterations are numbered
    1..n_iter. Returns a dict mapping each concrete file name to the list of
    strings extracted from lines containing `keyword`.
    """
    results = {}
    for iteration in range(1, n_iter + 1):
        name = fname % iteration
        full_path = os.path.join(log_path, name)
        with open(full_path) as handle:
            print("\nLoading %s" % full_path)
            results[name] = extract_by_keyword(handle, keyword, pattern)
    return results
def summarize(func):
    """Decorator: augment a {file_name: time} dict with summary statistics.

    The wrapped function must return a mapping of log-file name to a float
    compile time. The wrapper adds three keys to that mapping before
    returning it: "min_time", "min_file" (the fastest run) and "avg_time".

    Raises:
        ValueError: if the wrapped function returns an empty mapping
        (the original code raised a bare IndexError in that case).
    """
    import functools

    @functools.wraps(func)  # keep the wrapped function's name/docstring
    def wrapper(*args, **kwargs):
        log = func(*args, **kwargs)
        if not log:
            raise ValueError("no timing entries extracted by %s" % func.__name__)
        # min() replaces sort-then-[0]: same result, O(n) instead of O(n log n)
        min_file, min_time = min(log.items(), key=lambda kv: kv[1])
        # compute the average before the summary keys are added
        avg = sum(log.values()) / len(log)
        log["min_time"] = min_time
        log["min_file"] = min_file
        log["avg_time"] = avg
        return log
    return wrapper
@summarize
def process_bert_log(log_path, n_iter):
    """Compute BERT compile time per iteration log.

    Time = first TotalTime value minus first task_emit value in each log.
    Returns {log file name: float seconds}; @summarize adds min/avg keys.
    """
    fname = "bert%d.log"
    # BUG FIX: the dot in r"\d+.\d+" was unescaped, so it matched any
    # character (e.g. "12x34"); escape it to match real decimal numbers.
    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
    log = {}
    for name in total:
        log[name] = float(total[name][0]) - float(task[name][0])
    return log
@summarize
def process_resnet_log(log_path, n_iter):
    """Compute ResNet compile time per iteration log.

    Time = first TotalTime value minus first task_emit value in each log.
    Returns {log file name: float seconds}; @summarize adds min/avg keys.
    """
    fname = "resnet%d.log"
    # BUG FIX: escape the dot in the decimal-number regex (was r"\d+.\d+").
    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
    log = {}
    for name in total:
        log[name] = float(total[name][0]) - float(task[name][0])
    return log
@summarize
def process_gpt_log(log_path, n_iter):
    """Compute GPT compile time per iteration log.

    Time = first TotalTime value minus first task_emit value in each log.
    Returns {log file name: float seconds}; @summarize adds min/avg keys.
    """
    fname = "gpt%d.log"
    # BUG FIX: escape the dot in the decimal-number regex (was r"\d+.\d+").
    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
    log = {}
    for name in total:
        log[name] = float(total[name][0]) - float(task[name][0])
    return log
@summarize
def process_reid_log(log_path, n_iter):
    """Compute ReID compile time per iteration for each of the 8 devices.

    Log files are named "reid_<iteration>_<device>.log". Returns
    {log file name: float seconds}; @summarize adds min/avg keys.
    """
    log = {}
    for device in range(8):
        # "reid_%d_<device>.log": process_log fills in the iteration number
        fname = "reid_%d_" + str(device) + ".log"
        # BUG FIX: escape the dot in the decimal-number regex (was r"\d+.\d+").
        total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
        task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
        for name in total:
            log[name] = float(total[name][0]) - float(task[name][0])
    return log
def write_to_me_report(log, me_report_path):
    """Append today's minimum compile times as a new row of the ME report.

    Row layout: A=date (MMDD), B=ReID, C=BERT, D=ResNet, E=GPT, each value
    rounded to two decimals. The workbook is saved back in place.
    """
    workbook = opx.load_workbook(me_report_path)
    sheet = workbook["Sheet"]
    row = sheet.max_row + 1
    sheet['A%d' % row] = time.strftime('%m%d', time.localtime())
    # column letter -> network whose min_time lands in that column
    for column, network in (('B', 'reid'), ('C', 'bert'),
                            ('D', 'resnet'), ('E', 'gpt')):
        sheet['%s%d' % (column, row)] = round(log[network]["min_time"], 2)
    workbook.save(me_report_path)
def generate_report():
    """Entry point: parse logs for every network, dump the timing data as
    JSON and append the daily minimums to the ME xlsx report."""
    me_report_path, log_path, n_iter, out = parse_arguments()
    # dict literal preserves the original bert/resnet/gpt/reid call order
    log_data = {
        "bert": process_bert_log(log_path, n_iter),
        "resnet": process_resnet_log(log_path, n_iter),
        "gpt": process_gpt_log(log_path, n_iter),
        "reid": process_reid_log(log_path, n_iter),
    }
    with open(out, "w") as f:
        json.dump(log_data, f, indent=2)
    write_to_me_report(log_data, me_report_path)
# Script entry point: parse CLI args, process all logs, write data.json
# and update the xlsx report.
if __name__ == "__main__":
    generate_report()
| @@ -0,0 +1,104 @@ | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| import os | |||||
| import sys | |||||
| import json | |||||
| import openpyxl as opx | |||||
| import matplotlib.ticker as ticker | |||||
| import matplotlib.pyplot as plt | |||||
def parse_arguments():
    """Parse command-line arguments for the report-generation script.

    Expects: argv[1]=log_path (dir), argv[2]=log_data (json),
    argv[3]=me_report (xlsx), argv[4]=n_days (non-negative int).

    Returns:
        tuple: (log_path, log_data, me_report, int(n_days))

    Raises:
        ValueError: if too few arguments are given or n_days is not an integer.
    """
    # An assert here would be stripped under "python -O"; validate explicitly.
    if len(sys.argv) < 5:
        raise ValueError(
            "usage: <log_path> <log_data.json> <me_report.xlsx> <n_days>")
    log_path = sys.argv[1]
    log_data = sys.argv[2]
    me_report = sys.argv[3]
    n_days = sys.argv[4]
    if not n_days.isdigit():
        raise ValueError("n_days must be a non-negative integer, got %r" % n_days)
    return log_path, log_data, me_report, int(n_days)
def read_data(log_data, me_report_path, n_days):
    """Load the per-run timing JSON and the per-day history from the report.

    Args:
        log_data: path to the JSON file produced by the processing script.
        me_report_path: path to the ME xlsx report (sheet named "Sheet").
        n_days: if > 0, keep only the trailing n_days entries of each series.

    Returns:
        (log_dict, dates, reid_data, bert_data, resnet_data, gpt_data)
        where each *_data element is a list of floats, one per report row.
    """
    with open(log_data) as f:
        log = json.load(f)
    wb = opx.load_workbook(me_report_path)
    sheet = wb["Sheet"]
    n_row = sheet.max_row
    # column A holds the date labels; B-E hold per-network compile times
    date = [cell[0].value for cell in sheet["A2":"A%d" % n_row]]
    reid_data = [float(cell[0].value) for cell in sheet["B2":"B%d" % n_row]]
    bert_data = [float(cell[0].value) for cell in sheet["C2":"C%d" % n_row]]
    resnet_data = [float(cell[0].value) for cell in sheet["D2":"D%d" % n_row]]
    # NOTE(review): the GPT column starts at row 43, not row 2 like the
    # others — presumably GPT tracking began later, so earlier rows are
    # empty. Confirm this hard-coded offset is still correct.
    gpt_data = [float(cell[0].value) for cell in sheet["E43":"E%d" % n_row]]
    if n_days > 0:
        date = date[-n_days:]
        reid_data = reid_data[-n_days:]
        bert_data = bert_data[-n_days:]
        resnet_data = resnet_data[-n_days:]
        gpt_data = gpt_data[-n_days:]
    return log, date, reid_data, bert_data, resnet_data, gpt_data
def draw_figure(x_data, y_data, labels, title, out, height=8, width=24, tick_space=2):
    """Plot one line per series in `y_data` against `x_data` and save to `out`.

    A series may be shorter than x_data; it is right-aligned to the most
    recent dates. Every other point is annotated with its value to avoid
    label clutter.

    Args:
        x_data: list of x-axis labels (dates).
        y_data: list of series (lists of floats).
        labels: legend label per series (zipped with y_data).
        title: figure title.
        out: output image path.
        height/width: figure size in inches. BUG FIX: matplotlib's figsize
            is (width, height); the original passed figsize=(height, width)
            with height=24, width=8, so the parameter names were swapped.
            Defaults are adjusted here so the rendered figure stays the
            same 24x8-inch landscape.
        tick_space: spacing of major x-axis ticks.
    """
    print("Generating figure to: %s" % out)
    plt.figure(figsize=(width, height))
    for y, label in zip(y_data, labels):
        x = x_data[-len(y):]
        n_data = len(x)
        assert len(x) == len(
            y), "assume len(x) == len(y), while %d != %d" % (len(x), len(y))
        plt.plot(x, y, linewidth=2, marker='o', markersize=5, label=label)
        ax = plt.gca()
        ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_space))
        # annotate every other data point with its value
        for i in range(n_data):
            if i % 2 == 0:
                plt.text(x[i], y[i], y[i], ha='center',
                         va='bottom', fontsize=8)
    plt.title(title)
    plt.xlabel("Date")
    plt.ylabel("Time(s)")
    plt.grid()
    plt.legend()
    plt.savefig(out)
def generate_report(log, labels, log_path):
    """Excerpt each network's fastest run into <log_path>/reports/<label>_me.log.

    For every label, takes log[label]["min_file"] and extracts the line
    containing 'TotalTime = ' plus the 230 lines that follow it
    (equivalent to `grep -A 230`).
    """
    import subprocess
    for label in labels:
        fname = log[label]["min_file"]
        fname_path = os.path.join(log_path, fname)
        out_path = os.path.join(log_path, "reports", label + "_me.log")
        print("Generating report to: %s" % out_path)
        # SECURITY/ROBUSTNESS FIX: the original built a shell string via
        # os.system("grep ... %s > %s"), which breaks on paths containing
        # spaces or shell metacharacters. An argument list with a real
        # stdout handle needs no shell at all. check=False mirrors
        # os.system's ignore-the-exit-code behavior (grep returns 1 when
        # nothing matches).
        with open(out_path, "w") as out_file:
            subprocess.run(["grep", "-A", "230", "TotalTime = ", fname_path],
                           stdout=out_file, check=False)
def process_data():
    """Entry point: load timing data, render trend charts and excerpt the
    fastest run's log for every network into the reports directory."""
    log_path, log_data, me_report, n_days = parse_arguments()
    log, date, reid_data, bert_data, resnet_data, gpt_data = read_data(
        log_data, me_report, n_days)
    reports_dir = os.path.join(log_path, "reports")
    # ReID/BERT/GPT share one chart; ResNet gets its own
    draw_figure(date,
                [reid_data, bert_data, gpt_data],
                ["ReID", "BERT", "GPT"],
                "ReID&BERT&GPT",
                os.path.join(reports_dir, "reid_bert_gpt.png"))
    draw_figure(date, [resnet_data], ["ResNet"], "ResNet",
                os.path.join(reports_dir, "resnet.png"))
    generate_report(log, list(log.keys()), log_path)
# Script entry point: read CLI args, draw the figures and write the reports.
if __name__ == "__main__":
    process_data()
| @@ -0,0 +1,146 @@ | |||||
| #!/bin/bash | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
# Default configuration; the -s/-d/-i/-l getopts flags below override these.
stage=0
days=7
iter=5
device_id=0
n_worker=128
work_dir="/opt/npu/me_monitor"
me_report_path=$work_dir/logs/ME_report_daily.xlsx
# Each run gets its own timestamped log directory, e.g. logs_0315-1030.
log_dir=logs_$(date "+%m%d-%H%M")
log_path=$work_dir/logs/$log_dir
ms_master="https://gitee.com/mindspore/mindspore.git"
log_data="data.json"
ci_mode=true
# Abort on the first failing command, including failures inside pipelines.
set -e
set -o pipefail
# parse arguments from command line
#   -s <stage>  start from the given pipeline stage (default 0 = run all)
#   -d <days>   number of trailing days to chart (default 7)
#   -i <iter>   compile iterations per network (default 5)
#   -l <dir>    reuse an existing log directory instead of a timestamped one
while getopts "s:d:i:l:" args
do
    case $args in
        s)
            stage=$OPTARG
            ;;
        d)
            days=$OPTARG
            ;;
        i)
            iter=$OPTARG
            ;;
        l)
            log_dir=$OPTARG
            # keep log_path consistent with the overridden log_dir
            log_path=$work_dir/logs/$log_dir
            ;;
        ?)
            echo "unknown argument"
            exit 1
            ;;
    esac
done
# Load toolchain/environment variables and pin the NPU device for this run.
source env.sh
export DEVICE_ID=$device_id
echo "Args: days=$days, iter=$iter, log_path=$log_path"
cd $work_dir
echo $WORKSPACE
# NOTE(review): WORKSPACE is unconditionally overwritten here, so whatever
# value Jenkins injected (echoed above) is discarded — confirm this
# hard-coded path is intentional and still current.
WORKSPACE=/home/jenkins-slave/workspace/MindSpore_Network_reid_compile_performance
echo $WORKSPACE
if [ $stage -le 1 ]; then
    echo ""
    echo "===========Stage 1: Fetching latest mindspore from master==========="
    # A stale checkout would shadow the fresh clone, so drop it first;
    # rm -rf is a no-op when the directory does not exist.
    rm -rf mindspore
    git clone $ms_master
fi
if [ $stage -le 2 ]; then
    echo ""
    echo "===========Stage 2: Building mindspore==========="
    cd $work_dir/mindspore
    # -e ascend: Ascend backend; -j $n_worker: parallel build jobs;
    # -p on: presumably enables the Python package build — confirm
    # against build.sh's usage text.
    bash build.sh -e ascend -j $n_worker -p on
fi
if [ $stage -le 3 ]; then
    echo ""
    echo "===========Stage 3: Compiling networks==========="
    cd $work_dir
    mkdir -p $log_path

    # Compiling ReID-8
    # split resource-consuming task from others
    for count in $(seq 1 $iter); do
        echo "[INFO] Compiling ReID-8p, iteration $count"
        # drop any leftover directory from a previous run (no-op if absent)
        rm -rf reid$count
        mkdir reid$count
        cd reid$count
        bash $work_dir/faceReidToMe/dist_env/env_26/dist_env_26.sh
        # collect the per-device logs (8 devices) into the shared log dir
        for num in {0..7}; do
            cp device_$num/test_reid_stage123_1024node_graphdata_dynamiclossscale_log$num.log $log_path/reid_${count}_${num}.log
        done
        cd $work_dir
        mv reid$count $log_path
    done

    # Compiling BERT
    cd $work_dir
    for count in $(seq 1 $iter); do
        echo "[INFO] Compiling BERT, iteration $count"
        pytest -s mindspore/tests/perf_test/bert/test_bert_train.py::test_bert_train | tee $log_path/bert$count.log
    done

    # Compiling ResNet50
    for count in $(seq 1 $iter); do
        echo "[INFO] Compiling ResNet50, iteration $count"
        pytest -s mindspore/tests/perf_test/test_resnet_train.py::test_train_step | tee $log_path/resnet$count.log
    done

    # Compiling GPT
    for count in $(seq 1 $iter); do
        echo "[INFO] Compiling GPT, iteration $count"
        # BUG FIX: the original ran "cd gpt" inside the loop without ever
        # returning, so the second iteration tried to enter gpt/gpt and the
        # script died under "set -e". Use an absolute path and cd back.
        cd $work_dir/gpt
        bash scripts/run_standalone_train.sh 0 1 $work_dir/gpt_data | tee $log_path/gpt$count.log
        cd $work_dir
    done
fi
if [ $stage -le 4 ]; then
    echo ""
    echo "===========Stage 4: Processing log files==========="
    cd $work_dir
    # args: <me_report.xlsx> <log dir> <iterations> <output json>
    python process_data.py $me_report_path $log_path $iter $log_path/$log_data
fi
if [ $stage -le 5 ]; then
    echo ""
    echo "===========Stage 5: Generating reports==========="
    # mkdir -p replaces the explicit existence test with the same effect
    mkdir -p $log_path/reports
    # args: <log dir> <data json> <me_report.xlsx> <days to chart>
    python generate_report.py $log_path $log_path/$log_data $me_report_path $days
    # BUG FIX: the original "[ $ci_mode ]" is true for ANY non-empty string,
    # including "false", so ci_mode=false never disabled the copy. Compare
    # against the literal instead.
    if [ "$ci_mode" = true ]; then
        echo "copying file to artifacts"
        mkdir -p ${WORKSPACE}/archive
        cp $log_path/reports/* ${WORKSPACE}/archive
    fi
fi