| @@ -0,0 +1,143 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import sys | |||
| import re | |||
| import json | |||
| import os | |||
| import time | |||
| import openpyxl as opx | |||
def parse_arguments():
    """Read the four positional command-line arguments of this script.

    Expected invocation::

        <script> ME_REPORT_PATH LOG_PATH N_ITER OUT

    Returns:
        tuple: ``(me_report_path, log_path, n_iter, out)``. ``n_iter`` is
        converted to ``int``; the other three are the raw argument strings.

    Raises:
        ValueError: If fewer than four arguments are supplied, or if
            ``N_ITER`` is not a non-negative decimal integer.
    """
    print(sys.argv)
    if len(sys.argv) < 5:
        raise ValueError(
            "usage: %s ME_REPORT_PATH LOG_PATH N_ITER OUT" % sys.argv[0])
    me_report_path, log_path, n_iter, out = sys.argv[1:5]
    # Explicit check rather than `assert`: asserts are removed under
    # `python -O`, which would let a bad value through unnoticed.
    if not n_iter.isdigit():
        raise ValueError(
            "N_ITER must be a non-negative integer, got %r" % n_iter)
    return me_report_path, log_path, int(n_iter), out
def extract_by_keyword(doc, keyword, pattern):
    """Collect regex matches from the lines of ``doc`` that contain ``keyword``.

    Args:
        doc: Iterable of text lines (for example an open log file).
        keyword: Substring a line must contain to be searched.
        pattern: Regular expression applied with ``re.findall`` to every
            selected line.

    Returns:
        list: All matches, in order of appearance, flattened across lines.
    """
    matches = []
    for line_no, line in enumerate(doc):
        if keyword not in line:
            continue
        found = re.findall(pattern, line)
        # Echo every hit so the extraction can be followed on the console.
        print("L%d: extracted %s from '%s'" % (line_no, found, line.strip()))
        matches += found
    return matches
def process_log(fname, log_path, n_iter, keyword, pattern):
    """Run ``extract_by_keyword`` over the numbered log files of one network.

    Args:
        fname: ``%``-style file-name template with one integer slot for the
            iteration number (e.g. ``"bert%d.log"``).
        log_path: Directory that contains the log files.
        n_iter: Number of iterations; files for ``1 .. n_iter`` are read.
        keyword: Substring selecting the lines to search.
        pattern: Regular expression applied to the selected lines.

    Returns:
        dict: Maps each formatted file name to its list of matches.
    """
    results = {}
    for iteration in range(1, n_iter + 1):
        log_name = fname % iteration
        full_path = os.path.join(log_path, log_name)
        with open(full_path) as log_file:
            print("\nLoading %s" % full_path)
            results[log_name] = extract_by_keyword(log_file, keyword, pattern)
    return results
def summarize(func):
    """Decorate a log-processing function so its result carries summary stats.

    The wrapped function must return a non-empty dict that maps log-file
    names to numeric times. The wrapper adds three keys to that same dict
    and returns it:

    * ``"min_time"`` - the smallest time,
    * ``"min_file"`` - the file name that produced it,
    * ``"avg_time"`` - the arithmetic mean of all times.

    Raises:
        IndexError: If the wrapped function returns an empty dict (there is
            nothing to summarize).
    """
    from functools import wraps

    @wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        log = func(*args, **kwargs)
        if not log:
            # Same exception type as indexing the empty list would raise,
            # but with a message that names the actual problem.
            raise IndexError("%s returned no timings to summarize"
                             % getattr(func, "__name__", repr(func)))
        # Snapshot the entries before the summary keys are added.
        times = sorted(log.items(), key=lambda item: item[1])
        min_file, min_time = times[0]
        avg = sum(value for _, value in times) / len(times)
        log["min_time"] = min_time
        log["min_file"] = min_file
        log["avg_time"] = avg
        return log
    return wrapper
@summarize
def process_bert_log(log_path, n_iter):
    """Compute one time value per BERT log file (``bert<i>.log``).

    For every iteration the value is the first number found on a
    ``TotalTime`` line minus the first number found on a ``task_emit`` line.

    Args:
        log_path: Directory that contains the log files.
        n_iter: Number of iterations, i.e. log files ``1 .. n_iter``.

    Returns:
        dict: File name -> time difference; ``summarize`` then adds its
        summary keys to this dict.
    """
    template = "bert%d.log"
    total = process_log(template, log_path, n_iter, "TotalTime", r"\d+.\d+")
    task = process_log(template, log_path, n_iter, "task_emit", r"\d+.\d+")
    return {
        name: float(total[name][0]) - float(task[name][0])
        for name in total
    }
@summarize
def process_resnet_log(log_path, n_iter):
    """Compute one time value per ResNet log file (``resnet<i>.log``).

    The value for a file is its first ``TotalTime`` number minus its first
    ``task_emit`` number.

    Args:
        log_path: Directory that contains the log files.
        n_iter: Number of iterations, i.e. log files ``1 .. n_iter``.

    Returns:
        dict: File name -> time difference; ``summarize`` then adds its
        summary keys to this dict.
    """
    template = "resnet%d.log"
    total = process_log(template, log_path, n_iter, "TotalTime", r"\d+.\d+")
    task = process_log(template, log_path, n_iter, "task_emit", r"\d+.\d+")
    differences = {}
    for name, total_matches in total.items():
        differences[name] = float(total_matches[0]) - float(task[name][0])
    return differences
@summarize
def process_gpt_log(log_path, n_iter):
    """Compute one time value per GPT log file (``gpt<i>.log``).

    The value for a file is its first ``TotalTime`` number minus its first
    ``task_emit`` number.

    Args:
        log_path: Directory that contains the log files.
        n_iter: Number of iterations, i.e. log files ``1 .. n_iter``.

    Returns:
        dict: File name -> time difference; ``summarize`` then adds its
        summary keys to this dict.
    """
    number = r"\d+.\d+"
    template = "gpt%d.log"
    total = process_log(template, log_path, n_iter, "TotalTime", number)
    task = process_log(template, log_path, n_iter, "task_emit", number)
    result = {}
    for log_name in total:
        total_time = float(total[log_name][0])
        task_time = float(task[log_name][0])
        result[log_name] = total_time - task_time
    return result
@summarize
def process_reid_log(log_path, n_iter):
    """Compute one time value per ReID log file (``reid_<iter>_<device>.log``).

    Logs of eight devices (0-7) are read for every iteration. The value for
    a file is its first ``TotalTime`` number minus its first ``task_emit``
    number.

    Args:
        log_path: Directory that contains the log files.
        n_iter: Number of iterations, i.e. log files ``1 .. n_iter`` per
            device.

    Returns:
        dict: File name -> time difference for all devices and iterations;
        ``summarize`` then adds its summary keys to this dict.
    """
    differences = {}
    for device in range(8):
        # The iteration slot stays a %d placeholder; only the device is fixed.
        template = "reid_%d_" + str(device) + ".log"
        total = process_log(template, log_path, n_iter, "TotalTime", r"\d+.\d+")
        task = process_log(template, log_path, n_iter, "task_emit", r"\d+.\d+")
        for name, total_matches in total.items():
            differences[name] = float(total_matches[0]) - float(task[name][0])
    return differences
def write_to_me_report(log, me_report_path):
    """Append today's minimum times as a new row of the report workbook.

    The row is written below the last used row of worksheet ``"Sheet"``:
    column A gets the local date formatted as ``%m%d``; columns B-E get the
    ``"min_time"`` of reid, bert, resnet and gpt, rounded to two decimals.
    The workbook is saved back to the same path.

    Args:
        log: Dict with the keys ``"reid"``, ``"bert"``, ``"resnet"`` and
            ``"gpt"``, each mapping to a dict that has ``"min_time"``.
        me_report_path: Path of the workbook to load and overwrite.
    """
    workbook = opx.load_workbook(me_report_path)
    sheet = workbook["Sheet"]
    row = sheet.max_row + 1
    sheet['A%d' % row] = time.strftime('%m%d', time.localtime())
    columns = (
        ('B%d', "reid"),
        ('C%d', "bert"),
        ('D%d', 'resnet'),
        ('E%d', 'gpt'),
    )
    for cell_format, network in columns:
        sheet[cell_format % row] = round(log[network]["min_time"], 2)
    workbook.save(me_report_path)
def generate_report():
    """Process all network logs, dump the result as JSON and update the report.

    Reads the command-line arguments, processes the bert, resnet, gpt and
    reid logs (in that order), writes the combined dict to the ``out`` path
    as indented JSON, and appends the minimum times to the workbook.
    """
    me_report_path, log_path, n_iter, out = parse_arguments()
    processors = (
        ("bert", process_bert_log),
        ("resnet", process_resnet_log),
        ("gpt", process_gpt_log),
        ("reid", process_reid_log),
    )
    log_data = {
        network: process(log_path, n_iter) for network, process in processors
    }
    with open(out, "w") as json_file:
        json.dump(log_data, json_file, indent=2)
    write_to_me_report(log_data, me_report_path)
# Run the log-processing pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    generate_report()
| @@ -0,0 +1,104 @@ | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| import os | |||
| import sys | |||
| import json | |||
| import openpyxl as opx | |||
| import matplotlib.ticker as ticker | |||
| import matplotlib.pyplot as plt | |||
def parse_arguments():
    """Read the four positional command-line arguments of this script.

    Expected invocation::

        <script> LOG_PATH LOG_DATA ME_REPORT N_DAYS

    Returns:
        tuple: ``(log_path, log_data, me_report, n_days)``. ``n_days`` is
        converted to ``int``; the other three are the raw argument strings.

    Raises:
        ValueError: If fewer than four arguments are supplied, or if
            ``N_DAYS`` is not a non-negative decimal integer.
    """
    if len(sys.argv) < 5:
        raise ValueError(
            "usage: %s LOG_PATH LOG_DATA ME_REPORT N_DAYS" % sys.argv[0])
    log_path, log_data, me_report, n_days = sys.argv[1:5]
    # Explicit check rather than `assert`: asserts are removed under
    # `python -O`, which would let a bad value through unnoticed.
    if not n_days.isdigit():
        raise ValueError(
            "N_DAYS must be a non-negative integer, got %r" % n_days)
    return log_path, log_data, me_report, int(n_days)
def read_data(log_data, me_report_path, n_days):
    """Load the JSON log summary and the time series stored in the workbook.

    Args:
        log_data: Path of the JSON file to load.
        me_report_path: Path of the workbook; worksheet ``"Sheet"`` is read.
        n_days: If greater than zero, only the last ``n_days`` entries of
            every series are kept; otherwise the series are returned whole.

    Returns:
        tuple: ``(log, date, reid_data, bert_data, resnet_data, gpt_data)``.
        ``log`` is the parsed JSON; ``date`` holds the raw values of column
        A; the other lists hold columns B, C, D and E converted to float.
    """
    with open(log_data) as json_file:
        log = json.load(json_file)

    sheet = opx.load_workbook(me_report_path)["Sheet"]
    last_row = sheet.max_row

    def column_values(first_cell, last_cell_format):
        # Each row of the selected range is a sequence whose first (and, for a
        # single-column range, only) cell is read.
        cells = sheet[first_cell:last_cell_format % last_row]
        return [row[0].value for row in cells]

    date = column_values("A2", "A%d")
    reid_data = [float(value) for value in column_values("B2", "B%d")]
    bert_data = [float(value) for value in column_values("C2", "C%d")]
    resnet_data = [float(value) for value in column_values("D2", "D%d")]
    # The GPT column is read from row 43 instead of row 2.
    # NOTE(review): presumably GPT values were only recorded from that row
    # on - confirm against the workbook.
    gpt_data = [float(value) for value in column_values("E43", "E%d")]

    series = [date, reid_data, bert_data, resnet_data, gpt_data]
    if n_days > 0:
        series = [values[-n_days:] for values in series]
    return (log, *series)
def draw_figure(x_data, y_data, labels, title, out, height=24, width=8, tick_space=2):
    """Plot one or more series against the trailing x values and save the figure.

    Args:
        x_data: Sequence of x values shared by all series.
        y_data: Iterable of series. A series of length ``k`` is plotted
            against the LAST ``k`` entries of ``x_data``; an empty series is
            plotted against no x values.
        labels: Legend labels, one per series.
        title: Figure title.
        out: Output path handed to ``plt.savefig``.
        height: First element of matplotlib's ``figsize``. matplotlib reads
            ``figsize`` as ``(width, height)`` in inches, so despite its name
            this is the horizontal size. The name is kept for backward
            compatibility.
        width: Second element of ``figsize``, i.e. the vertical size.
        tick_space: Spacing of the major ticks on the x axis.

    Raises:
        AssertionError: If a series is longer than ``x_data``.
    """
    print("Generating figure to: %s" % out)
    fig = plt.figure(figsize=(height, width))
    try:
        for y, label in zip(y_data, labels):
            # `x_data[-0:]` is the whole sequence, not an empty one, so an
            # empty series needs an explicit empty slice.
            x = x_data[-len(y):] if len(y) else x_data[:0]
            assert len(x) == len(
                y), "assume len(x) == len(y), while %d != %d" % (len(x), len(y))
            plt.plot(x, y, linewidth=2, marker='o', markersize=5, label=label)
            ax = plt.gca()
            ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_space))
            # Annotate every second point with its value.
            for i in range(0, len(x), 2):
                plt.text(x[i], y[i], y[i], ha='center',
                         va='bottom', fontsize=8)
        plt.title(title)
        plt.xlabel("Date")
        plt.ylabel("Time(s)")
        plt.grid()
        plt.legend()
        plt.savefig(out)
    finally:
        # Release the figure; pyplot otherwise keeps every figure alive.
        plt.close(fig)
def generate_report(log, labels, log_path):
    """Extract the ``TotalTime`` section of each label's fastest log file.

    For every label, the file named by ``log[label]["min_file"]`` inside
    ``log_path`` is searched with ``grep`` for lines containing
    ``'TotalTime = '``; each match plus the 230 lines that follow it is
    written to ``<log_path>/reports/<label>_me.log``. grep's exit status is
    ignored, so a log without a match yields an empty report file.

    Args:
        log: Dict mapping each label to a dict that has a ``"min_file"`` key.
        labels: Labels to generate reports for.
        log_path: Directory containing the log files and the ``reports``
            sub-directory.

    Raises:
        FileNotFoundError: If the ``reports`` directory does not exist or
            ``grep`` is not installed.
    """
    import subprocess

    for label in labels:
        fname_path = os.path.join(log_path, log[label]["min_file"])
        out_path = os.path.join(log_path, "reports", label + "_me.log")
        print("Generating report to: %s" % out_path)
        # Pass an argument list and redirect in Python instead of building a
        # shell string, so paths with spaces or shell metacharacters work.
        with open(out_path, "wb") as report:
            subprocess.run(
                ["grep", "-A", "230", "-e", "TotalTime = ", "--", fname_path],
                stdout=report,
                check=False,
            )
def process_data():
    """Draw the trend figures and extract the per-network report logs.

    Reads the command-line arguments and the stored data, saves one figure
    for ReID/BERT/GPT and one for ResNet into ``<log_path>/reports``, then
    generates a report log for every key of the loaded JSON summary.
    """
    log_path, log_data, me_report, n_days = parse_arguments()
    loaded = read_data(log_data, me_report, n_days)
    log, date, reid_data, bert_data, resnet_data, gpt_data = loaded

    reports_dir = os.path.join(log_path, "reports")
    combined_png = os.path.join(reports_dir, "reid_bert_gpt.png")
    resnet_png = os.path.join(reports_dir, "resnet.png")

    draw_figure(date,
                [reid_data, bert_data, gpt_data],
                ["ReID", "BERT", "GPT"],
                "ReID&BERT&GPT",
                combined_png)
    draw_figure(date, [resnet_data], ["ResNet"], "ResNet", resnet_png)
    generate_report(log, list(log.keys()), log_path)
# Draw the figures and generate the reports only when executed as a script, not on import.
if __name__ == "__main__":
    process_data()
| @@ -0,0 +1,146 @@ | |||
| #!/bin/bash | |||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
# ---- Defaults; stage, days, iter and log_dir can be overridden with -s/-d/-i/-l ----
stage=0          # stages numbered below this value are skipped
days=7           # handed to generate_report.py as its last argument
iter=5           # runs per network in stage 3; also handed to process_data.py
device_id=0      # exported as DEVICE_ID
n_worker=128     # handed to build.sh via -j
ci_mode=true     # checked in stage 5 before reports are copied to the archive

# ---- Locations ----
work_dir="/opt/npu/me_monitor"
ms_master="https://gitee.com/mindspore/mindspore.git"
log_data="data.json"
me_report_path="${work_dir}/logs/ME_report_daily.xlsx"
log_dir="logs_$(date "+%m%d-%H%M")"
log_path="${work_dir}/logs/${log_dir}"

# Abort on the first failing command, including failures inside a pipeline.
set -eo pipefail
# Command-line overrides: -s <stage>  -d <days>  -i <iter>  -l <log_dir>
while getopts "s:d:i:l:" args; do
    case "$args" in
        s) stage=$OPTARG ;;
        d) days=$OPTARG ;;
        i) iter=$OPTARG ;;
        l)
            # log_path is derived from log_dir, so it is recomputed here.
            log_dir=$OPTARG
            log_path="${work_dir}/logs/${log_dir}"
            ;;
        ?)
            echo "unknown argument"
            exit 1
            ;;
    esac
done
# Load environment settings. env.sh is named without a directory and is
# sourced before the `cd` below, so it is not looked up relative to
# $work_dir unless the script is started from there.
source env.sh
# Make the selected device visible to the child processes started below.
export DEVICE_ID=$device_id
echo "Args: days=$days, iter=$iter, log_path=$log_path"
cd $work_dir
# WORKSPACE is printed, unconditionally replaced by a fixed path, and printed
# again; any value inherited from the environment is discarded.
# NOTE(review): the path looks like a Jenkins job workspace - confirm that
# overriding an inherited WORKSPACE is intended.
echo $WORKSPACE
WORKSPACE=/home/jenkins-slave/workspace/MindSpore_Network_reid_compile_performance
echo $WORKSPACE
# Stage 1: replace any existing checkout with a fresh clone of the repository.
if [ "$stage" -le 1 ]; then
    echo
    echo "===========Stage 1: Fetching latest mindspore from master==========="
    # Drop a checkout left behind by a previous run before cloning again.
    if [ -d mindspore ]; then rm -rf mindspore; fi
    git clone "$ms_master"
fi
# Stage 2: build the checkout located in $work_dir/mindspore.
if [ "$stage" -le 2 ]; then
    echo
    echo "===========Stage 2: Building mindspore==========="
    cd "${work_dir}/mindspore"
    bash build.sh -e ascend -j "$n_worker" -p on
fi
# Stage 3: run every network $iter times and collect the logs in $log_path.
if [ $stage -le 3 ]; then
    echo ""
    echo "===========Stage 3: Compiling networks==========="
    cd "$work_dir"
    mkdir -p "$log_path"

    # Compiling ReID-8
    # split resource-consuming task from others
    for count in $(seq 1 "$iter"); do
        echo "[INFO] Compiling ReID-8p, iteration $count"
        # Every iteration runs in its own, freshly created scratch directory.
        if [ -d "reid$count" ]; then
            rm -rf "reid$count"
        fi
        mkdir "reid$count"
        cd "reid$count"
        bash "$work_dir/faceReidToMe/dist_env/env_26/dist_env_26.sh"
        # Copy the eight per-device logs to reid_<iteration>_<device>.log,
        # the naming process_data.py reads.
        for num in {0..7}; do
            cp "device_$num/test_reid_stage123_1024node_graphdata_dynamiclossscale_log$num.log" "$log_path/reid_${count}_${num}.log"
        done
        cd "$work_dir"
        mv "reid$count" "$log_path"
    done

    # Compiling BERT
    cd "$work_dir"
    for count in $(seq 1 "$iter"); do
        echo "[INFO] Compiling BERT, iteration $count"
        pytest -s mindspore/tests/perf_test/bert/test_bert_train.py::test_bert_train | tee "$log_path/bert$count.log"
    done

    # Compiling ResNet50
    for count in $(seq 1 "$iter"); do
        echo "[INFO] Compiling ResNet50, iteration $count"
        pytest -s mindspore/tests/perf_test/test_resnet_train.py::test_train_step | tee "$log_path/resnet$count.log"
    done

    # Compiling GPT
    for count in $(seq 1 "$iter"); do
        echo "[INFO] Compiling GPT, iteration $count"
        # Absolute path on purpose: the loop never leaves gpt/, so a relative
        # `cd gpt` fails from the second iteration on and, with `set -e`,
        # aborts the whole script.
        cd "$work_dir/gpt"
        bash scripts/run_standalone_train.sh 0 1 "$work_dir/gpt_data" | tee "$log_path/gpt$count.log"
    done
fi
# Stage 4: parse the collected logs into $log_path/$log_data and update the workbook.
if [ "$stage" -le 4 ]; then
    echo
    echo "===========Stage 4: Processing log files==========="
    cd "$work_dir"
    python process_data.py "$me_report_path" "$log_path" "$iter" "${log_path}/${log_data}"
fi
# Stage 5: generate the figures and report logs and, in CI mode, copy them to
# the archive directory of the workspace.
if [ $stage -le 5 ]; then
    echo ""
    echo "===========Stage 5: Generating reports==========="
    mkdir -p "$log_path/reports"
    python generate_report.py "$log_path" "$log_path/$log_data" "$me_report_path" "$days"
    # `[ $ci_mode ]` only tests for a non-empty string, so ci_mode=false would
    # still take this branch; compare against the literal value instead.
    if [ "$ci_mode" = true ]; then
        echo "copying file to artifacts"
        mkdir -p "${WORKSPACE}/archive"
        cp "$log_path"/reports/* "${WORKSPACE}/archive"
    fi
fi