| @@ -0,0 +1,143 @@ | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| import sys | |||||
| import re | |||||
| import json | |||||
| import os | |||||
| import time | |||||
| import openpyxl as opx | |||||
def parse_arguments():
    """Parse command-line arguments for the log-processing script.

    Expects: argv[1]=me_report_path (xlsx), argv[2]=log_path (dir),
    argv[3]=n_iter (positive int), argv[4]=out (json output path).

    Returns:
        tuple: (me_report_path, log_path, int(n_iter), out)

    Raises:
        ValueError: if too few arguments are given or n_iter is not an integer.
    """
    print(sys.argv)
    # An assert here would be stripped under "python -O"; validate explicitly.
    if len(sys.argv) < 5:
        raise ValueError(
            "usage: <me_report_path> <log_path> <n_iter> <out>")
    me_report_path = sys.argv[1]
    log_path = sys.argv[2]
    n_iter = sys.argv[3]
    out = sys.argv[4]
    if not n_iter.isdigit():
        raise ValueError("n_iter must be a non-negative integer, got %r" % n_iter)
    return me_report_path, log_path, int(n_iter), out
def extract_by_keyword(doc, keyword, pattern):
    """Collect every regex match of `pattern` from lines that contain `keyword`.

    `doc` is any iterable of strings (typically an open file). Returns the
    matches in document order as a flat list of strings.
    """
    matches = []
    for lineno, line in enumerate(doc):
        if keyword not in line:
            continue
        found = re.findall(pattern, line)
        print("L%d: extracted %s from '%s'" % (lineno, found, line.strip()))
        matches.extend(found)
    return matches
def process_log(fname, log_path, n_iter, keyword, pattern):
    """Extract `pattern` matches from `n_iter` iteration log files.

    `fname` is a %-style template (e.g. "bert%d.log"); iterations are numbered
    1..n_iter. Returns a dict mapping each concrete file name to the list of
    strings extracted from lines containing `keyword`.
    """
    results = {}
    for iteration in range(1, n_iter + 1):
        name = fname % iteration
        full_path = os.path.join(log_path, name)
        with open(full_path) as handle:
            print("\nLoading %s" % full_path)
            results[name] = extract_by_keyword(handle, keyword, pattern)
    return results
def summarize(func):
    """Decorator: augment a {file_name: time} dict with summary statistics.

    The wrapped function must return a mapping of log-file name to a float
    compile time. The wrapper adds three keys to that mapping before
    returning it: "min_time", "min_file" (the fastest run) and "avg_time".

    Raises:
        ValueError: if the wrapped function returns an empty mapping
        (the original code raised a bare IndexError in that case).
    """
    import functools

    @functools.wraps(func)  # keep the wrapped function's name/docstring
    def wrapper(*args, **kwargs):
        log = func(*args, **kwargs)
        if not log:
            raise ValueError("no timing entries extracted by %s" % func.__name__)
        # min() replaces sort-then-[0]: same result, O(n) instead of O(n log n)
        min_file, min_time = min(log.items(), key=lambda kv: kv[1])
        # compute the average before the summary keys are added
        avg = sum(log.values()) / len(log)
        log["min_time"] = min_time
        log["min_file"] = min_file
        log["avg_time"] = avg
        return log
    return wrapper
@summarize
def process_bert_log(log_path, n_iter):
    """Compute BERT compile time per iteration log.

    Time = first TotalTime value minus first task_emit value in each log.
    Returns {log file name: float seconds}; @summarize adds min/avg keys.
    """
    fname = "bert%d.log"
    # BUG FIX: the dot in r"\d+.\d+" was unescaped, so it matched any
    # character (e.g. "12x34"); escape it to match real decimal numbers.
    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
    log = {}
    for name in total:
        log[name] = float(total[name][0]) - float(task[name][0])
    return log
@summarize
def process_resnet_log(log_path, n_iter):
    """Compute ResNet compile time per iteration log.

    Time = first TotalTime value minus first task_emit value in each log.
    Returns {log file name: float seconds}; @summarize adds min/avg keys.
    """
    fname = "resnet%d.log"
    # BUG FIX: escape the dot in the decimal-number regex (was r"\d+.\d+").
    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
    log = {}
    for name in total:
        log[name] = float(total[name][0]) - float(task[name][0])
    return log
@summarize
def process_gpt_log(log_path, n_iter):
    """Compute GPT compile time per iteration log.

    Time = first TotalTime value minus first task_emit value in each log.
    Returns {log file name: float seconds}; @summarize adds min/avg keys.
    """
    fname = "gpt%d.log"
    # BUG FIX: escape the dot in the decimal-number regex (was r"\d+.\d+").
    total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
    task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
    log = {}
    for name in total:
        log[name] = float(total[name][0]) - float(task[name][0])
    return log
@summarize
def process_reid_log(log_path, n_iter):
    """Compute ReID compile time per iteration for each of the 8 devices.

    Log files are named "reid_<iteration>_<device>.log". Returns
    {log file name: float seconds}; @summarize adds min/avg keys.
    """
    log = {}
    for device in range(8):
        # "reid_%d_<device>.log": process_log fills in the iteration number
        fname = "reid_%d_" + str(device) + ".log"
        # BUG FIX: escape the dot in the decimal-number regex (was r"\d+.\d+").
        total = process_log(fname, log_path, n_iter, "TotalTime", r"\d+\.\d+")
        task = process_log(fname, log_path, n_iter, "task_emit", r"\d+\.\d+")
        for name in total:
            log[name] = float(total[name][0]) - float(task[name][0])
    return log
def write_to_me_report(log, me_report_path):
    """Append today's minimum compile times as a new row of the ME report.

    Row layout: A=date (MMDD), B=ReID, C=BERT, D=ResNet, E=GPT, each value
    rounded to two decimals. The workbook is saved back in place.
    """
    workbook = opx.load_workbook(me_report_path)
    sheet = workbook["Sheet"]
    row = sheet.max_row + 1
    sheet['A%d' % row] = time.strftime('%m%d', time.localtime())
    # column letter -> network whose min_time lands in that column
    for column, network in (('B', 'reid'), ('C', 'bert'),
                            ('D', 'resnet'), ('E', 'gpt')):
        sheet['%s%d' % (column, row)] = round(log[network]["min_time"], 2)
    workbook.save(me_report_path)
def generate_report():
    """Entry point: parse logs for every network, dump the timing data as
    JSON and append the daily minimums to the ME xlsx report."""
    me_report_path, log_path, n_iter, out = parse_arguments()
    # dict literal preserves the original bert/resnet/gpt/reid call order
    log_data = {
        "bert": process_bert_log(log_path, n_iter),
        "resnet": process_resnet_log(log_path, n_iter),
        "gpt": process_gpt_log(log_path, n_iter),
        "reid": process_reid_log(log_path, n_iter),
    }
    with open(out, "w") as f:
        json.dump(log_data, f, indent=2)
    write_to_me_report(log_data, me_report_path)
# Script entry point: parse CLI args, process all logs, write data.json
# and update the xlsx report.
if __name__ == "__main__":
    generate_report()
| @@ -0,0 +1,104 @@ | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| import os | |||||
| import sys | |||||
| import json | |||||
| import openpyxl as opx | |||||
| import matplotlib.ticker as ticker | |||||
| import matplotlib.pyplot as plt | |||||
def parse_arguments():
    """Parse command-line arguments for the report-generation script.

    Expects: argv[1]=log_path (dir), argv[2]=log_data (json),
    argv[3]=me_report (xlsx), argv[4]=n_days (non-negative int).

    Returns:
        tuple: (log_path, log_data, me_report, int(n_days))

    Raises:
        ValueError: if too few arguments are given or n_days is not an integer.
    """
    # An assert here would be stripped under "python -O"; validate explicitly.
    if len(sys.argv) < 5:
        raise ValueError(
            "usage: <log_path> <log_data.json> <me_report.xlsx> <n_days>")
    log_path = sys.argv[1]
    log_data = sys.argv[2]
    me_report = sys.argv[3]
    n_days = sys.argv[4]
    if not n_days.isdigit():
        raise ValueError("n_days must be a non-negative integer, got %r" % n_days)
    return log_path, log_data, me_report, int(n_days)
def read_data(log_data, me_report_path, n_days):
    """Load the per-run timing JSON and the per-day history from the report.

    Args:
        log_data: path to the JSON file produced by the processing script.
        me_report_path: path to the ME xlsx report (sheet named "Sheet").
        n_days: if > 0, keep only the trailing n_days entries of each series.

    Returns:
        (log_dict, dates, reid_data, bert_data, resnet_data, gpt_data)
        where each *_data element is a list of floats, one per report row.
    """
    with open(log_data) as f:
        log = json.load(f)
    wb = opx.load_workbook(me_report_path)
    sheet = wb["Sheet"]
    n_row = sheet.max_row
    # column A holds the date labels; B-E hold per-network compile times
    date = [cell[0].value for cell in sheet["A2":"A%d" % n_row]]
    reid_data = [float(cell[0].value) for cell in sheet["B2":"B%d" % n_row]]
    bert_data = [float(cell[0].value) for cell in sheet["C2":"C%d" % n_row]]
    resnet_data = [float(cell[0].value) for cell in sheet["D2":"D%d" % n_row]]
    # NOTE(review): the GPT column starts at row 43, not row 2 like the
    # others — presumably GPT tracking began later, so earlier rows are
    # empty. Confirm this hard-coded offset is still correct.
    gpt_data = [float(cell[0].value) for cell in sheet["E43":"E%d" % n_row]]
    if n_days > 0:
        date = date[-n_days:]
        reid_data = reid_data[-n_days:]
        bert_data = bert_data[-n_days:]
        resnet_data = resnet_data[-n_days:]
        gpt_data = gpt_data[-n_days:]
    return log, date, reid_data, bert_data, resnet_data, gpt_data
def draw_figure(x_data, y_data, labels, title, out, height=8, width=24, tick_space=2):
    """Plot one line per series in `y_data` against `x_data` and save to `out`.

    A series may be shorter than x_data; it is right-aligned to the most
    recent dates. Every other point is annotated with its value to avoid
    label clutter.

    Args:
        x_data: list of x-axis labels (dates).
        y_data: list of series (lists of floats).
        labels: legend label per series (zipped with y_data).
        title: figure title.
        out: output image path.
        height/width: figure size in inches. BUG FIX: matplotlib's figsize
            is (width, height); the original passed figsize=(height, width)
            with height=24, width=8, so the parameter names were swapped.
            Defaults are adjusted here so the rendered figure stays the
            same 24x8-inch landscape.
        tick_space: spacing of major x-axis ticks.
    """
    print("Generating figure to: %s" % out)
    plt.figure(figsize=(width, height))
    for y, label in zip(y_data, labels):
        x = x_data[-len(y):]
        n_data = len(x)
        assert len(x) == len(
            y), "assume len(x) == len(y), while %d != %d" % (len(x), len(y))
        plt.plot(x, y, linewidth=2, marker='o', markersize=5, label=label)
        ax = plt.gca()
        ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_space))
        # annotate every other data point with its value
        for i in range(n_data):
            if i % 2 == 0:
                plt.text(x[i], y[i], y[i], ha='center',
                         va='bottom', fontsize=8)
    plt.title(title)
    plt.xlabel("Date")
    plt.ylabel("Time(s)")
    plt.grid()
    plt.legend()
    plt.savefig(out)
def generate_report(log, labels, log_path):
    """Excerpt each network's fastest run into <log_path>/reports/<label>_me.log.

    For every label, takes log[label]["min_file"] and extracts the line
    containing 'TotalTime = ' plus the 230 lines that follow it
    (equivalent to `grep -A 230`).
    """
    import subprocess
    for label in labels:
        fname = log[label]["min_file"]
        fname_path = os.path.join(log_path, fname)
        out_path = os.path.join(log_path, "reports", label + "_me.log")
        print("Generating report to: %s" % out_path)
        # SECURITY/ROBUSTNESS FIX: the original built a shell string via
        # os.system("grep ... %s > %s"), which breaks on paths containing
        # spaces or shell metacharacters. An argument list with a real
        # stdout handle needs no shell at all. check=False mirrors
        # os.system's ignore-the-exit-code behavior (grep returns 1 when
        # nothing matches).
        with open(out_path, "w") as out_file:
            subprocess.run(["grep", "-A", "230", "TotalTime = ", fname_path],
                           stdout=out_file, check=False)
def process_data():
    """Entry point: load timing data, render trend charts and excerpt the
    fastest run's log for every network into the reports directory."""
    log_path, log_data, me_report, n_days = parse_arguments()
    log, date, reid_data, bert_data, resnet_data, gpt_data = read_data(
        log_data, me_report, n_days)
    reports_dir = os.path.join(log_path, "reports")
    # ReID/BERT/GPT share one chart; ResNet gets its own
    draw_figure(date,
                [reid_data, bert_data, gpt_data],
                ["ReID", "BERT", "GPT"],
                "ReID&BERT&GPT",
                os.path.join(reports_dir, "reid_bert_gpt.png"))
    draw_figure(date, [resnet_data], ["ResNet"], "ResNet",
                os.path.join(reports_dir, "resnet.png"))
    generate_report(log, list(log.keys()), log_path)
# Script entry point: read CLI args, draw the figures and write the reports.
if __name__ == "__main__":
    process_data()
| @@ -0,0 +1,146 @@ | |||||
| #!/bin/bash | |||||
| # Copyright 2021 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
# Default configuration; the -s/-d/-i/-l getopts flags below override these.
stage=0
days=7
iter=5
device_id=0
n_worker=128
work_dir="/opt/npu/me_monitor"
me_report_path=$work_dir/logs/ME_report_daily.xlsx
# Each run gets its own timestamped log directory, e.g. logs_0315-1030.
log_dir=logs_$(date "+%m%d-%H%M")
log_path=$work_dir/logs/$log_dir
ms_master="https://gitee.com/mindspore/mindspore.git"
log_data="data.json"
ci_mode=true
# Abort on the first failing command, including failures inside pipelines.
set -e
set -o pipefail
# parse arguments from command line
#   -s <stage>  start from the given pipeline stage (default 0 = run all)
#   -d <days>   number of trailing days to chart (default 7)
#   -i <iter>   compile iterations per network (default 5)
#   -l <dir>    reuse an existing log directory instead of a timestamped one
while getopts "s:d:i:l:" args
do
    case $args in
        s)
            stage=$OPTARG
            ;;
        d)
            days=$OPTARG
            ;;
        i)
            iter=$OPTARG
            ;;
        l)
            log_dir=$OPTARG
            # keep log_path consistent with the overridden log_dir
            log_path=$work_dir/logs/$log_dir
            ;;
        ?)
            echo "unknown argument"
            exit 1
            ;;
    esac
done
# Load toolchain/environment variables and pin the NPU device for this run.
source env.sh
export DEVICE_ID=$device_id
echo "Args: days=$days, iter=$iter, log_path=$log_path"
cd $work_dir
echo $WORKSPACE
# NOTE(review): WORKSPACE is unconditionally overwritten here, so whatever
# value Jenkins injected (echoed above) is discarded — confirm this
# hard-coded path is intentional and still current.
WORKSPACE=/home/jenkins-slave/workspace/MindSpore_Network_reid_compile_performance
echo $WORKSPACE
if [ $stage -le 1 ]; then
    echo ""
    echo "===========Stage 1: Fetching latest mindspore from master==========="
    # A stale checkout would shadow the fresh clone, so drop it first;
    # rm -rf is a no-op when the directory does not exist.
    rm -rf mindspore
    git clone $ms_master
fi
if [ $stage -le 2 ]; then
    echo ""
    echo "===========Stage 2: Building mindspore==========="
    cd $work_dir/mindspore
    # -e ascend: Ascend backend; -j $n_worker: parallel build jobs;
    # -p on: presumably enables the Python package build — confirm
    # against build.sh's usage text.
    bash build.sh -e ascend -j $n_worker -p on
fi
if [ $stage -le 3 ]; then
    echo ""
    echo "===========Stage 3: Compiling networks==========="
    cd $work_dir
    mkdir -p $log_path

    # Compiling ReID-8
    # split resource-consuming task from others
    for count in $(seq 1 $iter); do
        echo "[INFO] Compiling ReID-8p, iteration $count"
        # drop any leftover directory from a previous run (no-op if absent)
        rm -rf reid$count
        mkdir reid$count
        cd reid$count
        bash $work_dir/faceReidToMe/dist_env/env_26/dist_env_26.sh
        # collect the per-device logs (8 devices) into the shared log dir
        for num in {0..7}; do
            cp device_$num/test_reid_stage123_1024node_graphdata_dynamiclossscale_log$num.log $log_path/reid_${count}_${num}.log
        done
        cd $work_dir
        mv reid$count $log_path
    done

    # Compiling BERT
    cd $work_dir
    for count in $(seq 1 $iter); do
        echo "[INFO] Compiling BERT, iteration $count"
        pytest -s mindspore/tests/perf_test/bert/test_bert_train.py::test_bert_train | tee $log_path/bert$count.log
    done

    # Compiling ResNet50
    for count in $(seq 1 $iter); do
        echo "[INFO] Compiling ResNet50, iteration $count"
        pytest -s mindspore/tests/perf_test/test_resnet_train.py::test_train_step | tee $log_path/resnet$count.log
    done

    # Compiling GPT
    for count in $(seq 1 $iter); do
        echo "[INFO] Compiling GPT, iteration $count"
        # BUG FIX: the original ran "cd gpt" inside the loop without ever
        # returning, so the second iteration tried to enter gpt/gpt and the
        # script died under "set -e". Use an absolute path and cd back.
        cd $work_dir/gpt
        bash scripts/run_standalone_train.sh 0 1 $work_dir/gpt_data | tee $log_path/gpt$count.log
        cd $work_dir
    done
fi
if [ $stage -le 4 ]; then
    echo ""
    echo "===========Stage 4: Processing log files==========="
    cd $work_dir
    # args: <me_report.xlsx> <log dir> <iterations> <output json>
    python process_data.py $me_report_path $log_path $iter $log_path/$log_data
fi
if [ $stage -le 5 ]; then
    echo ""
    echo "===========Stage 5: Generating reports==========="
    # mkdir -p replaces the explicit existence test with the same effect
    mkdir -p $log_path/reports
    # args: <log dir> <data json> <me_report.xlsx> <days to chart>
    python generate_report.py $log_path $log_path/$log_data $me_report_path $days
    # BUG FIX: the original "[ $ci_mode ]" is true for ANY non-empty string,
    # including "false", so ci_mode=false never disabled the copy. Compare
    # against the literal instead.
    if [ "$ci_mode" = true ]; then
        echo "copying file to artifacts"
        mkdir -p ${WORKSPACE}/archive
        cp $log_path/reports/* ${WORKSPACE}/archive
    fi
fi