GitOrigin-RevId: d7b3a79283
tags/v1.10.0
| @@ -188,20 +188,24 @@ if(MGE_WITH_CUDA) | |||||
| gen_cutlass_kimpl(dwconv2d_dgrad tensorop884 CUTLASS_SOURCES) | gen_cutlass_kimpl(dwconv2d_dgrad tensorop884 CUTLASS_SOURCES) | ||||
| gen_cutlass_kimpl(dwconv2d_wgrad simt CUTLASS_SOURCES) | gen_cutlass_kimpl(dwconv2d_wgrad simt CUTLASS_SOURCES) | ||||
| gen_cutlass_kimpl(dwconv2d_wgrad tensorop884 CUTLASS_SOURCES) | gen_cutlass_kimpl(dwconv2d_wgrad tensorop884 CUTLASS_SOURCES) | ||||
| # Compile the following opr first | |||||
| list(PREPEND CUSOURCES ${CUTLASS_SOURCES}) | |||||
| # Compile the following file first, the priority_compile_opr.txt is generated by | |||||
| # ../../scripts/cmake-build/utils/adjust_compile_opr/sort_compile_time_map.py. | |||||
| file( | file( | ||||
| GLOB_RECURSE | |||||
| PRIORITY_DIRS | |||||
| cuda/type_cvt/*cu | |||||
| cuda/conv_bias/*cu | |||||
| cuda/reduce/*cu | |||||
| cuda/relayout/*cu | |||||
| cuda/relayout_format/*cu | |||||
| cuda/powc/*cu | |||||
| cuda/warp_perspective/*cu) | |||||
| list(PREPEND PRIORITY_DIRS ${CUTLASS_SOURCES}) | |||||
| list(PREPEND CUSOURCES ${PRIORITY_DIRS}) | |||||
| list(REMOVE_DUPLICATES CUSOURCES) | |||||
| STRINGS | |||||
| ${CMAKE_CURRENT_SOURCE_DIR}/../../scripts/cmake-build/utils/adjust_compile_opr/priority_compile_opr.txt | |||||
| PRIORITY_FILES_TMPS) | |||||
| foreach(PRIORITY_FILES_TMP ${PRIORITY_FILES_TMPS}) | |||||
| foreach(CUSOURCE ${CUSOURCES}) | |||||
| string(FIND ${CUSOURCE} ${PRIORITY_FILES_TMP} PRIORITY_FILES_FOUND) | |||||
| if(NOT ${PRIORITY_FILES_FOUND} EQUAL -1) | |||||
| list(APPEND PRIORITY_FILES ${CUSOURCE}) | |||||
| list(REMOVE_ITEM CUSOURCES ${CUSOURCE}) | |||||
| endif() | |||||
| endforeach(CUSOURCE) | |||||
| endforeach(PRIORITY_FILES_TMP) | |||||
| list(PREPEND CUSOURCES ${PRIORITY_FILES}) | |||||
| list(PREPEND SOURCES ${CUSOURCES}) | list(PREPEND SOURCES ${CUSOURCES}) | ||||
| endif() | endif() | ||||
| @@ -0,0 +1,9 @@ | |||||
| # The main purpose of this folder is to adjust the compilation order of megdnn to improve the overall compilation time. If you add a new opr and find that it significantly slows down compilation, you can manually add it to the front of priority_compile_opr.txt. If you add several oprs, you can instead regenerate the list by running: python3 sort_compile_time_map.py. Note that this is very time-consuming because it performs a single-threaded compilation. | |||||
| - priority_compile_opr.txt: Compile order sorted by compile time | |||||
| - sort_compile_time_map.py: Script that measures per-file compile time and generates priority_compile_opr.txt | |||||
| ## Usage | |||||
| ```bash | |||||
| python3 sort_compile_time_map.py | |||||
| ``` | |||||
| @@ -0,0 +1,46 @@ | |||||
| cuda/type_cvt | |||||
| cuda/cutlass | |||||
| cuda/conv_bias | |||||
| cuda/reduce | |||||
| cuda/relayout | |||||
| cuda/add_update | |||||
| cuda/relayout_format | |||||
| cuda/elemwise | |||||
| cuda/argmxx | |||||
| cuda/powc | |||||
| cuda/elemwise_multi_type | |||||
| cuda/cumsum | |||||
| cuda/local_share | |||||
| cuda/argsort | |||||
| cuda/warp_perspective | |||||
| cuda/local | |||||
| cuda/batch_conv_bias | |||||
| cuda/warp_affine | |||||
| cuda/fake_quant | |||||
| cuda/remap | |||||
| cuda/cond_take | |||||
| cuda/rng | |||||
| cuda/tqt | |||||
| cuda/resize | |||||
| cuda/lsq | |||||
| cuda/convolution3d | |||||
| cuda/check_non_finite | |||||
| cuda/indexing_one_hot | |||||
| cuda/indexing_multi_axis_vec | |||||
| cuda/convpooling | |||||
| cuda/convolution | |||||
| cuda/tile | |||||
| cuda/padding | |||||
| cuda/matrix_mul | |||||
| cuda/gaussian_blur | |||||
| cuda/flip | |||||
| cuda/checksum | |||||
| cuda/sliding_window_transpose | |||||
| cuda/rotate | |||||
| cuda/roi_pooling | |||||
| cuda/roi_align | |||||
| cuda/repeat | |||||
| cuda/param_pack | |||||
| cuda/linspace | |||||
| cuda/layer_norm | |||||
| cuda/images2neibs | |||||
| @@ -0,0 +1,41 @@ | |||||
| # Get the top N files with the highest compilation time in megdnn, so that they can be compiled first | |||||
| from click import command | |||||
| import subprocess | |||||
| import re | |||||
| import tempfile | |||||
def topN_file(src_file_path, des_file_path, N):
    """Rank megdnn object files by compile time and record their operator dirs.

    The build log at ``src_file_path`` is scanned for a line containing both
    "Building C" and "megdnn.dir" that is immediately followed by a line
    containing "Elapsed time: ". The N slowest object files are reduced to
    their first two path components below ``megdnn.dir`` (for example
    ``cuda/type_cvt``). Each distinct component pair is written once, one per
    line, to ``des_file_path`` in descending order of compile time.

    Args:
        src_file_path: path of the build log to parse.
        des_file_path: path of the output list; it is overwritten.
        N: number of slowest object files to consider before de-duplication.

    Returns:
        The list of component pairs in the order they were written.
    """
    elapsed_tag = "Elapsed time: "
    file_map = {}

    with open(src_file_path) as src:
        lines = src.readlines()

    # zip() stops one line early, so a "Building" line at the very end of the
    # log (with no timing line after it) is ignored instead of raising
    # IndexError.
    for line, following in zip(lines, lines[1:]):
        if "Building C" not in line or "megdnn.dir" not in line:
            continue
        if elapsed_tag not in following:
            continue
        key = line[line.find("megdnn.dir"):line.find(".o")]
        start = following.find(elapsed_tag) + len(elapsed_tag)
        raw_seconds = following[start:following.find(" s. ")]
        try:
            # float() accepts both integral and fractional timings.
            file_map[key] = float(raw_seconds)
        except ValueError:
            # Unparseable timing: skip this entry rather than abort the run.
            continue

    # sorted() is stable, so entries with equal times keep their log order.
    ranked = sorted(file_map.items(), key=lambda item: item[1], reverse=True)

    result_opr = []
    with open(des_file_path, "w") as des:
        for object_path, _seconds in ranked[:N]:
            found = re.search(r"megdnn.dir\/(.*?)\/(.*?)[\.\/]", object_path)
            if found is None:
                # Path does not have two components below megdnn.dir.
                continue
            key = "/".join(found.groups())
            if key not in result_opr:
                result_opr.append(key)
                des.write(key + "\n")
    return result_opr
def compile(cmd: str, dir: str, failed_name=3):
    """Run a shell command ``failed_name`` times inside directory ``dir``.

    Args:
        cmd: shell script text, executed with ``shell=True``.
        dir: working directory in which the command is run.
        failed_name: number of times the command is executed.

    The exit status of each run is not inspected, so the command is always
    executed exactly ``failed_name`` times.
    NOTE(review): the parameter name suggests a retry-on-failure count;
    confirm whether the loop should stop after the first successful run.
    """
    for _ in range(failed_name):
        # Use the caller-supplied directory, not a module-level global.
        subprocess.run(cmd, shell=True, cwd=dir)
if __name__ == '__main__':
    # Shell script run inside the scratch build directory: configure with
    # MGE_PROFILE_COMPILE_TIME enabled, then build megdnn with a single job
    # while tee copies the build output into megdnn_map_compile_time.txt.
    cmd = '''
cmake .. -DMGE_PROFILE_COMPILE_TIME=ON
time make -j1 megdnn | tee megdnn_map_compile_time.txt
'''
    # The scratch directory is created relative to the current working
    # directory and is removed when the with-block exits.
    with tempfile.TemporaryDirectory(dir="../../../../") as t:
        compile(cmd, t)
        build_log = f"{t}/megdnn_map_compile_time.txt"
        topN_file(build_log, "./priority_compile_opr.txt", 500)