GitOrigin-RevId: 11a4b06f6f
tags/v1.7.0
| @@ -53,7 +53,9 @@ static inline void kern_4x4(const int8_t* packA, const int8_t* packB, int K, | |||||
| const int8_t* b_ptr = packB; | const int8_t* b_ptr = packB; | ||||
| LDC = LDC * sizeof(int8_t); | LDC = LDC * sizeof(int8_t); | ||||
| // clang-format off | |||||
| // clang-format off | |||||
| #define STORE_LINE(reg0) \ | #define STORE_LINE(reg0) \ | ||||
| "cmp w10, #0 \n" \ | "cmp w10, #0 \n" \ | ||||
| "beq 101f\n" \ | "beq 101f\n" \ | ||||
| @@ -10,7 +10,6 @@ | |||||
| * implied. | * implied. | ||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include "src/fallback/elemwise/opr_impl.h" | #include "src/fallback/elemwise/opr_impl.h" | ||||
| #include "src/arm_common/elemwise_op.h" | #include "src/arm_common/elemwise_op.h" | ||||
| @@ -10,6 +10,7 @@ | |||||
| */ | */ | ||||
| #include "src/arm_common/simd_macro/marm_neon.h" | #include "src/arm_common/simd_macro/marm_neon.h" | ||||
| #include "src/arm_common/matrix_mul/int8/gemv.h" | #include "src/arm_common/matrix_mul/int8/gemv.h" | ||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #include "megdnn/oprs.h" | #include "megdnn/oprs.h" | ||||
| @@ -60,11 +60,8 @@ | |||||
| #pragma GCC diagnostic ignored "-Wnon-virtual-dtor" | #pragma GCC diagnostic ignored "-Wnon-virtual-dtor" | ||||
| // TableHolderBase has no problem; ignore the warning for old clang versions | // TableHolderBase has no problem; ignore the warning for old clang versions | ||||
| #include "./helper.h" | |||||
| #include "./interp_helper.h" | #include "./interp_helper.h" | ||||
| #include "src/common/utils.h" | |||||
| using namespace megdnn; | using namespace megdnn; | ||||
| using namespace megdnn::megcv; | using namespace megdnn::megcv; | ||||
| @@ -62,7 +62,9 @@ | |||||
| #pragma once | #pragma once | ||||
| #include "src/common/cv/aligned_allocator.h" | #include "src/common/cv/aligned_allocator.h" | ||||
| #include "src/common/utils.h" | |||||
| #include "./helper.h" | |||||
| #include "megdnn/opr_param_defs.h" | #include "megdnn/opr_param_defs.h" | ||||
| #include <cstdint> | #include <cstdint> | ||||
| @@ -10,6 +10,7 @@ | |||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include "src/cuda/convolution_helper/parameter.cuh" | #include "src/cuda/convolution_helper/parameter.cuh" | ||||
| #include "src/cuda/utils.cuh" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace cuda { | namespace cuda { | ||||
| @@ -10,6 +10,7 @@ | |||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include "src/cuda/utils.cuh" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace cuda { | namespace cuda { | ||||
| @@ -10,6 +10,7 @@ | |||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include "megdnn/dtype.h" | #include "megdnn/dtype.h" | ||||
| #include "src/cuda/utils.cuh" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace cuda { | namespace cuda { | ||||
| @@ -9,6 +9,7 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include "src/cuda/utils.cuh" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace cuda { | namespace cuda { | ||||
| @@ -9,6 +9,7 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include "src/cuda/utils.cuh" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace cuda { | namespace cuda { | ||||
| @@ -9,6 +9,7 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include "src/cuda/utils.cuh" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace cuda { | namespace cuda { | ||||
| @@ -9,6 +9,7 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include "src/cuda/utils.cuh" | |||||
| namespace megdnn { | namespace megdnn { | ||||
| namespace cuda { | namespace cuda { | ||||
| @@ -9,6 +9,8 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include "src/rocm/miopen_wrapper.h" | |||||
| #include "megcore_rocm.h" | #include "megcore_rocm.h" | ||||
| #include "megdnn/basic_types.h" | #include "megdnn/basic_types.h" | ||||
| #include "megdnn/handle.h" | #include "megdnn/handle.h" | ||||
| @@ -16,7 +18,6 @@ | |||||
| #include "src/common/handle_impl.h" | #include "src/common/handle_impl.h" | ||||
| #include "src/common/utils.h" | #include "src/common/utils.h" | ||||
| #include "src/rocm/miopen_with_check.h" | |||||
| #include <rocblas.h> | #include <rocblas.h> | ||||
| #include <atomic> | #include <atomic> | ||||
| @@ -13,9 +13,11 @@ | |||||
| #include "megdnn/arch.h" | #include "megdnn/arch.h" | ||||
| #include <immintrin.h> | #include <immintrin.h> | ||||
| #ifdef WIN32 | |||||
| #include <avxintrin.h> | #include <avxintrin.h> | ||||
| #include <avx2intrin.h> | #include <avx2intrin.h> | ||||
| #include <fmaintrin.h> | #include <fmaintrin.h> | ||||
| #endif | |||||
| #if !defined (__clang__) | #if !defined (__clang__) | ||||
| #pragma GCC target ("avx") | #pragma GCC target ("avx") | ||||
| @@ -9,7 +9,8 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| // clang-format off | |||||
| #include "src/x86/simd_macro/sse_helper.h" | #include "src/x86/simd_macro/sse_helper.h" | ||||
| #include "src/fallback/convolution/do_conv_stride2_decl.inl" | #include "src/fallback/convolution/do_conv_stride2_decl.inl" | ||||
| #include "src/x86/simd_macro/sse_helper_epilogue.h" | #include "src/x86/simd_macro/sse_helper_epilogue.h" | ||||
| // clang-format on | |||||
| @@ -801,8 +801,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -896,8 +896,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -943,8 +943,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -948,8 +948,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -917,8 +917,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -856,8 +856,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -771,8 +771,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -788,8 +788,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -872,8 +872,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -910,8 +910,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -908,8 +908,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -872,8 +872,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -808,8 +808,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -722,8 +722,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -785,9 +785,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -827,9 +827,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -842,9 +842,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -833,9 +833,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -803,9 +803,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -755,9 +755,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -692,9 +692,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -771,9 +771,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -801,9 +801,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -806,9 +806,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -789,9 +789,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -753,9 +753,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -701,9 +701,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -636,9 +636,7 @@ | |||||
| } \ | } \ | ||||
| } while (0) | } while (0) | ||||
| #include <immintrin.h> | |||||
| #include <avxintrin.h> | |||||
| #include <fmaintrin.h> | |||||
| #include "src/x86/avx_helper.h" | |||||
| #include <algorithm> | #include <algorithm> | ||||
| #include "../convolution_direct_special_cases.h" | #include "../convolution_direct_special_cases.h" | ||||
| @@ -8,6 +8,8 @@ | |||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| // clang-format off | |||||
| #include "src/x86/simd_helper.h" | #include "src/x86/simd_helper.h" | ||||
| #include "src/x86/simd_macro/avx_helper.h" | #include "src/x86/simd_macro/avx_helper.h" | ||||
| #include "src/common/local/local_def.inl" | #include "src/common/local/local_def.inl" | ||||
| // clang-format on | |||||
| @@ -8,6 +8,8 @@ | |||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| // clang-format off | |||||
| #include "src/x86/simd_helper.h" | #include "src/x86/simd_helper.h" | ||||
| #include "src/x86/simd_macro/fma_helper.h" | #include "src/x86/simd_macro/fma_helper.h" | ||||
| #include "src/common/local/local_def.inl" | #include "src/common/local/local_def.inl" | ||||
| // clang-format on | |||||
| @@ -10,6 +10,7 @@ | |||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| // clang-format off | |||||
| #include "src/x86/simd_macro/sse_helper.h" | #include "src/x86/simd_macro/sse_helper.h" | ||||
| #include "src/common/local/local_decl.inl" | #include "src/common/local/local_decl.inl" | ||||
| #include "src/x86/simd_macro/sse_helper_epilogue.h" | #include "src/x86/simd_macro/sse_helper_epilogue.h" | ||||
| @@ -21,3 +22,4 @@ | |||||
| #include "src/x86/simd_macro/fma_helper.h" | #include "src/x86/simd_macro/fma_helper.h" | ||||
| #include "src/common/local/local_decl.inl" | #include "src/common/local/local_decl.inl" | ||||
| #include "src/x86/simd_macro/fma_helper_epilogue.h" | #include "src/x86/simd_macro/fma_helper_epilogue.h" | ||||
| // clang-format on | |||||
| @@ -8,6 +8,8 @@ | |||||
| * software distributed under the License is distributed on an | * software distributed under the License is distributed on an | ||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| // clang-format off | |||||
| #include "src/x86/simd_helper.h" | #include "src/x86/simd_helper.h" | ||||
| #include "src/x86/simd_macro/sse_helper.h" | #include "src/x86/simd_macro/sse_helper.h" | ||||
| #include "src/common/local/local_def.inl" | #include "src/common/local/local_def.inl" | ||||
| // clang-format on | |||||
| @@ -11,7 +11,6 @@ | |||||
| */ | */ | ||||
| #pragma once | #pragma once | ||||
| #include <x86intrin.h> | #include <x86intrin.h> | ||||
| #ifdef WIN32 | #ifdef WIN32 | ||||
| #include <avx2intrin.h> | #include <avx2intrin.h> | ||||
| #include <avxintrin.h> | #include <avxintrin.h> | ||||
| @@ -13,9 +13,11 @@ | |||||
| #include "megdnn/arch.h" | #include "megdnn/arch.h" | ||||
| #include <immintrin.h> | #include <immintrin.h> | ||||
| #ifdef WIN32 | |||||
| #include <xmmintrin.h> | #include <xmmintrin.h> | ||||
| #include <avxintrin.h> | #include <avxintrin.h> | ||||
| #include <fmaintrin.h> | #include <fmaintrin.h> | ||||
| #endif | |||||
| #include <cmath> | #include <cmath> | ||||
| #include <algorithm> | #include <algorithm> | ||||
| @@ -17,6 +17,7 @@ | |||||
| #include "llvm/Support/raw_ostream.h" | #include "llvm/Support/raw_ostream.h" | ||||
| namespace mlir::tblgen { | namespace mlir::tblgen { | ||||
| using llvm::raw_ostream; | |||||
| struct Environment { | struct Environment { | ||||
| std::unordered_map<unsigned int, std::pair<llvm::StringRef, llvm::StringRef>> enumAlias; | std::unordered_map<unsigned int, std::pair<llvm::StringRef, llvm::StringRef>> enumAlias; | ||||
| @@ -37,4 +38,4 @@ protected: | |||||
| Environment* env_p = nullptr; | Environment* env_p = nullptr; | ||||
| }; | }; | ||||
| } // namespace mlir::tblgen | |||||
| } // namespace mlir::tblgen | |||||
| @@ -9,6 +9,7 @@ | |||||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| */ | */ | ||||
| #include "./macros.h" | |||||
| #include "./cpp_class.h" | #include "./cpp_class.h" | ||||
| #include "../emitter.h" | #include "../emitter.h" | ||||
| @@ -125,7 +125,7 @@ StaticDeviceMemoryManager::make_default_impl() { | |||||
| #endif // MGB_THREAD_SAFE | #endif // MGB_THREAD_SAFE | ||||
| /* ==================== AsyncVarReleaser ==================== */ | /* ==================== AsyncVarReleaser ==================== */ | ||||
| #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM | |||||
| #if MGB_COMMON_ASYNC_COMPNODE | |||||
| class VarNodeMemManager::AsyncVarReleaser { | class VarNodeMemManager::AsyncVarReleaser { | ||||
| struct WaiterParam { | struct WaiterParam { | ||||
| CompNode cn; | CompNode cn; | ||||
| @@ -248,7 +248,7 @@ bool VarNodeMemManager::ImpureMemPlanManager::check_need_realloc() { | |||||
| VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph) | VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph) | ||||
| : m_owner_graph(graph), | : m_owner_graph(graph), | ||||
| m_seq_mem_opt(graph) | m_seq_mem_opt(graph) | ||||
| #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM | |||||
| #if MGB_COMMON_ASYNC_COMPNODE | |||||
| ,m_asyn_var_releaser(new AsyncVarReleaser) | ,m_asyn_var_releaser(new AsyncVarReleaser) | ||||
| #endif | #endif | ||||
| { | { | ||||
| @@ -256,7 +256,7 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph) | |||||
| MGB_MARK_USED_VAR(ev); | MGB_MARK_USED_VAR(ev); | ||||
| // async release is only used for sync between multiple comp nodes, and | // async release is only used for sync between multiple comp nodes, and | ||||
| // does not wait for device to finish | // does not wait for device to finish | ||||
| #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM | |||||
| #if MGB_COMMON_ASYNC_COMPNODE | |||||
| m_asyn_var_releaser->wait_release_finish(); | m_asyn_var_releaser->wait_release_finish(); | ||||
| #endif | #endif | ||||
| m_cpu_async_release_barrier.wait_zero(); | m_cpu_async_release_barrier.wait_zero(); | ||||
| @@ -297,8 +297,7 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph) | |||||
| graph->event().register_receiver_permanent<event::CompSeqExecError>( | graph->event().register_receiver_permanent<event::CompSeqExecError>( | ||||
| on_comp_seq_error); | on_comp_seq_error); | ||||
| #if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && \ | |||||
| (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM) | |||||
| #if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && MGB_COMMON_ASYNC_COMPNODE | |||||
| auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) { | auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) { | ||||
| m_asyn_var_releaser->wait_release_finish(); | m_asyn_var_releaser->wait_release_finish(); | ||||
| }; | }; | ||||
| @@ -445,7 +445,12 @@ class VarNodeMemManager { | |||||
| SyncableCounter m_cpu_async_release_barrier; | SyncableCounter m_cpu_async_release_barrier; | ||||
| #if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM | |||||
| // clang-format off | |||||
| #define MGB_COMMON_ASYNC_COMPNODE \ | |||||
| (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM) | |||||
| // clang-format on | |||||
| #if MGB_COMMON_ASYNC_COMPNODE | |||||
| //! release dynamic var on after compnode event finishes | //! release dynamic var on after compnode event finishes | ||||
| class AsyncVarReleaser; | class AsyncVarReleaser; | ||||
| std::unique_ptr<AsyncVarReleaser> m_asyn_var_releaser; | std::unique_ptr<AsyncVarReleaser> m_asyn_var_releaser; | ||||
| @@ -14,6 +14,7 @@ | |||||
| #include "megbrain/common.h" | #include "megbrain/common.h" | ||||
| #include <thread> | #include <thread> | ||||
| #include <atomic> | #include <atomic> | ||||
| #include "megbrain/utils/metahelper.h" | |||||
| namespace mgb { | namespace mgb { | ||||
| @@ -24,7 +25,7 @@ class Spinlock final: public NonCopyableObj { | |||||
| public: | public: | ||||
| void lock() { | void lock() { | ||||
| while (m_state.test_and_set(std::memory_order_acquire)); | |||||
| while (m_state.test_and_set(std::memory_order_acquire)) {}; | |||||
| } | } | ||||
| void unlock() { | void unlock() { | ||||
| @@ -281,8 +281,8 @@ MGB_DEFINE_OPR_CLASS(AddUpdate, | |||||
| * Mode specifies the actual arithmetic; and exactly one of *axis* and | * Mode specifies the actual arithmetic; and exactly one of *axis* and | ||||
| * *target_shape* must be provided, to specify output shape. | * *target_shape* must be provided, to specify output shape. | ||||
| */ | */ | ||||
| MGB_DEFINE_OPR_CLASS(Reduce, intl::DynamicOutputIfInputDynamic< | |||||
| intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolder>>) // { | |||||
| MGB_DEFINE_OPR_CLASS(Reduce, | |||||
| intl::DynamicOutputIfInputDynamic<intl::OutshapeBySymvarSCNOpr<mixin::MegDNNOprHolder>>) // { | |||||
| public: | public: | ||||
| using Param = megdnn::param::Reduce; | using Param = megdnn::param::Reduce; | ||||
| @@ -350,16 +350,17 @@ MGB_DEFINE_OPR_CLASS(Reduce, intl::DynamicOutputIfInputDynamic< | |||||
| * the optimizer. | * the optimizer. | ||||
| */ | */ | ||||
| MGB_DEFINE_OPR_CLASS(PowC, intl::MegDNNOprWrapperFwd<megdnn::PowC>) // { | MGB_DEFINE_OPR_CLASS(PowC, intl::MegDNNOprWrapperFwd<megdnn::PowC>) // { | ||||
| public: | |||||
| PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config); | |||||
| static SymbolVar make(SymbolVar inp, const Param& param = {}, | |||||
| const OperatorNodeConfig& config = {}); | |||||
| private: | |||||
| void add_input_layout_constraint() override; | void add_input_layout_constraint() override; | ||||
| void init_output_static_infer_desc() override; | void init_output_static_infer_desc() override; | ||||
| void mem_plan_fwd_in2out_writable() override; | void mem_plan_fwd_in2out_writable() override; | ||||
| NodeProp* do_make_node_prop() const override; | NodeProp* do_make_node_prop() const override; | ||||
| void scn_do_execute() override; | void scn_do_execute() override; | ||||
| public: | |||||
| PowC(VarNode* inp, const Param& param, const OperatorNodeConfig& config); | |||||
| static SymbolVar make(SymbolVar inp, const Param& param = {}, | |||||
| const OperatorNodeConfig& config = {}); | |||||
| }; | }; | ||||
| } // namespace opr | } // namespace opr | ||||
| @@ -1,4 +1,5 @@ | |||||
| //generated by tools/atlas/embed.py | |||||
| // generated by tools/atlas/embed.py | |||||
| // clang-format off | |||||
| #pragma once | #pragma once | ||||
| #include <map> | #include <map> | ||||
| #include <string> | #include <string> | ||||