GitOrigin-RevId: ffd6431299
tags/v1.6.0-rc1
@@ -67,7 +67,6 @@ option(MGE_WITH_ROCM "Enable ROCM support" OFF)
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)
if(MSVC OR WIN32)
    message(STATUS "windows force cudnn static link")
    set(MGE_WITH_CUDNN_SHARED OFF)
@@ -332,7 +331,6 @@ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MGE_COMMON_LINKER_
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
if(NOT MGE_WITH_JIT)
    if(MGE_WITH_HALIDE)
        message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
@@ -728,7 +726,6 @@ if (MGE_WITH_ROCM)
    include(cmake/rocm.cmake)
endif ()
if(MGE_WITH_ATLAS)
    add_subdirectory(dnn/atlas-stub)
    list(APPEND MGE_ATLAS_LIBS atlas-stub)
@@ -736,7 +733,6 @@ if(MGE_WITH_ATLAS)
    set(MGB_ATLAS ${MGE_WITH_ATLAS})
endif()
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
@@ -834,12 +830,10 @@ endif()
set(MGB_CUDA ${MGE_WITH_CUDA})
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})
#ROCM
set(MGB_ROCM ${MGE_WITH_ROCM})
set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})
# CAMBRICON
set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})
set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})
@@ -1029,7 +1023,6 @@ if(MGE_BUILD_SDK)
    add_subdirectory(sdk/load-and-run)
endif()
if(MGE_BUILD_IMPERATIVE_RT)
    add_subdirectory(imperative)
    message(STATUS "Enable imperative python wrapper runtime")
@@ -1117,4 +1110,3 @@ if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND("${CUDNN_VERSION}" VERSION_GREATER
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
endif()
@@ -1,3 +1,4 @@
include(ExternalProject)
find_package(LLVM 6.0 REQUIRED CONFIG)
@@ -38,7 +38,6 @@ list(APPEND OPR_PARAM_DEFS_OUTS
)
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR})
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h")
add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS})
@@ -56,7 +55,6 @@ endforeach()
add_dependencies(opr_param_defs _opr_param_defs)
install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
if(MGE_WITH_CUDA)
    add_library(cutlass INTERFACE)
    target_include_directories(cutlass
@@ -13,7 +13,6 @@
#if !defined(__CUDACC__) && !defined(__HIPCC__)
#endif // !defined(__CUDACC__)
// vim: syntax=cpp.doxygen
@@ -90,7 +90,6 @@ class Handle {
    std::unique_ptr<opr> create_rocm_operator();
#endif
    virtual ~Handle();
    /*!
@@ -137,11 +137,9 @@ if(MGE_WITH_CUDA)
    gen_cutlass_kimpl(conv2d tensorop8832)
    file(GLOB_RECURSE CUTLASS_SOURCES ${CUTLASS_GEN_DIR}/*.cu)
    list(APPEND SOURCES ${CUTLASS_SOURCES})
    list(APPEND SOURCES ${CUSOURCES})
endif()
if(MGE_WITH_CAMBRICON)
    file(GLOB_RECURSE SOURCES_ cambricon/*.cpp)
    list(APPEND SOURCES ${SOURCES_})
@@ -161,7 +159,6 @@ if(MGE_WITH_ATLAS)
    list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1)
endif()
add_definitions(${LIBMEGDNN_DEF})
add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES})
@@ -186,7 +183,6 @@ if(MGE_WITH_ROCM)
        ${AMDOCL_LIBRARY_DIR})
endif()
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64")
    if(MGE_ENABLE_CPUINFO)
        target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>)
@@ -15,5 +15,4 @@
#pragma message "Mangling is disabled."
#endif // MEGDNN_ENABLE_MANGLING
// vim: syntax=cpp.doxygen
@@ -31,13 +31,10 @@
#include "src/aarch64/handle.h"
#endif
#if MEGDNN_WITH_CUDA
#include "src/cuda/handle.h"
#endif
#if MEGDNN_WITH_CAMBRICON
#include "src/cambricon/handle.h"
#endif
@@ -128,7 +125,6 @@ std::unique_ptr<Handle> Handle::make(megcoreComputingHandle_t computing_handle,
    return nullptr;
}
void Handle::set_destructor(const thin_function<void()>& d) {
    megdnn_assert(!m_destructor, "destructor can be set only once");
    m_destructor = d;
@@ -17,8 +17,6 @@
#include "src/cuda/megcore/cuda_computing_context.hpp"
#endif
#if MEGDNN_WITH_ROCM
#include "src/rocm/megcore/computing_context.hpp"
#endif
@@ -880,7 +880,6 @@ void remap(const Mat<T>& src, Mat<T>& dst, Mat<short>& map1, Mat<ushort>& map2,
    for (; x1 <= bcols - 8; x1 += 8)
        vst1q_u16(A + x1,
                  vandq_u16(vld1q_u16(sA + x1), v_scale));
#endif
    for (; x1 < bcols; ++x1)
        A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2 - 1));
@@ -287,7 +287,6 @@ void ConvBiasForwardImpl::AlgoPack::fill_dp4a_algos() {
    int8_nchw4_dotprod.emplace_back(AlgoParam{16, 64, 8, 16, 64, 8, 2});
}
ConvBiasForwardImpl::AlgoBase*
ConvBiasForwardImpl::AlgoPack::cudnn_conv_from_enum(
        cudnnConvolutionFwdAlgo_t algo) {
@@ -1037,7 +1037,6 @@ private:
    WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
};
class ConvBiasForwardImpl::AlgoPack : NonCopyableObj {
private:
    AlgoBase::Mapper m_all_algos_map;
@@ -10,7 +10,6 @@
 */
#include "src/common/utils.h"
namespace {
template <bool is_xcorr, typename dtype>
@@ -34,7 +34,6 @@ if(MGE_WITH_CAMBRICON)
    list(APPEND SOURCES ${SOURCES_})
endif()
if(MGE_WITH_ATLAS)
    file(GLOB_RECURSE SOURCES_ atlas/*.cpp)
    list(APPEND SOURCES ${SOURCES_})
@@ -45,8 +44,6 @@ if (MGE_WITH_ROCM)
    list (APPEND SOURCES ${SOURCES_})
endif()
add_executable(megdnn_test ${SOURCES})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
target_link_libraries(megdnn_test gtest)
@@ -60,7 +57,6 @@ if(MGE_WITH_ATLAS)
    target_link_libraries(megdnn_test atlas-stub)
endif()
target_include_directories(megdnn_test
    PRIVATE
    ${PROJECT_SOURCE_DIR}/third_party/midout/src
@@ -494,7 +494,6 @@ std::vector<TestArg> get_int8_nchw44_args(size_t kernel_size, size_t pack_size,
    return args;
}
std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
@@ -530,7 +529,6 @@ std::vector<TestArg> get_int8_nchw4_args_check_bounds(size_t kernel_size) {
    return args;
}
std::vector<TestArg> get_int8_nchw4_args_small_batch(size_t kernel_size) {
    std::vector<TestArg> args;
    param::ConvBias cur_param;
@@ -974,7 +972,6 @@ void benchmark_winograd(const char* algo_name, Handle* handle, size_t kernel,
}
#endif // MEGDNN_WITH_BENCHMARK
std::vector<conv_bias::TestArg> get_conv_bias_args(
        std::vector<size_t> kernel, size_t stride, bool no_pad, bool no_bias,
        bool no_nonlinemode, bool quantized_nlmod, bool only_broadcast_bias) {
@@ -1188,7 +1185,6 @@ void check_conv_bias_preprocess(std::vector<conv_bias::TestArg> args,
    }
}
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args, Handle* handle,
                              RNG* rng, float epsilon, DType type0, DType type1,
                              DType type2, DType type3, const char* algo_name) {
@@ -93,7 +93,6 @@ void check_conv_bias(std::vector<megdnn::test::conv_bias::TestArg> args,
void checker_conv_bias_int8x8x16(
        std::vector<megdnn::test::conv_bias::TestArg> args,
        megdnn::Handle* handle, const char* algo_name);
void checker_conv_bias_common(std::vector<conv_bias::TestArg> args,
                              Handle* handle, RNG* rng, float epsilon,
                              DType type0, DType type1, DType type2,
@@ -1145,7 +1145,6 @@ TEST(SmallVectorTest, SwapMoveOnly) {
        }
    }
}
} // anonymous namespace
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
@@ -40,7 +40,6 @@ TensorLayout make_layout(std::initializer_list<size_t> shape,
}
} // anonymous namespace
#if MEGDNN_64_BIT
TEST(BASIC_TYPES, TOTAL_NR_ELEMS) {
    TensorShape shp{1u<<31, 1u<<31};
@@ -340,5 +339,4 @@ TEST(BASIC_TYPES, TENSOR_LAYOUT_FMT_LOW_BITS_VALID) {
                     LowbitsAlignedToBytesTensorFormat::make(4_z)),
                 MegDNNError);
}
// vim: syntax=cpp.doxygen
@@ -697,7 +697,6 @@ TEST_F(CUDA, CONV_BIAS_INT8_CHWN4_UNROLL_WIDTH_TENSORCORE_1x1_ALGO_2) {
            conv_bias::get_int8_chwn4_args_small_batch(1));
}
TEST_F(CUDA, FALLBACK_CONV_QS8) {
    require_compute_capability_eq(7, 5);
    Checker<ConvBiasForward> checker(handle_cuda());
@@ -1100,7 +1099,6 @@ TEST_F(CUDA, BENCHMARK_CONV_BIAS_INT8_NCHW4_NCHW) {
    run({{16, 16, 46, 80, 4}, {32, 16, 3, 3, 4}, {1, 32, 1, 1}});
}
#if CUDA_VERSION >= 10020
TEST_F(CUDA, BENCHMARK_CUTLASS_CONV_BIAS_INT8_NCHW32) {
    require_compute_capability(7, 5);
@@ -32,7 +32,6 @@ TYPED_TEST(CUDA_ELEMWISE_MULTI_TYPE, run) {
    elemwise_multi_type::run_test<TypeParam>(this->handle_cuda());
}
using Mode = ElemwiseMultiType::Param::Mode;
static void run_test(int arity, Checker<ElemwiseMultiType>& checker, Mode mode) {
    for (auto type : std::vector<std::pair<DType, DType>>{
@@ -22,7 +22,6 @@
using namespace megdnn;
using namespace test;
TEST_F(CUDA, SLEEP) {
    auto opr = this->handle_cuda()->create_operator<megdnn::SleepForward>();
@@ -53,6 +52,5 @@ TEST_F(CUDA, SLEEP) {
}
// vim: syntax=cpp.doxygen
@@ -75,7 +75,6 @@ TEST_F(FALLBACK, CONV_BIAS_FORWARD) {
                .execs({src_shape, filter_shape, bias_shape, {}, {}})
                .execs({src_shape, filter_shape, bias_shape_channel, {}, {}});
    }
}
std::vector<conv_bias::TestArg> get_conv_bias_args(
@@ -236,7 +235,6 @@ TEST_F(FALLBACK_MULTI_THREADS, CONV_BIAS_FORWARD_QUANTIZED) {
            "FALLBACK_NAIVE");
}
#if MEGDNN_WITH_BENCHMARK
TEST_F(FALLBACK, BENCHMARK_CONVBIAS) {
    constexpr size_t RUNS = 10;
@@ -139,3 +139,52 @@ def batch_conv_bias_activation(
    )
    (outputs,) = apply(op, inp, weight, bias)
    return outputs
+
+
+def conv_transpose2d(
+    inp: Tensor,
+    weight: Tensor,
+    bias: Tensor = None,
+    dtype=None,
+    stride: Union[int, Tuple[int, int]] = 1,
+    padding: Union[int, Tuple[int, int]] = 0,
+    dilation: Union[int, Tuple[int, int]] = 1,
+    groups: int = 1,
+    conv_mode="cross_correlation",
+    compute_mode="default",
+) -> Tensor:
+    assert (
+        conv_mode.lower() == "cross_correlation"
+        or conv_mode.name == "CROSS_CORRELATION"
+    )
+    assert compute_mode.lower() == "default" or compute_mode.name == "DEFAULT"
+    if groups != 1:
+        raise NotImplementedError(
+            "group quantized transposed conv2d is not supported yet."
+        )
+    if bias is not None:
+        raise NotImplementedError(
+            "bias of quantized transposed conv2d is not supported yet."
+        )
+    pad_h, pad_w = _pair(padding)
+    stride_h, stride_w = _pair_nonzero(stride)
+    dilate_h, dilate_w = _pair_nonzero(dilation)
+    # should be replaced by Op with bias such as ConvolutionBackwardDataBias
+    op = builtin.ConvolutionBackwardData(
+        stride_h=stride_h,
+        stride_w=stride_w,
+        pad_h=pad_h,
+        pad_w=pad_w,
+        dilate_h=dilate_h,
+        dilate_w=dilate_w,
+        strategy=get_execution_strategy(),
+        dtype=dtype,
+        compute_mode=compute_mode,
+        mode=conv_mode,
+    )
+    (output,) = apply(op, weight, inp)
+    return output
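
A minimal usage sketch of the new quantized functional above (illustration only, not part of the diff). Scales and shapes are arbitrary; the (in_channels, out_channels, kh, kw) weight layout and the dtype helpers follow the test added further below. Only the currently supported path, no bias and groups=1, is shown.

    import numpy as np
    import megengine as mge
    from megengine.core.tensor import dtype
    from megengine.functional.quantized import conv_transpose2d

    inp_dtype = dtype.qint8(0.05)   # hypothetical input scale
    w_dtype = dtype.qint8(0.05)     # hypothetical weight scale
    out_dtype = dtype.qint8(0.05)   # requested output dtype

    inp = mge.tensor(
        dtype.convert_to_qint8(np.random.randn(2, 4, 8, 8).astype("float32"), inp_dtype),
        dtype=inp_dtype,
    )
    # weight layout follows the new test: (in_channels, out_channels, kh, kw)
    weight = mge.Parameter(
        dtype.convert_to_qint8(np.random.randn(4, 8, 3, 3).astype("float32"), w_dtype),
        dtype=w_dtype,
    )

    # bias and groups != 1 raise NotImplementedError for now, so bias stays None
    out = conv_transpose2d(inp, weight, None, dtype=out_dtype, stride=1, padding=0)
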
@@ -651,11 +651,11 @@ class ConvTranspose2d(_ConvNd):
        # Assume format is NCHW
        return (1, self.out_channels, 1, 1)
-    def forward(self, inp):
+    def calc_conv_transpose2d(self, inp, weight, bias):
        return conv_transpose2d(
            inp,
-            self.weight,
-            self.bias,
+            weight,
+            bias,
            self.stride,
            self.padding,
            self.dilation,
@@ -664,6 +664,9 @@ class ConvTranspose2d(_ConvNd):
            self.compute_mode,
        )
+    def forward(self, inp):
+        return self.calc_conv_transpose2d(inp, self.weight, self.bias)
+
class LocalConv2d(Conv2d):
    r"""
@@ -7,7 +7,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .batch_matmul_activation import BatchMatMulActivation
from .concat import Concat
-from .conv import Conv2d, ConvRelu2d
+from .conv import Conv2d, ConvRelu2d, ConvTranspose2d
from .conv_bn import ConvBn2d, ConvBnRelu2d
from .elemwise import Elemwise
from .linear import Linear
@@ -57,3 +57,42 @@ class ConvRelu2d(Conv2d):
    def forward(self, inp):
        return self.apply_quant_activation(F.relu(self.calc_conv_qat(inp)))
+
+
+class ConvTranspose2d(Float.ConvTranspose2d, QATModule):
+    r"""
+    A :class:`~.QATModule` :class:`~.module.ConvTranspose2d` with QAT support.
+    Could be applied with :class:`~.Observer` and :class:`~.FakeQuantize`.
+    """
+
+    def calc_conv_transpose2d_qat(self, inp):
+        w_qat = self.apply_quant_weight(self.weight)
+        b_qat = self.apply_quant_bias(self.bias, inp, w_qat)
+        conv = self.calc_conv_transpose2d(inp, w_qat, b_qat)
+        return conv
+
+    @classmethod
+    def from_float_module(cls, float_module: Float.ConvTranspose2d):
+        r"""
+        Return a :class:`~.QATModule` instance converted from
+        a float :class:`~.Module` instance.
+        """
+        qat_module = cls(
+            float_module.in_channels,
+            float_module.out_channels,
+            float_module.kernel_size,
+            float_module.stride,
+            float_module.padding,
+            float_module.dilation,
+            float_module.groups,
+            float_module.bias is not None,
+            float_module.conv_mode,
+            float_module.compute_mode,
+            name=float_module.name,
+        )
+        qat_module.weight = float_module.weight
+        qat_module.bias = float_module.bias
+        return qat_module
+
+    def forward(self, inp):
+        return self.apply_quant_activation(self.calc_conv_transpose2d_qat(inp))
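
For context, a sketch of the float-to-QAT path this new class enables (not part of the diff). It assumes the existing quantize_qat entry point in megengine.quantization.quantize dispatches Float.ConvTranspose2d to this QAT class through from_float_module; shapes and channel counts are arbitrary.

    import numpy as np
    from megengine import tensor
    from megengine.module import ConvTranspose2d, DequantStub, Module, QuantStub
    from megengine.quantization.quantize import quantize_qat

    class Net(Module):
        def __init__(self):
            super().__init__()
            self.quant = QuantStub()
            self.deconv = ConvTranspose2d(4, 8, 3, bias=False)
            self.dequant = DequantStub()

        def forward(self, x):
            return self.dequant(self.deconv(self.quant(x)))

    net = Net()
    qat_net = quantize_qat(net, inplace=False)  # Float.ConvTranspose2d -> qat.ConvTranspose2d
    out = qat_net(tensor(np.random.randn(2, 4, 16, 16).astype("float32")))
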
@@ -7,7 +7,7 @@
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from .batch_matmul_activation import BatchMatMulActivation
from .concat import Concat
-from .conv import Conv2d, ConvRelu2d
+from .conv import Conv2d, ConvRelu2d, ConvTranspose2d
from .conv_bn import ConvBn2d, ConvBnRelu2d
from .elemwise import Elemwise
from .linear import Linear
@@ -12,6 +12,7 @@ import numpy as np
from ... import module as Float
from ...core.tensor import dtype
from ...functional.nn import conv_bias_activation
+from ...functional.quantized import conv_transpose2d
from ...tensor import Parameter
from ..qat import conv as QAT
from .module import QuantizedModule
@@ -108,3 +109,98 @@ class ConvRelu2d(Conv2d):
    def forward(self, inp):
        return self.calc_conv_quantized(inp, nonlinear_mode="relu")
+
+
+class ConvTranspose2d(Float.ConvTranspose2d, QuantizedModule):
+    r"""Quantized version of :class:`~.qat.ConvTranspose2d`.
+
+    Applies a 2D transposed convolution over a quantized input tensor, used
+    for inference only.
+
+    The parameter is same with :class:`~.module.ConvTranspose2d` but dtype.
+
+    :param dtype: data type of the output, should be qint8.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        kernel_size: Union[int, Tuple[int, int]],
+        stride: Union[int, Tuple[int, int]] = 1,
+        padding: Union[int, Tuple[int, int]] = 0,
+        dilation: Union[int, Tuple[int, int]] = 1,
+        groups: int = 1,
+        bias: bool = True,
+        conv_mode: str = "cross_correlation",
+        compute_mode: str = "default",
+        dtype=None,
+        **kwargs
+    ):
+        super().__init__(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+            bias=bias,
+            conv_mode=conv_mode,
+            compute_mode=compute_mode,
+        )
+        self.output_dtype = dtype
+
+    @classmethod
+    def from_qat_module(cls, qat_module: QAT.ConvTranspose2d):
+        r"""
+        return a :class:`~.QuantizedModule` instance converted from a
+        :class:`~.QATModule` instance.
+        """
+        output_dtype = qat_module.get_activation_dtype()
+        qconv = cls(
+            qat_module.in_channels,
+            qat_module.out_channels,
+            qat_module.kernel_size,
+            qat_module.stride,
+            qat_module.padding,
+            qat_module.dilation,
+            qat_module.groups,
+            qat_module.bias is not None,
+            qat_module.conv_mode,
+            qat_module.compute_mode,
+            dtype=output_dtype,
+            name=qat_module.name,
+        )
+        weight = qat_module.weight.astype(qat_module.get_weight_dtype())
+        qconv.weight = Parameter(weight.numpy(), name=qat_module.weight.name)
+        qconv.bias = (
+            Parameter(qat_module.bias.numpy(), name=qat_module.bias.name)
+            if qat_module.bias is not None
+            else None
+        )
+        return qconv
+
+    def calc_conv_transpose2d_quantized(self, inp):
+        if self.bias is not None:
+            inp_scale = dtype.get_scale(inp.dtype)
+            w_scale = dtype.get_scale(self.weight.dtype)
+            bias_scale = inp_scale * w_scale
+        return conv_transpose2d(
+            inp=inp,
+            weight=self.weight,
+            bias=self.bias.astype(dtype.qint32(bias_scale))
+            if self.bias is not None
+            else None,
+            dtype=self.output_dtype,
+            stride=self.stride,
+            padding=self.padding,
+            dilation=self.dilation,
+            groups=self.groups,
+            conv_mode=self.conv_mode,
+            compute_mode=self.compute_mode,
+        )
+
+    def forward(self, inp):
+        return self.calc_conv_transpose2d_quantized(inp)
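
Continuing the QAT sketch above (after the qat/conv.py hunk), a hedged outline of how this quantized module is reached at inference time. It assumes the existing quantize entry point routes qat.ConvTranspose2d through from_qat_module, and that the observers have seen at least one batch so get_activation_dtype can yield an output scale.

    from megengine.quantization.quantize import quantize

    qat_net(tensor(np.random.randn(2, 4, 16, 16).astype("float32")))  # calibrate observers
    qat_net.eval()
    q_net = quantize(qat_net)  # qat.ConvTranspose2d -> quantized.ConvTranspose2d
    q_out = q_net(tensor(np.random.randn(2, 4, 16, 16).astype("float32")))
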
@@ -13,5 +13,3 @@ from .fake_quant import _FakeQuantize
from .observer import MinMaxObserver
from .qconfig import QConfig
from .utils import QParams
@@ -69,7 +69,6 @@ class PersistentCacheOnServer(_PersistentCache):
    def make_user_prefix(cls):
        return "mgbcache:{}".format(getpass.getuser())
    def _make_key(self, category, key):
        prefix_with_version = "{}:MGB{}".format(self._prefix, __version__)
        return b"@".join(
@@ -86,5 +85,3 @@ class PersistentCacheOnServer(_PersistentCache):
        key = self._make_key(category, key)
        self._prev_get_refkeep = conn.get(key)
        return self._prev_get_refkeep
@@ -38,7 +38,6 @@ class build_ext(_build_ext):
        modpath = str(pathlib.Path(*modpath).resolve())
        copy_file(modpath, fullpath, verbose=self.verbose, dry_run=self.dry_run)
package_name = 'MegEngine'
v = {}
@@ -79,7 +78,6 @@ megengine_data += [
    for f in pathlib.Path('megengine', 'core', 'lib').glob('**/*')
]
with open('requires.txt') as f:
    requires = f.read().splitlines()
with open('requires-style.txt') as f:
@@ -108,8 +106,6 @@ setup_kwargs = dict(
    cmdclass={'build_ext': build_ext},
    scripts = ['./megengine/tools/mge'],
)
setup_kwargs.update(dict(
    classifiers=[
        'Development Status :: 3 - Alpha',
@@ -876,8 +876,6 @@ def test_nms_is_same():
    assert op3 != op4
def test_argmxx_on_inf():
    def run_argmax():
        x = F.zeros((100, 100))
@@ -13,6 +13,7 @@ from megengine.module import (
    Conv2d,
    ConvBn2d,
    ConvRelu2d,
+    ConvTranspose2d,
    DequantStub,
    Module,
    QuantStub,
@@ -202,3 +203,40 @@ def test_quantize_batchmatmul_activation():
    infer_cg = cgtools.GraphInference(file)[0]
    dumped_outputs = list(infer_cg.run(inputs.numpy()).values())[0]
    np.testing.assert_allclose(quantize_outputs.numpy(), dumped_outputs, atol=1e-6)
+
+
+def test_qat_conv_transpose2d():
+    in_channels = 32
+    out_channels = 64
+    kernel_size = 3
+
+    class TestNet(Module):
+        def __init__(self, bias):
+            super().__init__()
+            self.quant = QuantStub()
+            self.dequant = DequantStub()
+            self.conv = ConvTranspose2d(
+                in_channels, out_channels, kernel_size, bias=bias
+            )
+
+        def forward(self, inp):
+            out = self.quant(inp)
+            out = self.conv(out)
+            out = self.dequant(out)
+            return out
+
+    inputs = tensor(np.random.randn(4, in_channels, 32, 32).astype(np.float32))
+    for bias in [True, False]:
+        net = TestNet(bias)
+        net.train()
+        qat_net = quantize_qat(net, inplace=False)
+        disable_fake_quant(qat_net)
+        normal_outputs = net(inputs)
+        qat_outputs = qat_net(inputs)
+        np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy())
+        net.eval()
+        normal_outputs = net(inputs)
+        qat_net.eval()
+        qat_outputs = qat_net(inputs)
+        np.testing.assert_allclose(normal_outputs.numpy(), qat_outputs.numpy())
@@ -92,8 +92,6 @@ def test_tqt():
    np.testing.assert_allclose(g_s.numpy(), g_s_np, rtol=5e-5, atol=5e-5)
def _save_to(self, name="grad"):
    def callback(grad):
        setattr(self, name, grad)
@@ -14,6 +14,7 @@ import megengine.functional as F
from megengine.core.tensor import dtype
from megengine.device import get_device_count
from megengine.functional.elemwise import _elemwise_multi_type, _elwise
+from megengine.module.quantized.conv import ConvTranspose2d
from megengine.quantization import QuantMode, create_qparams
@@ -168,3 +169,94 @@ def test_conv_bias():
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, False, "relu")
    run(10, 36, 8, 46, 26, 2, 2, 2, 1, 1, 2, True, "relu")
+
+
+def test_conv_transpose2d():
+    rng = np.random.RandomState(seed=2021)
+
+    def test_func(
+        N,
+        IC,
+        IH,
+        IW,
+        OC,
+        KH,
+        KW,
+        SH,
+        SW,
+        PH,
+        PW,
+        DH,
+        DW,
+        groups=1,
+        has_bias=True,
+        conv_mode: str = "cross_correlation",
+        compute_mode: str = "default",
+    ):
+        inp_scale = np.float32(rng.uniform(low=0.04, high=0.06))
+        weight_scale = np.float32(rng.uniform(low=0.04, high=0.06))
+        bias_scale = inp_scale * weight_scale
+        out_scale = np.float32(rng.uniform(low=0.04, high=0.06))
+        inp_dtype = dtype.qint8(inp_scale)
+        weight_dtype = dtype.qint8(weight_scale)
+        bias_dtype = dtype.qint32(bias_scale)
+        out_dtype = dtype.qint8(out_scale)
+        inp_fp32 = rng.uniform(low=-1, high=1, size=(N, IC, IH, IW)).astype(np.float32)
+        weight_fp32 = rng.uniform(low=-1, high=1, size=(IC, OC, KH, KW)).astype(
+            np.float32
+        )
+        bias_fp32 = rng.uniform(low=-1, high=1, size=(1, OC, 1, 1)).astype(np.float32)
+        inp_int8 = dtype.convert_to_qint8(inp_fp32, inp_dtype)
+        weight_int8 = dtype.convert_to_qint8(weight_fp32, weight_dtype)
+        bias_int32 = dtype.convert_to_qint32(bias_fp32, bias_dtype)
+        inp_int8 = mge.tensor(inp_int8, dtype=inp_dtype)
+        weight_int8 = mge.Parameter(weight_int8, dtype=weight_dtype)
+        bias_int32 = mge.Parameter(bias_int32, dtype=bias_dtype)
+        inp_fp32 = inp_int8.astype("float32")
+        weight_fp32 = weight_int8.astype("float32")
+        bias_fp32 = bias_int32.astype("float32")
+        expected = F.conv_transpose2d(
+            inp_fp32,
+            weight_fp32,
+            bias_fp32 if has_bias else None,
+            stride=(SH, SW),
+            padding=(PH, PW),
+            dilation=(DH, DW),
+            groups=groups,
+            conv_mode=conv_mode,
+            compute_mode=compute_mode,
+        )
+        expected = dtype.convert_to_qint8(expected.numpy(), out_dtype)
+        expected = dtype.convert_from_qint8(expected)
+        conv_transpose2d = ConvTranspose2d(
+            in_channels=IC,
+            out_channels=OC,
+            kernel_size=(KH, KW),
+            stride=(SH, SW),
+            padding=(PH, PW),
+            dilation=(DH, DW),
+            groups=groups,
+            bias=has_bias,
+            conv_mode=conv_mode,
+            compute_mode=compute_mode,
+            dtype=out_dtype,
+        )
+        conv_transpose2d.weight = mge.Parameter(weight_int8)
+        if has_bias:
+            conv_transpose2d.bias = mge.Parameter(bias_int32)
+        result = conv_transpose2d.forward(inp_int8).numpy()
+        result = dtype.convert_from_qint8(result)
+        np.testing.assert_allclose(result, expected, atol=out_scale)
+
+    test_func(1, 4, 1, 1, 4, 1, 1, 1, 1, 0, 0, 1, 1, 1, False)
+    test_func(2, 4, 3, 1, 8, 1, 1, 1, 1, 0, 0, 1, 1, 1, False)
+    test_func(4, 4, 16, 16, 8, 3, 3, 1, 1, 1, 1, 1, 1, 1, False)
+    test_func(32, 64, 36, 28, 16, 3, 2, 1, 3, 1, 0, 1, 1, 1, False)
@@ -486,8 +486,6 @@ def test_topk():
    check_pygraph_dump(fwd, [x], [top, indices])
def test_random():
    @trace(symbolic=True, capture_as_const=True)
    def fwd():
@@ -723,8 +721,6 @@ def test_elemwise_multitype():
    check_pygraph_dump(fwd, [x, y], [result])
def test_cvtcolor():
    inp = np.random.randn(3, 3, 3, 3).astype(np.float32)
    x = Tensor(inp)
@@ -7,4 +7,3 @@
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
__version__ = "1.6.0.dev"
@@ -43,6 +43,11 @@ auto apply_on_var_node(
        const VarNodeArray& inputs) {
    auto&& conv = static_cast<const ConvolutionBackwardData&>(def);
    OperatorNodeConfig config{conv.make_name()};
+    DType output_dtype = conv.dtype;
+    if (output_dtype.valid()) {
+        config.output_dtype(output_dtype);
+    }
    if (inputs.size() == 2) {
        return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], conv.param(), conv.policy(), config);
    } else {
@@ -192,7 +192,6 @@ function do_build() {
        #handle dlopen path
        install_name_tool -change @rpath/libmegengine_export.dylib @loader_path/lib/libmegengine_export.dylib _imperative_rt.so
        #copy megbrain_export lib
        DEPEND_LIB=${BUILD_DIR}/staging/megengine/core/lib/
        rm -rf ${DEPEND_LIB}
@@ -209,7 +208,6 @@ function do_build() {
        echo "comapt whl name: ${compat_whl_name}"
        cp ${BUILD_DIR}/staging/dist/Meg*.whl ${MACOS_WHL_HOME}/${compat_whl_name}
        cd ${SRC_DIR}
        echo ""
        echo "##############################################################################################"
@@ -220,12 +218,10 @@
    done
}
function third_party_prepare() {
    echo "init third_party..."
    ${SRC_DIR}/third_party/prepare.sh
    if [[ -z ${ALREADY_INSTALL_MKL} ]]
    then
        echo "init third_party..."
@@ -55,13 +55,11 @@ function patch_elf_depend_lib_mgb_mge() {
    patchelf --force-rpath --set-rpath '$ORIGIN/.' ${LIBS_DIR}/libmegengine_export.so
    handle_strip ${LIBS_DIR}/libmegengine_export.so
    # as some version of cudnn/trt libs have dlopen libs, so we can not use auditwheel
    # TODO: PR for auditwheel to support args for dlopen libs
    handle_copy_cuda_libs ${LIBS_DIR}
}
SRC_DIR=$(readlink -f "`dirname $0`/../../../")
source ${SRC_DIR}/scripts/whl/utils/utils.sh
@@ -142,7 +140,6 @@ do
    mkdir -p staging
    cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/
    cd ${BUILD_DIR}/staging/megengine/core
    mkdir -p lib/ucx
    patch_elf_depend_lib_mgb_mge
@@ -158,7 +155,6 @@ do
    echo "comapt whl name: ${compat_whl_name}"
    mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name}
    cd /home/output
    chown -R ${UID}.${UID} .
    # compat for root-less docker env to remove output at host side
@@ -70,7 +70,6 @@ then
    BUILD_WHL_CPU_ONLY="OFF"
fi
# config NVIDIA libs
TRT_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/TensorRT-6.0.1.5/lib/nvinfer.dll"
CUDNN_LIB="/c/Program Files/NVIDIA GPU Computing Toolkit/cudnn-10.1-windows10-x64-v7.6.5.32/cuda/bin/cudnn64_7.dll"
@@ -102,14 +101,11 @@ function copy_more_dll() {
    # empty.file to triger setup.py to create a null empty
    echo "empty" > ${CP_WHL_DST_IMP}/empty.file
    if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then
        echo "copy nvidia lib to whl use...."
        depend_real_copy ${CP_WHL_DST_IMP}
    fi
}
BUILD_DIR=${SRC_DIR}/build_dir/host/build/
# here we just treat cu file should not in the increment build file list
@@ -194,14 +190,12 @@ function do_build() {
        llvm-strip -s ${rt_file}
        mv ${rt_file} _imperative_rt.pyd
        copy_more_dll
        cd ${BUILD_DIR}/staging
        echo "call setup.py now"
        ${PYTHON_DIR}/python3 setup.py bdist_wheel
        cp ${BUILD_DIR}/staging/dist/Meg*.whl ${WINDOWS_WHL_HOME}/
        echo ""
        echo "##############################################################################################"
        echo "windows whl package location: ${WINDOWS_WHL_HOME}"
@@ -215,7 +209,6 @@ function third_party_prepare() {
    echo "init third_party..."
    ${SRC_DIR}/third_party/prepare.sh
    if [[ -z ${ALREADY_INSTALL_MKL} ]]
    then
        echo "init third_party..."
@@ -35,8 +35,6 @@
#include "megcore_atlas.h"
#endif
using namespace mgb;
/* =================== MegDNNHandle =================== */
@@ -102,7 +100,6 @@ MegDNNHandle::MegDNNHandle(const CompNodeEnv& env) {
    }
#endif
    if (env.property().type == CompNode::DeviceType::CPU) {
        megcoreCreateDeviceHandle(&m_dev_hdl, megcorePlatformCPU);
        megcoreCreateComputingHandleWithCPUDispatcher(&m_comp_hdl, m_dev_hdl,
@@ -234,7 +231,6 @@ void CompNodeEnv::init_cuda_async(int dev, CompNode comp_node,
    }
#endif
#if MGB_ATLAS
void mgb::_on_atlas_error(const char* expr, int err, const char* file,
@@ -258,8 +254,6 @@ void CompNodeEnv::init_atlas(CompNode comp_node, const AtlasEnv& env) {
}
#endif
#if MGB_ROCM
void mgb::_on_hip_error(const char* expr, hipError_t err, const char* file,
@@ -381,7 +375,6 @@ void CompNodeEnv::init_cpu(const CpuEnv& env, CompNode comp_node) {
            MegDNNHandle::get(*this).handle()->alignment_requirement();
}
#if MGB_CAMBRICON
void CompNodeEnv::init_cnrt(int dev, CompNode comp_node,
                            const ContinuationCtx<cnrtQueue_t>& cont) {
@@ -446,7 +439,6 @@ void CompNodeEnv::fini() {
        MGB_ATLAS_CHECK(aclrtDestroyStream(m_atlas_env.stream));
    }
#endif
}
#if MGB_ENABLE_COMP_NODE_ASYNC_INIT
@@ -73,14 +73,11 @@ std::string CudaError::get_cuda_extra_info() {
#endif
}
AtlasError::AtlasError(const std::string &msg):
    SystemError(msg)
{
}
ROCmError::ROCmError(const std::string &msg):
    SystemError(msg)
{
@@ -23,7 +23,6 @@
#include "megbrain/graph/helper.h"
#include "megbrain/opr/utility.h"
#if MGB_ENABLE_TENSOR_RT
#include "megbrain/tensorrt/opr_replace.h"
#endif
@@ -554,7 +553,6 @@ ComputingGraphImpl::CompileState ComputingGraphImpl::compile_prepare(
    }
#endif
#if MGB_JIT
    if (std::abs(options().graph_opt_level) == 0 &&
        (options().graph_opt.jit || options().graph_opt.jit_config.enabled())) {
@@ -445,7 +445,6 @@ class VarNodeMemManager {
    SyncableCounter m_cpu_async_release_barrier;
#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON || MGB_ROCM
    //! release dynamic var on after compnode event finishes
    class AsyncVarReleaser;
@@ -508,7 +508,6 @@ class CompNode {
     */
    static bool enable_affinity_for_cpu(bool flag);
protected:
    //! ImplBase with env(); defined in CompNodeEnv
    class Impl;
@@ -19,8 +19,6 @@
#include "megdnn/handle.h"
#if MGB_CUDA
#include <cuda_runtime.h>
#include <cuda.h>
@@ -90,8 +88,6 @@
#endif // MGB_ATLAS
#if MGB_ROCM
#include "hcc_detail/hcc_defs_prologue.h"
#include "megcore_rocm.h"
@@ -196,7 +192,6 @@ namespace mgb {
                          const char* file, const char* func, int line);
#endif
#if MGB_CUDA
[[noreturn]] void _on_cuda_error(const char* expr, cudaError_t err,
                                 const char* file, const char* func, int line);
@@ -205,7 +200,6 @@ namespace mgb {
                                  int line);
#endif
#if MGB_ROCM
[[noreturn]] void _on_hip_error(const char* expr, hipError_t err,
                                const char* file, const char* func, int line);
@@ -232,7 +226,6 @@ public:
        mgb_assert(0, "The CompNode set_affinity is not implement");
    }
};
using AtlasDispatcher = CPUDispatcher;
/*!
@@ -328,7 +321,6 @@ public:
    }
#endif
}
/*!
@@ -370,7 +362,6 @@ public:
                   const ContinuationCtx<cudaStream_t>& cont);
#endif
#if MGB_ATLAS
    struct AtlasEnv {
        int device = -1;
@@ -431,8 +422,6 @@ public:
    void init_atlas(CompNode comp_node, const AtlasEnv& env);
#endif
#if MGB_ROCM
    struct ROCmEnv {
        int device = -1;
@@ -547,7 +536,6 @@ private:
    CompNode m_comp_node;
    Property m_property;
    MemEventHandler m_mem_event_handler;
#if MGB_CUDA
    CudaEnv m_cuda_env;
#endif
@@ -71,7 +71,6 @@
    }) \
    do { \
    } while (0)
namespace mgb {
//! the most general MegBrain exception type; also base class for all megbrain
@@ -149,7 +148,6 @@ public:
    AtlasError(const std::string& msg);
};
class ROCmError final : public SystemError {
public:
    /*!
@@ -224,7 +222,6 @@ public:
    using MegBrainError::MegBrainError;
};
} // namespace mgb
namespace mgb {
@@ -233,5 +230,4 @@ bool has_uncaught_exception();
} // namespace mgb
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
@@ -49,7 +49,11 @@ def SVD: MgbHashableOp<"SVD", [SVDParam]>;
def Convolution : MgbHashableOp<"Convolution", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>;
-def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>;
+def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]> {
+  let extraArguments = (ins
+    MgbDTypeAttr:$dtype
+  );
+}
def Convolution3D: MgbHashableOp<"Convolution3D", [Convolution3DParam, ExecutionPolicyParamBase<"policy">]>;
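
The replacement above gives ConvolutionBackwardData an explicit dtype attribute; together with the apply_on_var_node hunk earlier, this is what lets the quantized conv_transpose2d functional request a qint8 output. A small sketch of the generated Python-side op (assuming the binding exposes the new attribute as a keyword argument, which is how the functional code above uses it):

    from megengine.core.ops import builtin
    from megengine.core.tensor import dtype

    op = builtin.ConvolutionBackwardData(
        stride_h=1, stride_w=1, pad_h=0, pad_w=0, dilate_h=1, dilate_w=1,
        # hypothetical output scale; forwarded to OperatorNodeConfig::output_dtype
        dtype=dtype.qint8(0.05),
    )
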
@@ -40,7 +40,6 @@ TEST(TestCompNode, Parse) {
    ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23));
    ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23));
    ASSERT_EQ(L::parse("rocmx"), make_lc(D::ROCM, -1, 0));
    ASSERT_EQ(L::parse("rocm2"), make_lc(D::ROCM, 2, 0));
    ASSERT_EQ(L::parse("rocm2:3"), make_lc(D::ROCM, 2, 3));
@@ -62,7 +61,6 @@ TEST(TestCompNode, Parse) {
    ASSERT_EQ(L::parse("multithread:default:2"),
              make_lc(D::MULTITHREAD, L::DEVICE_MULTITHREAD_DEFAULT, 2));
    ASSERT_THROW(L::parse("apu"), MegBrainError);
    ASSERT_THROW(L::parse("fpgbx"), MegBrainError);
    ASSERT_THROW(L::parse("cab0"), MegBrainError);
@@ -165,8 +163,6 @@ TEST(TestCompNode, Load) {
    auto atlas1 = CompNode::load("atlas1");
    ASSERT_NE(atlas0, atlas1);
#endif
}
TEST(TestCompNode, FreeAfterFinalize) {
@@ -355,7 +351,6 @@ TEST(TestCompNodeAtlas, MemNode) {
}
#endif
TEST(TestCompNodeCPU, PhysicalDispatch) {
    constexpr int ID = 0x2a6453e0;
    using L = CompNode::Locator;
@@ -754,7 +749,6 @@ TEST(TestCompNodeCambricon, P2PCopy) {
#endif
#endif // MGB_CAMBRICON
#if MGB_ATLAS
TEST(TestCompNodeAtlas, D2DCopy) {
@@ -780,7 +774,6 @@ TEST(TestCompNodeAtlas, D2DCopy) {
}
#endif
namespace {
class CompNodeDepedentObjectInst final : public CompNodeDepedentObject {
    int *m_dst, *m_timer;
@@ -634,7 +634,6 @@ void test_gather_other(CompNode cn0, CompNode cn1) {
    opr::Sleep::sleep(cn1, 0.7);
    func->execute();
}
} // namespace
#if MGB_CUDA
@@ -668,5 +667,4 @@ TEST(TestCudaMemAlloc, FreeMem) {
}
#endif // MGB_CUDA
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
@@ -340,7 +340,6 @@ TEST(TestTensor, ValueDump) {
    auto val = debug::dump_tensor(*gen({23, 45}), "test");
    debug::write_to_file(output_file("TestTensor.ValueDump.bin").c_str(), val);
}
template <class Src, class Dst>
void run_negative_index_test() {
    constexpr size_t S0 = 200, S1 = 200;
@@ -1912,7 +1912,6 @@ TEST_PASS(FuseConvBiasNonlinPass, Basic) {
    }
}
#if MGB_CUDA
TEST(TestEnableTensorCore, SmallInputShape) {
@@ -4735,7 +4734,6 @@ TEST(TestGoptInference, PaddingChannelsWithWarpPerspective) {
    MGB_ASSERT_TENSOR_EQ(t1, t2);
}
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
@@ -67,7 +67,6 @@
#define MGB_CUDA 1
#endif
// whether to include file/line location for assert message
#ifndef MGB_ASSERT_LOC
#define MGB_ASSERT_LOC 1
@@ -162,7 +161,6 @@
#define MGB_JIT_HALIDE 0
#endif
#ifndef MEGDNN_WITH_CAMBRICON
#define MEGDNN_WITH_CAMBRICON 0
#endif
@@ -182,7 +180,6 @@
#define MGB_ENABLE_FASTRUN 1
| #endif | #endif | ||||
| /* ================= following are more fine-grained controls ================= */ | /* ================= following are more fine-grained controls ================= */ | ||||
| // whether to enable json dumper | // whether to enable json dumper | ||||
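
These MGB_*/MEGDNN_* macros are the compile-time feature switches for the build: the header gives each one a default only when the build system has not already defined it, and feature code is then guarded with plain #if checks, as the MGB_CUDA guards elsewhere in this section show. A self-contained sketch of that pattern (MGB_CUDA is the real macro; the struct and its members are placeholders):

    // Default the switch only when the build system did not define it,
    // exactly as the #ifndef/#define pairs above do.
    #ifndef MGB_CUDA
    #define MGB_CUDA 0
    #endif

    struct CompNodeEnvSketch {
    #if MGB_CUDA
        // CUDA-only state is compiled in only when MGB_CUDA is enabled,
        // mirroring the guarded CudaEnv member near the top of this section.
        int cuda_placeholder;
    #endif
        int always_present;
    };
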
| @@ -162,7 +162,6 @@ namespace opr { | |||||
| using ReduceV2 = opr::Reduce; | using ReduceV2 = opr::Reduce; | ||||
| MGB_SEREG_OPR(ReduceV2, 0); | MGB_SEREG_OPR(ReduceV2, 0); | ||||
| } // namespace opr | } // namespace opr | ||||
| using TypeCvtV2 = opr::TypeCvt; | using TypeCvtV2 = opr::TypeCvt; | ||||
| MGB_SEREG_OPR(TypeCvtV2, 1); | MGB_SEREG_OPR(TypeCvtV2, 1); | ||||
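
The two lines above show the registration idiom used throughout these serialization hunks: a versioned type alias (ReduceV2, TypeCvtV2, DctChannelSelectV1, ...) fixes the name the operator is registered under, and MGB_SEREG_OPR performs the registration. A hypothetical registration following the same shape; the operator class MyOp and the numeric argument are invented for illustration and would need a real MegBrain operator to compile:

    namespace mgb {
    namespace opr {

    // Hypothetical: register MyOp under a versioned alias, mirroring the
    // ReduceV2 / TypeCvtV2 registrations above.
    using MyOpV2 = opr::MyOp;
    MGB_SEREG_OPR(MyOpV2, 1);

    } // namespace opr
    } // namespace mgb
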
| @@ -97,7 +97,6 @@ MGB_SEREG_OPR(SVD, 1); | |||||
| } // namespace opr | } // namespace opr | ||||
| } // namespace mgb | } // namespace mgb | ||||
| // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
| @@ -613,7 +613,6 @@ MGB_SEREG_OPR(LSQ, 4); | |||||
| MGB_SEREG_OPR(LSQBackward, 5); | MGB_SEREG_OPR(LSQBackward, 5); | ||||
| } // namespace opr | } // namespace opr | ||||
| } // namespace mgb | } // namespace mgb | ||||
| // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
| @@ -196,7 +196,6 @@ using DctChannelSelectV1 = opr::DctChannelSelect; | |||||
| MGB_SEREG_OPR(DctChannelSelectV1, 0); | MGB_SEREG_OPR(DctChannelSelectV1, 0); | ||||
| } // namespace opr | } // namespace opr | ||||
| } // namespace mgb | } // namespace mgb | ||||
| // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
| @@ -57,7 +57,6 @@ namespace serialization { | |||||
| } // namespace serialization | } // namespace serialization | ||||
| namespace opr { | namespace opr { | ||||
| MGB_SEREG_OPR(Argmax, 1); | MGB_SEREG_OPR(Argmax, 1); | ||||
| @@ -14,7 +14,6 @@ | |||||
| namespace mgb { | namespace mgb { | ||||
| namespace opr { | namespace opr { | ||||
| using UniformRNGV1 = opr::UniformRNG; | using UniformRNGV1 = opr::UniformRNG; | ||||
| @@ -120,7 +120,6 @@ namespace serialization { | |||||
| #endif | #endif | ||||
| } // namespace serialization | } // namespace serialization | ||||
| namespace opr { | namespace opr { | ||||
| MGB_SEREG_OPR(Broadcast, 2); | MGB_SEREG_OPR(Broadcast, 2); | ||||
| MGB_SEREG_OPR(Dimshuffle, 1); | MGB_SEREG_OPR(Dimshuffle, 1); | ||||
| @@ -2401,7 +2401,6 @@ TEST(TestOprDNN, ConvolutionMultiCompNode) { | |||||
| worker0.join(); | worker0.join(); | ||||
| worker1.join(); | worker1.join(); | ||||
| } | } | ||||
| #endif | #endif | ||||
| } // anonymous namespace | } // anonymous namespace | ||||
| @@ -37,7 +37,6 @@ GraphLoader::shared_tensor_name_map() { | |||||
| } | } | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| std::unique_ptr<GraphLoader> make_fbs_loader(std::unique_ptr<InputFile> file); | std::unique_ptr<GraphLoader> make_fbs_loader(std::unique_ptr<InputFile> file); | ||||
| std::unique_ptr<GraphDumper> make_fbs_dumper(std::unique_ptr<OutputFile> file); | std::unique_ptr<GraphDumper> make_fbs_dumper(std::unique_ptr<OutputFile> file); | ||||
| bool is_fbs_file(InputFile& file); | bool is_fbs_file(InputFile& file); | ||||
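
The three declarations above are the entry points for the fbs (presumably FlatBuffers) backed serializer. A small sketch of how they compose, using only those signatures; the header path is assumed, the declarations are repeated locally because they appear to live in the serializer implementation rather than a public header, and how the InputFile itself gets opened is outside this excerpt:

    #include <memory>
    #include "megbrain/serialization/serializer.h"  // assumed header for GraphLoader / InputFile

    namespace mgb {
    namespace serialization {

    // Declarations copied from the hunk above.
    std::unique_ptr<GraphLoader> make_fbs_loader(std::unique_ptr<InputFile> file);
    bool is_fbs_file(InputFile& file);

    // Pick the fbs-backed loader only when the file actually looks like one.
    std::unique_ptr<GraphLoader> try_make_fbs_loader(std::unique_ptr<InputFile> file) {
        if (!is_fbs_file(*file))
            return nullptr;
        return make_fbs_loader(std::move(file));
    }

    } // namespace serialization
    } // namespace mgb
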
| @@ -502,5 +502,4 @@ TEST(TestExternCOpr, Dedup) { | |||||
| ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst); | ASSERT_EQ(0, MGBOprDescImpl<>::nr_inst); | ||||
| } | } | ||||
| // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | ||||
| @@ -15,7 +15,6 @@ if (MGE_WITH_CUDA AND MGE_WITH_TRT) | |||||
| list(APPEND SOURCES ${SOURCES_}) | list(APPEND SOURCES ${SOURCES_}) | ||||
| endif() | endif() | ||||
| add_executable(megbrain_test ${SOURCES}) | add_executable(megbrain_test ${SOURCES}) | ||||
| target_link_libraries(megbrain_test gtest gmock) | target_link_libraries(megbrain_test gtest gmock) | ||||
| target_link_libraries(megbrain_test megbrain megdnn ${MGE_CUDA_LIBS}) | target_link_libraries(megbrain_test megbrain megdnn ${MGE_CUDA_LIBS}) | ||||
| @@ -63,7 +63,6 @@ pdef('PersistentOutputStorage').add_fields( | |||||
| 'false') | 'false') | ||||
| ) | ) | ||||
| (pdef('CollectiveComm', 'collective communication between multiple computing ' | (pdef('CollectiveComm', 'collective communication between multiple computing ' | ||||
| 'nodes on localhost') | 'nodes on localhost') | ||||
| .add_enum(Doc('Mode', 'mode of collective communication'), | .add_enum(Doc('Mode', 'mode of collective communication'), | ||||