| @@ -40,6 +40,10 @@ TEST(TestCompNode, Parse) { | |||||
| ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23)); | ASSERT_EQ(L::parse("cpu2:23"), make_lc(D::CPU, 2, 23)); | ||||
| ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23)); | ASSERT_EQ(L::parse("cpu21:23"), make_lc(D::CPU, 21, 23)); | ||||
| ASSERT_EQ(L::parse("rocmx"), make_lc(D::ROCM, -1, 0)); | |||||
| ASSERT_EQ(L::parse("rocm2"), make_lc(D::ROCM, 2, 0)); | |||||
| ASSERT_EQ(L::parse("rocm2:3"), make_lc(D::ROCM, 2, 3)); | |||||
| ASSERT_EQ(L::parse("cambriconx"), make_lc(D::CAMBRICON, -1, 0)); | ASSERT_EQ(L::parse("cambriconx"), make_lc(D::CAMBRICON, -1, 0)); | ||||
| ASSERT_EQ(L::parse("cambricon2"), make_lc(D::CAMBRICON, 2, 0)); | ASSERT_EQ(L::parse("cambricon2"), make_lc(D::CAMBRICON, 2, 0)); | ||||
| ASSERT_EQ(L::parse("cambricon2:3"), make_lc(D::CAMBRICON, 2, 3)); | ASSERT_EQ(L::parse("cambricon2:3"), make_lc(D::CAMBRICON, 2, 3)); | ||||
| @@ -66,6 +70,7 @@ TEST(TestCompNode, Parse) { | |||||
| ASSERT_THROW(L::parse("cpu0:"), MegBrainError); | ASSERT_THROW(L::parse("cpu0:"), MegBrainError); | ||||
| ASSERT_THROW(L::parse("cpu0:x"), MegBrainError); | ASSERT_THROW(L::parse("cpu0:x"), MegBrainError); | ||||
| ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError); | ASSERT_THROW(L::parse("cpu2:23x"), MegBrainError); | ||||
| ASSERT_THROW(L::parse("rcom0"), MegBrainError); | |||||
| ASSERT_THROW(L::parse("cmabricon0"), MegBrainError); | ASSERT_THROW(L::parse("cmabricon0"), MegBrainError); | ||||
| ASSERT_THROW(L::parse("atlast0"), MegBrainError); | ASSERT_THROW(L::parse("atlast0"), MegBrainError); | ||||
| ASSERT_THROW(L::parse("multithread"), MegBrainError); | ASSERT_THROW(L::parse("multithread"), MegBrainError); | ||||
| @@ -296,6 +301,18 @@ TEST(TestCompNodeCuda, set_prealloc_config) { | |||||
| 4, CompNode::DeviceType::CUDA); | 4, CompNode::DeviceType::CUDA); | ||||
| } | } | ||||
| #if MGB_ROCM | |||||
| TEST(TestCompNodeROCm, MemNode) { | |||||
| REQUIRE_AMD_GPU(2); | |||||
| auto cn00 = CompNode::load("rocm0"), | |||||
| cn1 = CompNode::load("rocm1"), | |||||
| cn01 = CompNode::load("rocm0:1"); | |||||
| ASSERT_EQ(cn00, CompNode::load("rocm0")); | |||||
| ASSERT_EQ(cn00.mem_node(), cn01.mem_node()); | |||||
| ASSERT_NE(cn00.mem_node(), cn1.mem_node()); | |||||
| } | |||||
| #endif | |||||
| #if MGB_CAMBRICON | #if MGB_CAMBRICON | ||||
| TEST(TestCompNodeCambricon, MemNode) { | TEST(TestCompNodeCambricon, MemNode) { | ||||
| @@ -466,6 +483,10 @@ TEST(TestCompNodeCPU, PeerCopyFromCUDA) { | |||||
| test_peer_copy_from_device("gpux"); | test_peer_copy_from_device("gpux"); | ||||
| } | } | ||||
| TEST(TestCompNodeCPU, PeerCopyFromROCm) { | |||||
| REQUIRE_AMD_GPU(1); | |||||
| test_peer_copy_from_device("rocmx"); | |||||
| } | |||||
| #if MGB_CAMBRICON | #if MGB_CAMBRICON | ||||
| TEST(TestCompNodeCPU, PeerCopyFromCambricon) { | TEST(TestCompNodeCPU, PeerCopyFromCambricon) { | ||||
| @@ -14,6 +14,10 @@ | |||||
| #include "../internal/invoke.h" | #include "../internal/invoke.h" | ||||
| #if MGB_ROCM | |||||
| #include "hcc_detail/hcc_defs_prologue.h" | |||||
| #include "megcore_rocm.h" | |||||
| #endif | |||||
| //! TODO: here has to be know some megdnn::opr when there is produced midout.h | //! TODO: here has to be know some megdnn::opr when there is produced midout.h | ||||
| //! fix it if there is another graceful way. | //! fix it if there is another graceful way. | ||||
| @@ -58,6 +62,11 @@ template <typename Opr> | |||||
| typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl( | typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl( | ||||
| const TParam& raw_param) { | const TParam& raw_param) { | ||||
| MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_impl"))) | MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_impl"))) | ||||
| #if MGB_ROCM | |||||
| bool miopen_algo_search_enabled; | |||||
| megcore::getMIOpenAlgoSearchStatus(&miopen_algo_search_enabled); | |||||
| mgb_assert(miopen_algo_search_enabled, "MIOpen algo search not enabled"); | |||||
| #endif | |||||
| auto&& param = raw_param.as_single_pod<Param>(); | auto&& param = raw_param.as_single_pod<Param>(); | ||||
| CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); | CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); | ||||
| auto megdnn_opr = intl::create_megdnn_opr<Opr>(cn); | auto megdnn_opr = intl::create_megdnn_opr<Opr>(cn); | ||||
| @@ -234,6 +243,9 @@ Maybe<typename TimedProfiler<Opr>::Result> TimedProfiler<Opr>::profile( | |||||
| template <typename Opr> | template <typename Opr> | ||||
| void TimedProfiler<Opr>::prof_init_device(const TParam& raw_param) { | void TimedProfiler<Opr>::prof_init_device(const TParam& raw_param) { | ||||
| MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_init_device"))) | MIDOUT_B(Opr, midout_iv(MGB_HASH_STR("TimedProfiler::prof_init_device"))) | ||||
| #if MGB_ROCM | |||||
| megcore::enableMIOpenAlgoSearch(true); | |||||
| #endif | |||||
| auto&& param = raw_param.as_single_pod<Param>(); | auto&& param = raw_param.as_single_pod<Param>(); | ||||
| CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); | CompNode cn = CompNode::load(param.comp_node_loc, param.comp_node_loc); | ||||
| // wait for cuda init, so its time does not get accounted in timeout | // wait for cuda init, so its time does not get accounted in timeout | ||||
| @@ -327,6 +327,13 @@ bool mgb::check_gpu_available(size_t num) { | |||||
| return true; | return true; | ||||
| } | } | ||||
| bool mgb::check_amd_gpu_available(size_t num) { | |||||
| if (CompNode::get_device_count(CompNode::DeviceType::ROCM) < num) { | |||||
| mgb_log_warn("skip test case that requires %zu AMD GPU(s)", num); | |||||
| return false; | |||||
| } | |||||
| return true; | |||||
| } | |||||
| bool mgb::check_cambricon_device_available(size_t num) { | bool mgb::check_cambricon_device_available(size_t num) { | ||||
| if (CompNode::get_device_count(CompNode::DeviceType::CAMBRICON) < num) { | if (CompNode::get_device_count(CompNode::DeviceType::CAMBRICON) < num) { | ||||
| @@ -460,6 +460,8 @@ std::vector<CompNode> load_multiple_xpus(size_t num); | |||||
| //! check whether given number of GPUs is available | //! check whether given number of GPUs is available | ||||
| bool check_gpu_available(size_t num); | bool check_gpu_available(size_t num); | ||||
| //! check whether given number of AMD GPUs is available | |||||
| bool check_amd_gpu_available(size_t num); | |||||
| //! check whether given number of cambricon devices is available | //! check whether given number of cambricon devices is available | ||||
| bool check_cambricon_device_available(size_t num); | bool check_cambricon_device_available(size_t num); | ||||